From 2051da858534a73589cdb27af914fe1c03b9ee98 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sun, 1 Dec 2024 17:20:50 -0800 Subject: arm64/crc-t10dif: expose CRC-T10DIF function through lib Move the arm64 CRC-T10DIF assembly code into the lib directory and wire it up to the library interface. This allows it to be used without going through the crypto API. It remains usable via the crypto API too via the shash algorithms that use the library interface. Thus all the arch-specific "shash" code becomes unnecessary and is removed. Note: to see the diff from arch/arm64/crypto/crct10dif-ce-glue.c to arch/arm64/lib/crc-t10dif-glue.c, view this commit with 'git show -M10'. Reviewed-by: Ard Biesheuvel Reviewed-by: Martin K. Petersen Link: https://lore.kernel.org/r/20241202012056.209768-7-ebiggers@kernel.org Signed-off-by: Eric Biggers --- tools/testing/selftests/arm64/fp/kernel-test.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/arm64/fp/kernel-test.c b/tools/testing/selftests/arm64/fp/kernel-test.c index 859345379044..348e8bef62c7 100644 --- a/tools/testing/selftests/arm64/fp/kernel-test.c +++ b/tools/testing/selftests/arm64/fp/kernel-test.c @@ -46,8 +46,7 @@ static void handle_kick_signal(int sig, siginfo_t *info, void *context) } static char *drivers[] = { - "crct10dif-arm64-ce", - /* "crct10dif-arm64-neon", - Same priority as generic */ + "crct10dif-arm64", "sha1-ce", "sha224-arm64", "sha224-arm64-neon", -- cgit v1.2.3 From 615ab43b838bb982dc234feff75ee9ad35447c5d Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 22 Nov 2024 14:24:59 +0100 Subject: tests/pid_namespace: add pid_max tests Signed-off-by: Alexander Mikhalitsyn Link: https://lore.kernel.org/r/20241122132459.135120-3-aleksandr.mikhalitsyn@canonical.com Signed-off-by: Christian Brauner --- tools/testing/selftests/pid_namespace/.gitignore | 1 + tools/testing/selftests/pid_namespace/Makefile | 2 +- tools/testing/selftests/pid_namespace/pid_max.c | 358 +++++++++++++++++++++++ 3 files changed, 360 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/pid_namespace/pid_max.c (limited to 'tools') diff --git a/tools/testing/selftests/pid_namespace/.gitignore b/tools/testing/selftests/pid_namespace/.gitignore index 93ab9d7e5b7e..5118f0f3edf4 100644 --- a/tools/testing/selftests/pid_namespace/.gitignore +++ b/tools/testing/selftests/pid_namespace/.gitignore @@ -1 +1,2 @@ +pid_max regression_enomem diff --git a/tools/testing/selftests/pid_namespace/Makefile b/tools/testing/selftests/pid_namespace/Makefile index 9286a1d22cd3..b972f55d07ae 100644 --- a/tools/testing/selftests/pid_namespace/Makefile +++ b/tools/testing/selftests/pid_namespace/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 CFLAGS += -g $(KHDR_INCLUDES) -TEST_GEN_PROGS = regression_enomem +TEST_GEN_PROGS = regression_enomem pid_max LOCAL_HDRS += $(selfdir)/pidfd/pidfd.h diff --git a/tools/testing/selftests/pid_namespace/pid_max.c b/tools/testing/selftests/pid_namespace/pid_max.c new file mode 100644 index 000000000000..51c414faabb0 --- /dev/null +++ b/tools/testing/selftests/pid_namespace/pid_max.c @@ -0,0 +1,358 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../kselftest_harness.h" +#include "../pidfd/pidfd.h" + +#define __STACK_SIZE (8 * 1024 * 1024) +static pid_t do_clone(int (*fn)(void *), void *arg, int flags) +{ + char *stack; + 
pid_t ret; + + stack = malloc(__STACK_SIZE); + if (!stack) + return -ENOMEM; + +#ifdef __ia64__ + ret = __clone2(fn, stack, __STACK_SIZE, flags | SIGCHLD, arg); +#else + ret = clone(fn, stack + __STACK_SIZE, flags | SIGCHLD, arg); +#endif + free(stack); + return ret; +} + +static int pid_max_cb(void *data) +{ + int fd, ret; + pid_t pid; + + ret = mount("", "/", NULL, MS_PRIVATE | MS_REC, 0); + if (ret) { + fprintf(stderr, "%m - Failed to make rootfs private mount\n"); + return -1; + } + + umount2("/proc", MNT_DETACH); + + ret = mount("proc", "/proc", "proc", 0, NULL); + if (ret) { + fprintf(stderr, "%m - Failed to mount proc\n"); + return -1; + } + + fd = open("/proc/sys/kernel/pid_max", O_RDWR | O_CLOEXEC | O_NOCTTY); + if (fd < 0) { + fprintf(stderr, "%m - Failed to open pid_max\n"); + return -1; + } + + ret = write(fd, "500", sizeof("500") - 1); + if (ret < 0) { + fprintf(stderr, "%m - Failed to write pid_max\n"); + return -1; + } + + for (int i = 0; i < 501; i++) { + pid = fork(); + if (pid == 0) + exit(EXIT_SUCCESS); + wait_for_pid(pid); + if (pid > 500) { + fprintf(stderr, "Managed to create pid number beyond limit\n"); + return -1; + } + } + + return 0; +} + +static int pid_max_nested_inner(void *data) +{ + int fret = -1; + pid_t pids[2]; + int fd, i, ret; + + ret = mount("", "/", NULL, MS_PRIVATE | MS_REC, 0); + if (ret) { + fprintf(stderr, "%m - Failed to make rootfs private mount\n"); + return fret; + } + + umount2("/proc", MNT_DETACH); + + ret = mount("proc", "/proc", "proc", 0, NULL); + if (ret) { + fprintf(stderr, "%m - Failed to mount proc\n"); + return fret; + } + + fd = open("/proc/sys/kernel/pid_max", O_RDWR | O_CLOEXEC | O_NOCTTY); + if (fd < 0) { + fprintf(stderr, "%m - Failed to open pid_max\n"); + return fret; + } + + ret = write(fd, "500", sizeof("500") - 1); + close(fd); + if (ret < 0) { + fprintf(stderr, "%m - Failed to write pid_max\n"); + return fret; + } + + pids[0] = fork(); + if (pids[0] < 0) { + fprintf(stderr, "Failed to create first new process\n"); + return fret; + } + + if (pids[0] == 0) + exit(EXIT_SUCCESS); + + pids[1] = fork(); + wait_for_pid(pids[0]); + if (pids[1] >= 0) { + if (pids[1] == 0) + exit(EXIT_SUCCESS); + wait_for_pid(pids[1]); + + fprintf(stderr, "Managed to create process even though ancestor pid namespace had a limit\n"); + return fret; + } + + /* Now make sure that we wrap pids at 400. */ + for (i = 0; i < 510; i++) { + pid_t pid; + + pid = fork(); + if (pid < 0) + return fret; + + if (pid == 0) + exit(EXIT_SUCCESS); + + wait_for_pid(pid); + if (pid >= 500) { + fprintf(stderr, "Managed to create process with pid %d beyond configured limit\n", pid); + return fret; + } + } + + return 0; +} + +static int pid_max_nested_outer(void *data) +{ + int fret = -1, nr_procs = 400; + pid_t pids[1000]; + int fd, i, ret; + pid_t pid; + + ret = mount("", "/", NULL, MS_PRIVATE | MS_REC, 0); + if (ret) { + fprintf(stderr, "%m - Failed to make rootfs private mount\n"); + return fret; + } + + umount2("/proc", MNT_DETACH); + + ret = mount("proc", "/proc", "proc", 0, NULL); + if (ret) { + fprintf(stderr, "%m - Failed to mount proc\n"); + return fret; + } + + fd = open("/proc/sys/kernel/pid_max", O_RDWR | O_CLOEXEC | O_NOCTTY); + if (fd < 0) { + fprintf(stderr, "%m - Failed to open pid_max\n"); + return fret; + } + + ret = write(fd, "400", sizeof("400") - 1); + close(fd); + if (ret < 0) { + fprintf(stderr, "%m - Failed to write pid_max\n"); + return fret; + } + + /* + * Create 397 processes. This leaves room for do_clone() (398) and + * one more 399. 
So creating another process needs to fail. + */ + for (nr_procs = 0; nr_procs < 396; nr_procs++) { + pid = fork(); + if (pid < 0) + goto reap; + + if (pid == 0) + exit(EXIT_SUCCESS); + + pids[nr_procs] = pid; + } + + pid = do_clone(pid_max_nested_inner, NULL, CLONE_NEWPID | CLONE_NEWNS); + if (pid < 0) { + fprintf(stderr, "%m - Failed to clone nested pidns\n"); + goto reap; + } + + if (wait_for_pid(pid)) { + fprintf(stderr, "%m - Nested pid_max failed\n"); + goto reap; + } + + fret = 0; + +reap: + for (int i = 0; i < nr_procs; i++) + wait_for_pid(pids[i]); + + return fret; +} + +static int pid_max_nested_limit_inner(void *data) +{ + int fret = -1, nr_procs = 400; + int fd, ret; + pid_t pid; + pid_t pids[1000]; + + ret = mount("", "/", NULL, MS_PRIVATE | MS_REC, 0); + if (ret) { + fprintf(stderr, "%m - Failed to make rootfs private mount\n"); + return fret; + } + + umount2("/proc", MNT_DETACH); + + ret = mount("proc", "/proc", "proc", 0, NULL); + if (ret) { + fprintf(stderr, "%m - Failed to mount proc\n"); + return fret; + } + + fd = open("/proc/sys/kernel/pid_max", O_RDWR | O_CLOEXEC | O_NOCTTY); + if (fd < 0) { + fprintf(stderr, "%m - Failed to open pid_max\n"); + return fret; + } + + ret = write(fd, "500", sizeof("500") - 1); + close(fd); + if (ret < 0) { + fprintf(stderr, "%m - Failed to write pid_max\n"); + return fret; + } + + for (nr_procs = 0; nr_procs < 500; nr_procs++) { + pid = fork(); + if (pid < 0) + break; + + if (pid == 0) + exit(EXIT_SUCCESS); + + pids[nr_procs] = pid; + } + + if (nr_procs >= 400) { + fprintf(stderr, "Managed to create processes beyond the configured outer limit\n"); + goto reap; + } + + fret = 0; + +reap: + for (int i = 0; i < nr_procs; i++) + wait_for_pid(pids[i]); + + return fret; +} + +static int pid_max_nested_limit_outer(void *data) +{ + int fd, ret; + pid_t pid; + + ret = mount("", "/", NULL, MS_PRIVATE | MS_REC, 0); + if (ret) { + fprintf(stderr, "%m - Failed to make rootfs private mount\n"); + return -1; + } + + umount2("/proc", MNT_DETACH); + + ret = mount("proc", "/proc", "proc", 0, NULL); + if (ret) { + fprintf(stderr, "%m - Failed to mount proc\n"); + return -1; + } + + fd = open("/proc/sys/kernel/pid_max", O_RDWR | O_CLOEXEC | O_NOCTTY); + if (fd < 0) { + fprintf(stderr, "%m - Failed to open pid_max\n"); + return -1; + } + + ret = write(fd, "400", sizeof("400") - 1); + close(fd); + if (ret < 0) { + fprintf(stderr, "%m - Failed to write pid_max\n"); + return -1; + } + + pid = do_clone(pid_max_nested_limit_inner, NULL, CLONE_NEWPID | CLONE_NEWNS); + if (pid < 0) { + fprintf(stderr, "%m - Failed to clone nested pidns\n"); + return -1; + } + + if (wait_for_pid(pid)) { + fprintf(stderr, "%m - Nested pid_max failed\n"); + return -1; + } + + return 0; +} + +TEST(pid_max_simple) +{ + pid_t pid; + + + pid = do_clone(pid_max_cb, NULL, CLONE_NEWPID | CLONE_NEWNS); + ASSERT_GT(pid, 0); + ASSERT_EQ(0, wait_for_pid(pid)); +} + +TEST(pid_max_nested_limit) +{ + pid_t pid; + + pid = do_clone(pid_max_nested_limit_outer, NULL, CLONE_NEWPID | CLONE_NEWNS); + ASSERT_GT(pid, 0); + ASSERT_EQ(0, wait_for_pid(pid)); +} + +TEST(pid_max_nested) +{ + pid_t pid; + + pid = do_clone(pid_max_nested_outer, NULL, CLONE_NEWPID | CLONE_NEWNS); + ASSERT_GT(pid, 0); + ASSERT_EQ(0, wait_for_pid(pid)); +} + +TEST_HARNESS_MAIN -- cgit v1.2.3 From eb449bd96954b1c1e491d19066cfd2a010f0aa47 Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Fri, 22 Nov 2024 09:44:15 -0800 Subject: mm: convert mm_lock_seq to a proper seqcount Convert mm_lock_seq to be seqcount_t and change all 
mmap_write_lock variants to increment it, in-line with the usual seqcount usage pattern. This lets us check whether the mmap_lock is write-locked by checking mm_lock_seq.sequence counter (odd=locked, even=unlocked). This will be used when implementing mmap_lock speculation functions. As a result vm_lock_seq is also change to be unsigned to match the type of mm_lock_seq.sequence. Suggested-by: Peter Zijlstra Signed-off-by: Suren Baghdasaryan Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Liam R. Howlett Link: https://lkml.kernel.org/r/20241122174416.1367052-2-surenb@google.com --- tools/testing/vma/vma.c | 4 ++-- tools/testing/vma/vma_internal.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/testing/vma/vma.c b/tools/testing/vma/vma.c index 8fab5e13c7c3..9bcf1736bf18 100644 --- a/tools/testing/vma/vma.c +++ b/tools/testing/vma/vma.c @@ -89,7 +89,7 @@ static struct vm_area_struct *alloc_and_link_vma(struct mm_struct *mm, * begun. Linking to the tree will have caused this to be incremented, * which means we will get a false positive otherwise. */ - vma->vm_lock_seq = -1; + vma->vm_lock_seq = UINT_MAX; return vma; } @@ -214,7 +214,7 @@ static bool vma_write_started(struct vm_area_struct *vma) int seq = vma->vm_lock_seq; /* We reset after each check. */ - vma->vm_lock_seq = -1; + vma->vm_lock_seq = UINT_MAX; /* The vma_start_write() stub simply increments this value. */ return seq > -1; diff --git a/tools/testing/vma/vma_internal.h b/tools/testing/vma/vma_internal.h index e76ff579e1fd..1d9fc97b8e80 100644 --- a/tools/testing/vma/vma_internal.h +++ b/tools/testing/vma/vma_internal.h @@ -241,7 +241,7 @@ struct vm_area_struct { * counter reuse can only lead to occasional unnecessary use of the * slowpath. */ - int vm_lock_seq; + unsigned int vm_lock_seq; struct vma_lock *vm_lock; #endif @@ -416,7 +416,7 @@ static inline bool vma_lock_alloc(struct vm_area_struct *vma) return false; init_rwsem(&vma->vm_lock->lock); - vma->vm_lock_seq = -1; + vma->vm_lock_seq = UINT_MAX; return true; } -- cgit v1.2.3 From 2116b349e29a2e9ba17ea2e45b31234e4b350793 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 28 Nov 2024 10:38:52 +0100 Subject: objtool: Generic annotation infrastructure Avoid endless .discard.foo sections for each annotation, create a single .discard.annotate_insn section that takes an annotation type along with the instruction. 
Signed-off-by: Peter Zijlstra (Intel) Acked-by: Josh Poimboeuf Link: https://lore.kernel.org/r/20241128094310.932794537@infradead.org --- tools/objtool/check.c | 45 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) (limited to 'tools') diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 4ce176ad411f..b0efc8ee16d6 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -2373,6 +2373,49 @@ static int read_unwind_hints(struct objtool_file *file) return 0; } +static int read_annotate(struct objtool_file *file, void (*func)(int type, struct instruction *insn)) +{ + struct section *sec; + struct instruction *insn; + struct reloc *reloc; + int type; + + sec = find_section_by_name(file->elf, ".discard.annotate_insn"); + if (!sec) + return 0; + + if (!sec->rsec) + return 0; + + if (sec->sh.sh_entsize != 8) { + static bool warned = false; + if (!warned) { + WARN("%s: dodgy linker, sh_entsize != 8", sec->name); + warned = true; + } + sec->sh.sh_entsize = 8; + } + + for_each_reloc(sec->rsec, reloc) { + type = *(u32 *)(sec->data->d_buf + (reloc_idx(reloc) * sec->sh.sh_entsize) + 4); + + insn = find_insn(file, reloc->sym->sec, + reloc->sym->offset + reloc_addend(reloc)); + if (!insn) { + WARN("bad .discard.annotate_insn entry: %d of type %d", reloc_idx(reloc), type); + return -1; + } + + func(type, insn); + } + + return 0; +} + +static void __annotate_nop(int type, struct instruction *insn) +{ +} + static int read_noendbr_hints(struct objtool_file *file) { struct instruction *insn; @@ -2670,6 +2713,8 @@ static int decode_sections(struct objtool_file *file) if (ret) return ret; + read_annotate(file, __annotate_nop); + /* * Must be before read_unwind_hints() since that needs insn->noendbr. */ -- cgit v1.2.3 From 22c3d58079688b697f36d670616e463cbb14d058 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 28 Nov 2024 10:38:53 +0100 Subject: objtool: Convert ANNOTATE_NOENDBR to ANNOTATE Signed-off-by: Peter Zijlstra (Intel) Acked-by: Josh Poimboeuf Link: https://lore.kernel.org/r/20241128094311.042140333@infradead.org --- tools/include/linux/objtool_types.h | 5 +++++ tools/objtool/check.c | 32 +++++--------------------------- 2 files changed, 10 insertions(+), 27 deletions(-) (limited to 'tools') diff --git a/tools/include/linux/objtool_types.h b/tools/include/linux/objtool_types.h index 453a4f4ef39d..4884f8cf8429 100644 --- a/tools/include/linux/objtool_types.h +++ b/tools/include/linux/objtool_types.h @@ -54,4 +54,9 @@ struct unwind_hint { #define UNWIND_HINT_TYPE_SAVE 6 #define UNWIND_HINT_TYPE_RESTORE 7 +/* + * Annotate types + */ +#define ANNOTYPE_NOENDBR 1 + #endif /* _LINUX_OBJTOOL_TYPES_H */ diff --git a/tools/objtool/check.c b/tools/objtool/check.c index b0efc8ee16d6..a74ff26860f7 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -2412,32 +2412,12 @@ static int read_annotate(struct objtool_file *file, void (*func)(int type, struc return 0; } -static void __annotate_nop(int type, struct instruction *insn) +static void __annotate_noendbr(int type, struct instruction *insn) { -} - -static int read_noendbr_hints(struct objtool_file *file) -{ - struct instruction *insn; - struct section *rsec; - struct reloc *reloc; - - rsec = find_section_by_name(file->elf, ".rela.discard.noendbr"); - if (!rsec) - return 0; - - for_each_reloc(rsec, reloc) { - insn = find_insn(file, reloc->sym->sec, - reloc->sym->offset + reloc_addend(reloc)); - if (!insn) { - WARN("bad .discard.noendbr entry"); - return -1; - } - - insn->noendbr = 1; - } + if (type 
!= ANNOTYPE_NOENDBR) + return; - return 0; + insn->noendbr = 1; } static int read_retpoline_hints(struct objtool_file *file) @@ -2713,12 +2693,10 @@ static int decode_sections(struct objtool_file *file) if (ret) return ret; - read_annotate(file, __annotate_nop); - /* * Must be before read_unwind_hints() since that needs insn->noendbr. */ - ret = read_noendbr_hints(file); + ret = read_annotate(file, __annotate_noendbr); if (ret) return ret; -- cgit v1.2.3 From bf5febebd99fddfc6226a94e937d38a8d470b24e Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 28 Nov 2024 10:38:54 +0100 Subject: objtool: Convert ANNOTATE_RETPOLINE_SAFE to ANNOTATE Signed-off-by: Peter Zijlstra (Intel) Acked-by: Josh Poimboeuf Link: https://lore.kernel.org/r/20241128094311.145275669@infradead.org --- tools/include/linux/objtool_types.h | 1 + tools/objtool/check.c | 52 +++++++++++++------------------------ 2 files changed, 19 insertions(+), 34 deletions(-) (limited to 'tools') diff --git a/tools/include/linux/objtool_types.h b/tools/include/linux/objtool_types.h index 4884f8cf8429..1b348361ad1d 100644 --- a/tools/include/linux/objtool_types.h +++ b/tools/include/linux/objtool_types.h @@ -58,5 +58,6 @@ struct unwind_hint { * Annotate types */ #define ANNOTYPE_NOENDBR 1 +#define ANNOTYPE_RETPOLINE_SAFE 2 #endif /* _LINUX_OBJTOOL_TYPES_H */ diff --git a/tools/objtool/check.c b/tools/objtool/check.c index a74ff26860f7..c5b52309b80d 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -2373,12 +2373,12 @@ static int read_unwind_hints(struct objtool_file *file) return 0; } -static int read_annotate(struct objtool_file *file, void (*func)(int type, struct instruction *insn)) +static int read_annotate(struct objtool_file *file, int (*func)(int type, struct instruction *insn)) { struct section *sec; struct instruction *insn; struct reloc *reloc; - int type; + int type, ret; sec = find_section_by_name(file->elf, ".discard.annotate_insn"); if (!sec) @@ -2406,53 +2406,37 @@ static int read_annotate(struct objtool_file *file, void (*func)(int type, struc return -1; } - func(type, insn); + ret = func(type, insn); + if (ret < 0) + return ret; } return 0; } -static void __annotate_noendbr(int type, struct instruction *insn) +static int __annotate_noendbr(int type, struct instruction *insn) { if (type != ANNOTYPE_NOENDBR) - return; + return 0; insn->noendbr = 1; + return 0; } -static int read_retpoline_hints(struct objtool_file *file) +static int __annotate_retpoline_safe(int type, struct instruction *insn) { - struct section *rsec; - struct instruction *insn; - struct reloc *reloc; - - rsec = find_section_by_name(file->elf, ".rela.discard.retpoline_safe"); - if (!rsec) + if (type != ANNOTYPE_RETPOLINE_SAFE) return 0; - for_each_reloc(rsec, reloc) { - if (reloc->sym->type != STT_SECTION) { - WARN("unexpected relocation symbol type in %s", rsec->name); - return -1; - } - - insn = find_insn(file, reloc->sym->sec, reloc_addend(reloc)); - if (!insn) { - WARN("bad .discard.retpoline_safe entry"); - return -1; - } - - if (insn->type != INSN_JUMP_DYNAMIC && - insn->type != INSN_CALL_DYNAMIC && - insn->type != INSN_RETURN && - insn->type != INSN_NOP) { - WARN_INSN(insn, "retpoline_safe hint not an indirect jump/call/ret/nop"); - return -1; - } - - insn->retpoline_safe = true; + if (insn->type != INSN_JUMP_DYNAMIC && + insn->type != INSN_CALL_DYNAMIC && + insn->type != INSN_RETURN && + insn->type != INSN_NOP) { + WARN_INSN(insn, "retpoline_safe hint not an indirect jump/call/ret/nop"); + return -1; } + insn->retpoline_safe = 
true; return 0; } @@ -2742,7 +2726,7 @@ static int decode_sections(struct objtool_file *file) if (ret) return ret; - ret = read_retpoline_hints(file); + ret = read_annotate(file, __annotate_retpoline_safe); if (ret) return ret; -- cgit v1.2.3 From 317f2a64618c528539d17fe6957a64106087fbd2 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 28 Nov 2024 10:38:55 +0100 Subject: objtool: Convert instrumentation_{begin,end}() to ANNOTATE Signed-off-by: Peter Zijlstra (Intel) Acked-by: Josh Poimboeuf Link: https://lore.kernel.org/r/20241128094311.245980207@infradead.org --- tools/include/linux/objtool_types.h | 2 ++ tools/objtool/check.c | 49 ++++++++----------------------------- 2 files changed, 12 insertions(+), 39 deletions(-) (limited to 'tools') diff --git a/tools/include/linux/objtool_types.h b/tools/include/linux/objtool_types.h index 1b348361ad1d..d4d68dd36f7a 100644 --- a/tools/include/linux/objtool_types.h +++ b/tools/include/linux/objtool_types.h @@ -59,5 +59,7 @@ struct unwind_hint { */ #define ANNOTYPE_NOENDBR 1 #define ANNOTYPE_RETPOLINE_SAFE 2 +#define ANNOTYPE_INSTR_BEGIN 3 +#define ANNOTYPE_INSTR_END 4 #endif /* _LINUX_OBJTOOL_TYPES_H */ diff --git a/tools/objtool/check.c b/tools/objtool/check.c index c5b52309b80d..8e39c7f484d8 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -2440,48 +2440,19 @@ static int __annotate_retpoline_safe(int type, struct instruction *insn) return 0; } -static int read_instr_hints(struct objtool_file *file) +static int __annotate_instr(int type, struct instruction *insn) { - struct section *rsec; - struct instruction *insn; - struct reloc *reloc; - - rsec = find_section_by_name(file->elf, ".rela.discard.instr_end"); - if (!rsec) - return 0; - - for_each_reloc(rsec, reloc) { - if (reloc->sym->type != STT_SECTION) { - WARN("unexpected relocation symbol type in %s", rsec->name); - return -1; - } - - insn = find_insn(file, reloc->sym->sec, reloc_addend(reloc)); - if (!insn) { - WARN("bad .discard.instr_end entry"); - return -1; - } + switch (type) { + case ANNOTYPE_INSTR_BEGIN: + insn->instr++; + break; + case ANNOTYPE_INSTR_END: insn->instr--; - } - - rsec = find_section_by_name(file->elf, ".rela.discard.instr_begin"); - if (!rsec) - return 0; - - for_each_reloc(rsec, reloc) { - if (reloc->sym->type != STT_SECTION) { - WARN("unexpected relocation symbol type in %s", rsec->name); - return -1; - } - - insn = find_insn(file, reloc->sym->sec, reloc_addend(reloc)); - if (!insn) { - WARN("bad .discard.instr_begin entry"); - return -1; - } + break; - insn->instr++; + default: + break; } return 0; @@ -2730,7 +2701,7 @@ static int decode_sections(struct objtool_file *file) if (ret) return ret; - ret = read_instr_hints(file); + ret = read_annotate(file, __annotate_instr); if (ret) return ret; -- cgit v1.2.3 From 18aa6118a1689b4d73c5ebbd917ae3f20c9c0db1 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 28 Nov 2024 10:38:56 +0100 Subject: objtool: Convert VALIDATE_UNRET_BEGIN to ANNOTATE Signed-off-by: Peter Zijlstra (Intel) Acked-by: Josh Poimboeuf Link: https://lore.kernel.org/r/20241128094311.358508242@infradead.org --- tools/include/linux/objtool_types.h | 1 + tools/objtool/check.c | 28 +++++----------------------- 2 files changed, 6 insertions(+), 23 deletions(-) (limited to 'tools') diff --git a/tools/include/linux/objtool_types.h b/tools/include/linux/objtool_types.h index d4d68dd36f7a..16236a56364b 100644 --- a/tools/include/linux/objtool_types.h +++ b/tools/include/linux/objtool_types.h @@ -61,5 +61,6 @@ struct unwind_hint { 
#define ANNOTYPE_RETPOLINE_SAFE 2 #define ANNOTYPE_INSTR_BEGIN 3 #define ANNOTYPE_INSTR_END 4 +#define ANNOTYPE_UNRET_BEGIN 5 #endif /* _LINUX_OBJTOOL_TYPES_H */ diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 8e39c7f484d8..2a703748cad1 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -2458,33 +2458,15 @@ static int __annotate_instr(int type, struct instruction *insn) return 0; } -static int read_validate_unret_hints(struct objtool_file *file) +static int __annotate_unret(int type, struct instruction *insn) { - struct section *rsec; - struct instruction *insn; - struct reloc *reloc; - - rsec = find_section_by_name(file->elf, ".rela.discard.validate_unret"); - if (!rsec) + if (type != ANNOTYPE_UNRET_BEGIN) return 0; - for_each_reloc(rsec, reloc) { - if (reloc->sym->type != STT_SECTION) { - WARN("unexpected relocation symbol type in %s", rsec->name); - return -1; - } - - insn = find_insn(file, reloc->sym->sec, reloc_addend(reloc)); - if (!insn) { - WARN("bad .discard.instr_end entry"); - return -1; - } - insn->unret = 1; - } - + insn->unret = 1; return 0; -} +} static int read_intra_function_calls(struct objtool_file *file) { @@ -2705,7 +2687,7 @@ static int decode_sections(struct objtool_file *file) if (ret) return ret; - ret = read_validate_unret_hints(file); + ret = read_annotate(file, __annotate_unret); if (ret) return ret; -- cgit v1.2.3 From f0cd57c35a75f152d3b31b9be3f7f413b96a6d3f Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 28 Nov 2024 10:38:57 +0100 Subject: objtool: Convert ANNOTATE_IGNORE_ALTERNATIVE to ANNOTATE Signed-off-by: Peter Zijlstra (Intel) Acked-by: Josh Poimboeuf Link: https://lore.kernel.org/r/20241128094311.465691316@infradead.org --- tools/include/linux/objtool_types.h | 1 + tools/objtool/check.c | 45 +++++++++---------------------------- 2 files changed, 11 insertions(+), 35 deletions(-) (limited to 'tools') diff --git a/tools/include/linux/objtool_types.h b/tools/include/linux/objtool_types.h index 16236a56364b..eab15dbe1cb7 100644 --- a/tools/include/linux/objtool_types.h +++ b/tools/include/linux/objtool_types.h @@ -62,5 +62,6 @@ struct unwind_hint { #define ANNOTYPE_INSTR_BEGIN 3 #define ANNOTYPE_INSTR_END 4 #define ANNOTYPE_UNRET_BEGIN 5 +#define ANNOTYPE_IGNORE_ALTS 6 #endif /* _LINUX_OBJTOOL_TYPES_H */ diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 2a703748cad1..ba2cb9b69399 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -1309,40 +1309,6 @@ static void add_uaccess_safe(struct objtool_file *file) } } -/* - * FIXME: For now, just ignore any alternatives which add retpolines. This is - * a temporary hack, as it doesn't allow ORC to unwind from inside a retpoline. - * But it at least allows objtool to understand the control flow *around* the - * retpoline. - */ -static int add_ignore_alternatives(struct objtool_file *file) -{ - struct section *rsec; - struct reloc *reloc; - struct instruction *insn; - - rsec = find_section_by_name(file->elf, ".rela.discard.ignore_alts"); - if (!rsec) - return 0; - - for_each_reloc(rsec, reloc) { - if (reloc->sym->type != STT_SECTION) { - WARN("unexpected relocation symbol type in %s", rsec->name); - return -1; - } - - insn = find_insn(file, reloc->sym->sec, reloc_addend(reloc)); - if (!insn) { - WARN("bad .discard.ignore_alts entry"); - return -1; - } - - insn->ignore_alts = true; - } - - return 0; -} - /* * Symbols that replace INSN_CALL_DYNAMIC, every (tail) call to such a symbol * will be added to the .retpoline_sites section. 
@@ -2414,6 +2380,15 @@ static int read_annotate(struct objtool_file *file, int (*func)(int type, struct return 0; } +static int __annotate_ignore_alts(int type, struct instruction *insn) +{ + if (type != ANNOTYPE_IGNORE_ALTS) + return 0; + + insn->ignore_alts = true; + return 0; +} + static int __annotate_noendbr(int type, struct instruction *insn) { if (type != ANNOTYPE_NOENDBR) @@ -2626,7 +2601,7 @@ static int decode_sections(struct objtool_file *file) add_ignores(file); add_uaccess_safe(file); - ret = add_ignore_alternatives(file); + ret = read_annotate(file, __annotate_ignore_alts); if (ret) return ret; -- cgit v1.2.3 From 112765ca1cb9353e71b4f5af4e6e6c4a69c28d99 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 28 Nov 2024 10:38:58 +0100 Subject: objtool: Convert ANNOTATE_INTRA_FUNCTION_CALL to ANNOTATE Signed-off-by: Peter Zijlstra (Intel) Acked-by: Josh Poimboeuf Link: https://lore.kernel.org/r/20241128094311.584892071@infradead.org --- tools/include/linux/objtool_types.h | 1 + tools/objtool/check.c | 96 +++++++++++++++---------------------- 2 files changed, 40 insertions(+), 57 deletions(-) (limited to 'tools') diff --git a/tools/include/linux/objtool_types.h b/tools/include/linux/objtool_types.h index eab15dbe1cb7..23d6fb6d04c7 100644 --- a/tools/include/linux/objtool_types.h +++ b/tools/include/linux/objtool_types.h @@ -63,5 +63,6 @@ struct unwind_hint { #define ANNOTYPE_INSTR_END 4 #define ANNOTYPE_UNRET_BEGIN 5 #define ANNOTYPE_IGNORE_ALTS 6 +#define ANNOTYPE_INTRA_FUNCTION_CALL 7 #endif /* _LINUX_OBJTOOL_TYPES_H */ diff --git a/tools/objtool/check.c b/tools/objtool/check.c index ba2cb9b69399..2222fe710832 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -2339,7 +2339,8 @@ static int read_unwind_hints(struct objtool_file *file) return 0; } -static int read_annotate(struct objtool_file *file, int (*func)(int type, struct instruction *insn)) +static int read_annotate(struct objtool_file *file, + int (*func)(struct objtool_file *file, int type, struct instruction *insn)) { struct section *sec; struct instruction *insn; @@ -2372,7 +2373,7 @@ static int read_annotate(struct objtool_file *file, int (*func)(int type, struct return -1; } - ret = func(type, insn); + ret = func(file, type, insn); if (ret < 0) return ret; } @@ -2380,7 +2381,7 @@ static int read_annotate(struct objtool_file *file, int (*func)(int type, struct return 0; } -static int __annotate_ignore_alts(int type, struct instruction *insn) +static int __annotate_ignore_alts(struct objtool_file *file, int type, struct instruction *insn) { if (type != ANNOTYPE_IGNORE_ALTS) return 0; @@ -2389,7 +2390,7 @@ static int __annotate_ignore_alts(int type, struct instruction *insn) return 0; } -static int __annotate_noendbr(int type, struct instruction *insn) +static int __annotate_noendbr(struct objtool_file *file, int type, struct instruction *insn) { if (type != ANNOTYPE_NOENDBR) return 0; @@ -2398,7 +2399,37 @@ static int __annotate_noendbr(int type, struct instruction *insn) return 0; } -static int __annotate_retpoline_safe(int type, struct instruction *insn) +static int __annotate_ifc(struct objtool_file *file, int type, struct instruction *insn) +{ + unsigned long dest_off; + + if (type != ANNOTYPE_INTRA_FUNCTION_CALL) + return 0; + + if (insn->type != INSN_CALL) { + WARN_INSN(insn, "intra_function_call not a direct call"); + return -1; + } + + /* + * Treat intra-function CALLs as JMPs, but with a stack_op. + * See add_call_destinations(), which strips stack_ops from + * normal CALLs. 
+ */ + insn->type = INSN_JUMP_UNCONDITIONAL; + + dest_off = arch_jump_destination(insn); + insn->jump_dest = find_insn(file, insn->sec, dest_off); + if (!insn->jump_dest) { + WARN_INSN(insn, "can't find call dest at %s+0x%lx", + insn->sec->name, dest_off); + return -1; + } + + return 0; +} + +static int __annotate_retpoline_safe(struct objtool_file *file, int type, struct instruction *insn) { if (type != ANNOTYPE_RETPOLINE_SAFE) return 0; @@ -2415,7 +2446,7 @@ static int __annotate_retpoline_safe(int type, struct instruction *insn) return 0; } -static int __annotate_instr(int type, struct instruction *insn) +static int __annotate_instr(struct objtool_file *file, int type, struct instruction *insn) { switch (type) { case ANNOTYPE_INSTR_BEGIN: @@ -2433,7 +2464,7 @@ static int __annotate_instr(int type, struct instruction *insn) return 0; } -static int __annotate_unret(int type, struct instruction *insn) +static int __annotate_unret(struct objtool_file *file, int type, struct instruction *insn) { if (type != ANNOTYPE_UNRET_BEGIN) return 0; @@ -2443,55 +2474,6 @@ static int __annotate_unret(int type, struct instruction *insn) } -static int read_intra_function_calls(struct objtool_file *file) -{ - struct instruction *insn; - struct section *rsec; - struct reloc *reloc; - - rsec = find_section_by_name(file->elf, ".rela.discard.intra_function_calls"); - if (!rsec) - return 0; - - for_each_reloc(rsec, reloc) { - unsigned long dest_off; - - if (reloc->sym->type != STT_SECTION) { - WARN("unexpected relocation symbol type in %s", - rsec->name); - return -1; - } - - insn = find_insn(file, reloc->sym->sec, reloc_addend(reloc)); - if (!insn) { - WARN("bad .discard.intra_function_call entry"); - return -1; - } - - if (insn->type != INSN_CALL) { - WARN_INSN(insn, "intra_function_call not a direct call"); - return -1; - } - - /* - * Treat intra-function CALLs as JMPs, but with a stack_op. - * See add_call_destinations(), which strips stack_ops from - * normal CALLs. - */ - insn->type = INSN_JUMP_UNCONDITIONAL; - - dest_off = arch_jump_destination(insn); - insn->jump_dest = find_insn(file, insn->sec, dest_off); - if (!insn->jump_dest) { - WARN_INSN(insn, "can't find call dest at %s+0x%lx", - insn->sec->name, dest_off); - return -1; - } - } - - return 0; -} - /* * Return true if name matches an instrumentation function, where calls to that * function from noinstr code can safely be removed, but compilers won't do so. @@ -2630,7 +2612,7 @@ static int decode_sections(struct objtool_file *file) * Must be before add_call_destination(); it changes INSN_CALL to * INSN_JUMP. */ - ret = read_intra_function_calls(file); + ret = read_annotate(file, __annotate_ifc); if (ret) return ret; -- cgit v1.2.3 From a8a330dd9900024dc18b048c4f0f3c6ad22ff4c1 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 28 Nov 2024 10:38:59 +0100 Subject: objtool: Collapse annotate sequences Reduce read_annotate() runs by collapsing subsequent runs into a single call. 
Signed-off-by: Peter Zijlstra (Intel) Acked-by: Josh Poimboeuf Link: https://lore.kernel.org/r/20241128094311.688871544@infradead.org --- tools/objtool/check.c | 87 +++++++++++++++++++-------------------------------- 1 file changed, 32 insertions(+), 55 deletions(-) (limited to 'tools') diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 2222fe710832..3bea8b2963d3 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -2381,21 +2381,24 @@ static int read_annotate(struct objtool_file *file, return 0; } -static int __annotate_ignore_alts(struct objtool_file *file, int type, struct instruction *insn) +static int __annotate_early(struct objtool_file *file, int type, struct instruction *insn) { - if (type != ANNOTYPE_IGNORE_ALTS) - return 0; + switch (type) { + case ANNOTYPE_IGNORE_ALTS: + insn->ignore_alts = true; + break; - insn->ignore_alts = true; - return 0; -} + /* + * Must be before read_unwind_hints() since that needs insn->noendbr. + */ + case ANNOTYPE_NOENDBR: + insn->noendbr = 1; + break; -static int __annotate_noendbr(struct objtool_file *file, int type, struct instruction *insn) -{ - if (type != ANNOTYPE_NOENDBR) - return 0; + default: + break; + } - insn->noendbr = 1; return 0; } @@ -2429,26 +2432,21 @@ static int __annotate_ifc(struct objtool_file *file, int type, struct instructio return 0; } -static int __annotate_retpoline_safe(struct objtool_file *file, int type, struct instruction *insn) +static int __annotate_late(struct objtool_file *file, int type, struct instruction *insn) { - if (type != ANNOTYPE_RETPOLINE_SAFE) - return 0; - - if (insn->type != INSN_JUMP_DYNAMIC && - insn->type != INSN_CALL_DYNAMIC && - insn->type != INSN_RETURN && - insn->type != INSN_NOP) { - WARN_INSN(insn, "retpoline_safe hint not an indirect jump/call/ret/nop"); - return -1; - } + switch (type) { + case ANNOTYPE_RETPOLINE_SAFE: + if (insn->type != INSN_JUMP_DYNAMIC && + insn->type != INSN_CALL_DYNAMIC && + insn->type != INSN_RETURN && + insn->type != INSN_NOP) { + WARN_INSN(insn, "retpoline_safe hint not an indirect jump/call/ret/nop"); + return -1; + } - insn->retpoline_safe = true; - return 0; -} + insn->retpoline_safe = true; + break; -static int __annotate_instr(struct objtool_file *file, int type, struct instruction *insn) -{ - switch (type) { case ANNOTYPE_INSTR_BEGIN: insn->instr++; break; @@ -2457,6 +2455,10 @@ static int __annotate_instr(struct objtool_file *file, int type, struct instruct insn->instr--; break; + case ANNOTYPE_UNRET_BEGIN: + insn->unret = 1; + break; + default: break; } @@ -2464,16 +2466,6 @@ static int __annotate_instr(struct objtool_file *file, int type, struct instruct return 0; } -static int __annotate_unret(struct objtool_file *file, int type, struct instruction *insn) -{ - if (type != ANNOTYPE_UNRET_BEGIN) - return 0; - - insn->unret = 1; - return 0; - -} - /* * Return true if name matches an instrumentation function, where calls to that * function from noinstr code can safely be removed, but compilers won't do so. @@ -2583,14 +2575,7 @@ static int decode_sections(struct objtool_file *file) add_ignores(file); add_uaccess_safe(file); - ret = read_annotate(file, __annotate_ignore_alts); - if (ret) - return ret; - - /* - * Must be before read_unwind_hints() since that needs insn->noendbr. 
- */ - ret = read_annotate(file, __annotate_noendbr); + ret = read_annotate(file, __annotate_early); if (ret) return ret; @@ -2636,15 +2621,7 @@ static int decode_sections(struct objtool_file *file) if (ret) return ret; - ret = read_annotate(file, __annotate_retpoline_safe); - if (ret) - return ret; - - ret = read_annotate(file, __annotate_instr); - if (ret) - return ret; - - ret = read_annotate(file, __annotate_unret); + ret = read_annotate(file, __annotate_late); if (ret) return ret; -- cgit v1.2.3 From 06e24745985c8dd0da18337503afcf2f2fdbdff1 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 28 Nov 2024 10:39:04 +0100 Subject: objtool: Remove annotate_{,un}reachable() There are no users of annotate_reachable() left. And the annotate_unreachable() usage in unreachable() is plain wrong; it will hide dangerous fall-through code-gen. Remove both. Signed-off-by: Peter Zijlstra (Intel) Acked-by: Josh Poimboeuf Link: https://lore.kernel.org/r/20241128094312.235637588@infradead.org --- tools/objtool/check.c | 43 ++----------------------------------------- 1 file changed, 2 insertions(+), 41 deletions(-) (limited to 'tools') diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 3bea8b2963d3..798cff5bffc4 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -638,47 +638,8 @@ static int add_dead_ends(struct objtool_file *file) uint64_t offset; /* - * Check for manually annotated dead ends. - */ - rsec = find_section_by_name(file->elf, ".rela.discard.unreachable"); - if (!rsec) - goto reachable; - - for_each_reloc(rsec, reloc) { - if (reloc->sym->type == STT_SECTION) { - offset = reloc_addend(reloc); - } else if (reloc->sym->local_label) { - offset = reloc->sym->offset; - } else { - WARN("unexpected relocation symbol type in %s", rsec->name); - return -1; - } - - insn = find_insn(file, reloc->sym->sec, offset); - if (insn) - insn = prev_insn_same_sec(file, insn); - else if (offset == reloc->sym->sec->sh.sh_size) { - insn = find_last_insn(file, reloc->sym->sec); - if (!insn) { - WARN("can't find unreachable insn at %s+0x%" PRIx64, - reloc->sym->sec->name, offset); - return -1; - } - } else { - WARN("can't find unreachable insn at %s+0x%" PRIx64, - reloc->sym->sec->name, offset); - return -1; - } - - insn->dead_end = true; - } - -reachable: - /* - * These manually annotated reachable checks are needed for GCC 4.4, - * where the Linux unreachable() macro isn't supported. In that case - * GCC doesn't know the "ud2" is fatal, so it generates code as if it's - * not a dead end. + * UD2 defaults to being a dead-end, allow them to be annotated for + * non-fatal, eg WARN. 
*/ rsec = find_section_by_name(file->elf, ".rela.discard.reachable"); if (!rsec) -- cgit v1.2.3 From e7a174fb43d24adca066e82d1cb9fdee092d48d1 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 28 Nov 2024 10:39:05 +0100 Subject: objtool: Convert {.UN}REACHABLE to ANNOTATE Suggested-by: Josh Poimboeuf Signed-off-by: Peter Zijlstra (Intel) Acked-by: Josh Poimboeuf Link: https://lore.kernel.org/r/20241128094312.353431347@infradead.org --- tools/include/linux/objtool_types.h | 1 + tools/objtool/check.c | 82 ++++++++++--------------------------- 2 files changed, 23 insertions(+), 60 deletions(-) (limited to 'tools') diff --git a/tools/include/linux/objtool_types.h b/tools/include/linux/objtool_types.h index 23d6fb6d04c7..df5d9fa84dba 100644 --- a/tools/include/linux/objtool_types.h +++ b/tools/include/linux/objtool_types.h @@ -64,5 +64,6 @@ struct unwind_hint { #define ANNOTYPE_UNRET_BEGIN 5 #define ANNOTYPE_IGNORE_ALTS 6 #define ANNOTYPE_INTRA_FUNCTION_CALL 7 +#define ANNOTYPE_REACHABLE 8 #endif /* _LINUX_OBJTOOL_TYPES_H */ diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 798cff5bffc4..27d0c4153582 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -627,56 +627,6 @@ static struct instruction *find_last_insn(struct objtool_file *file, return insn; } -/* - * Mark "ud2" instructions and manually annotated dead ends. - */ -static int add_dead_ends(struct objtool_file *file) -{ - struct section *rsec; - struct reloc *reloc; - struct instruction *insn; - uint64_t offset; - - /* - * UD2 defaults to being a dead-end, allow them to be annotated for - * non-fatal, eg WARN. - */ - rsec = find_section_by_name(file->elf, ".rela.discard.reachable"); - if (!rsec) - return 0; - - for_each_reloc(rsec, reloc) { - if (reloc->sym->type == STT_SECTION) { - offset = reloc_addend(reloc); - } else if (reloc->sym->local_label) { - offset = reloc->sym->offset; - } else { - WARN("unexpected relocation symbol type in %s", rsec->name); - return -1; - } - - insn = find_insn(file, reloc->sym->sec, offset); - if (insn) - insn = prev_insn_same_sec(file, insn); - else if (offset == reloc->sym->sec->sh.sh_size) { - insn = find_last_insn(file, reloc->sym->sec); - if (!insn) { - WARN("can't find reachable insn at %s+0x%" PRIx64, - reloc->sym->sec->name, offset); - return -1; - } - } else { - WARN("can't find reachable insn at %s+0x%" PRIx64, - reloc->sym->sec->name, offset); - return -1; - } - - insn->dead_end = false; - } - - return 0; -} - static int create_static_call_sections(struct objtool_file *file) { struct static_call_site *site; @@ -2306,6 +2256,7 @@ static int read_annotate(struct objtool_file *file, struct section *sec; struct instruction *insn; struct reloc *reloc; + uint64_t offset; int type, ret; sec = find_section_by_name(file->elf, ".discard.annotate_insn"); @@ -2327,8 +2278,19 @@ static int read_annotate(struct objtool_file *file, for_each_reloc(sec->rsec, reloc) { type = *(u32 *)(sec->data->d_buf + (reloc_idx(reloc) * sec->sh.sh_entsize) + 4); - insn = find_insn(file, reloc->sym->sec, - reloc->sym->offset + reloc_addend(reloc)); + offset = reloc->sym->offset + reloc_addend(reloc); + insn = find_insn(file, reloc->sym->sec, offset); + + /* + * Reachable annotations are 'funneh' and act on the previous instruction :/ + */ + if (type == ANNOTYPE_REACHABLE) { + if (insn) + insn = prev_insn_same_sec(file, insn); + else if (offset == reloc->sym->sec->sh.sh_size) + insn = find_last_insn(file, reloc->sym->sec); + } + if (!insn) { WARN("bad .discard.annotate_insn entry: %d of type 
%d", reloc_idx(reloc), type); return -1; @@ -2420,6 +2382,10 @@ static int __annotate_late(struct objtool_file *file, int type, struct instructi insn->unret = 1; break; + case ANNOTYPE_REACHABLE: + insn->dead_end = false; + break; + default: break; } @@ -2566,14 +2532,6 @@ static int decode_sections(struct objtool_file *file) if (ret) return ret; - /* - * Must be after add_call_destinations() such that it can override - * dead_end_function() marks. - */ - ret = add_dead_ends(file); - if (ret) - return ret; - ret = add_jump_table_alts(file); if (ret) return ret; @@ -2582,6 +2540,10 @@ static int decode_sections(struct objtool_file *file) if (ret) return ret; + /* + * Must be after add_call_destinations() such that it can override + * dead_end_function() marks. + */ ret = read_annotate(file, __annotate_late); if (ret) return ret; -- cgit v1.2.3 From 87116ae6da034242baf06e799f9f0e2a8ee6a796 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 28 Nov 2024 10:39:06 +0100 Subject: objtool: Fix ANNOTATE_REACHABLE to be a normal annotation Currently REACHABLE is weird for being on the instruction after the instruction it modifies. Since all REACHABLE annotations have an explicit instruction, flip them around. Signed-off-by: Peter Zijlstra (Intel) Acked-by: Josh Poimboeuf Link: https://lore.kernel.org/r/20241128094312.494176035@infradead.org --- tools/objtool/check.c | 23 ----------------------- 1 file changed, 23 deletions(-) (limited to 'tools') diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 27d0c4153582..26bdd3ebf5d2 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -614,19 +614,6 @@ static int init_pv_ops(struct objtool_file *file) return 0; } -static struct instruction *find_last_insn(struct objtool_file *file, - struct section *sec) -{ - struct instruction *insn = NULL; - unsigned int offset; - unsigned int end = (sec->sh.sh_size > 10) ? 
sec->sh.sh_size - 10 : 0; - - for (offset = sec->sh.sh_size - 1; offset >= end && !insn; offset--) - insn = find_insn(file, sec, offset); - - return insn; -} - static int create_static_call_sections(struct objtool_file *file) { struct static_call_site *site; @@ -2281,16 +2268,6 @@ static int read_annotate(struct objtool_file *file, offset = reloc->sym->offset + reloc_addend(reloc); insn = find_insn(file, reloc->sym->sec, offset); - /* - * Reachable annotations are 'funneh' and act on the previous instruction :/ - */ - if (type == ANNOTYPE_REACHABLE) { - if (insn) - insn = prev_insn_same_sec(file, insn); - else if (offset == reloc->sym->sec->sh.sh_size) - insn = find_last_insn(file, reloc->sym->sec); - } - if (!insn) { WARN("bad .discard.annotate_insn entry: %d of type %d", reloc_idx(reloc), type); return -1; -- cgit v1.2.3 From e7e0eb53c2f0f68fe2577472ce2802a4efd9d7ce Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 28 Nov 2024 10:39:07 +0100 Subject: objtool: Warn about unknown annotation types Signed-off-by: Peter Zijlstra (Intel) Acked-by: Josh Poimboeuf Link: https://lore.kernel.org/r/20241128094312.611961175@infradead.org --- tools/objtool/check.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'tools') diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 26bdd3ebf5d2..bfb407f3ac96 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -2335,6 +2335,10 @@ static int __annotate_ifc(struct objtool_file *file, int type, struct instructio static int __annotate_late(struct objtool_file *file, int type, struct instruction *insn) { switch (type) { + case ANNOTYPE_NOENDBR: + /* early */ + break; + case ANNOTYPE_RETPOLINE_SAFE: if (insn->type != INSN_JUMP_DYNAMIC && insn->type != INSN_CALL_DYNAMIC && @@ -2359,11 +2363,20 @@ static int __annotate_late(struct objtool_file *file, int type, struct instructi insn->unret = 1; break; + case ANNOTYPE_IGNORE_ALTS: + /* early */ + break; + + case ANNOTYPE_INTRA_FUNCTION_CALL: + /* ifc */ + break; + case ANNOTYPE_REACHABLE: insn->dead_end = false; break; default: + WARN_INSN(insn, "Unknown annotation type: %d", type); break; } -- cgit v1.2.3 From c3cb6c158c64dc39838208d51dcd06d1990b371d Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 11 Oct 2024 19:08:50 +0200 Subject: objtool: Allow arch code to discover jump table size In preparation for adding support for annotated jump tables, where ELF relocations and symbols are used to describe the locations of jump tables in the executable, refactor the jump table discovery logic so the table size can be returned from arch_find_switch_table(). 
Signed-off-by: Ard Biesheuvel Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20241011170847.334429-12-ardb+git@google.com --- tools/objtool/arch/loongarch/special.c | 3 ++- tools/objtool/arch/powerpc/special.c | 3 ++- tools/objtool/arch/x86/special.c | 4 +++- tools/objtool/check.c | 31 ++++++++++++++++++++----------- tools/objtool/include/objtool/check.h | 5 ++++- tools/objtool/include/objtool/special.h | 3 ++- 6 files changed, 33 insertions(+), 16 deletions(-) (limited to 'tools') diff --git a/tools/objtool/arch/loongarch/special.c b/tools/objtool/arch/loongarch/special.c index 9bba1e9318e0..87230ed570fd 100644 --- a/tools/objtool/arch/loongarch/special.c +++ b/tools/objtool/arch/loongarch/special.c @@ -9,7 +9,8 @@ bool arch_support_alt_relocation(struct special_alt *special_alt, } struct reloc *arch_find_switch_table(struct objtool_file *file, - struct instruction *insn) + struct instruction *insn, + unsigned long *table_size) { return NULL; } diff --git a/tools/objtool/arch/powerpc/special.c b/tools/objtool/arch/powerpc/special.c index d33868147196..51610689abf7 100644 --- a/tools/objtool/arch/powerpc/special.c +++ b/tools/objtool/arch/powerpc/special.c @@ -13,7 +13,8 @@ bool arch_support_alt_relocation(struct special_alt *special_alt, } struct reloc *arch_find_switch_table(struct objtool_file *file, - struct instruction *insn) + struct instruction *insn, + unsigned long *table_size) { exit(-1); } diff --git a/tools/objtool/arch/x86/special.c b/tools/objtool/arch/x86/special.c index 4ea0f9815fda..9c1c9df09aaa 100644 --- a/tools/objtool/arch/x86/special.c +++ b/tools/objtool/arch/x86/special.c @@ -109,7 +109,8 @@ bool arch_support_alt_relocation(struct special_alt *special_alt, * NOTE: MITIGATION_RETPOLINE made it harder still to decode dynamic jumps. 
*/ struct reloc *arch_find_switch_table(struct objtool_file *file, - struct instruction *insn) + struct instruction *insn, + unsigned long *table_size) { struct reloc *text_reloc, *rodata_reloc; struct section *table_sec; @@ -158,5 +159,6 @@ struct reloc *arch_find_switch_table(struct objtool_file *file, if (reloc_type(text_reloc) == R_X86_64_PC32) file->ignore_unreachables = true; + *table_size = 0; return rodata_reloc; } diff --git a/tools/objtool/check.c b/tools/objtool/check.c index bfb407f3ac96..e92c5564d9ca 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -150,6 +150,15 @@ static inline struct reloc *insn_jump_table(struct instruction *insn) return NULL; } +static inline unsigned long insn_jump_table_size(struct instruction *insn) +{ + if (insn->type == INSN_JUMP_DYNAMIC || + insn->type == INSN_CALL_DYNAMIC) + return insn->_jump_table_size; + + return 0; +} + static bool is_jump_table_jump(struct instruction *insn) { struct alt_group *alt_group = insn->alt_group; @@ -1937,6 +1946,7 @@ out: static int add_jump_table(struct objtool_file *file, struct instruction *insn, struct reloc *next_table) { + unsigned long table_size = insn_jump_table_size(insn); struct symbol *pfunc = insn_func(insn)->pfunc; struct reloc *table = insn_jump_table(insn); struct instruction *dest_insn; @@ -1951,6 +1961,8 @@ static int add_jump_table(struct objtool_file *file, struct instruction *insn, for_each_reloc_from(table->sec, reloc) { /* Check for the end of the table: */ + if (table_size && reloc_offset(reloc) - reloc_offset(table) >= table_size) + break; if (reloc != table && reloc == next_table) break; @@ -1995,12 +2007,12 @@ static int add_jump_table(struct objtool_file *file, struct instruction *insn, * find_jump_table() - Given a dynamic jump, find the switch jump table * associated with it. 
*/ -static struct reloc *find_jump_table(struct objtool_file *file, - struct symbol *func, - struct instruction *insn) +static void find_jump_table(struct objtool_file *file, struct symbol *func, + struct instruction *insn) { struct reloc *table_reloc; struct instruction *dest_insn, *orig_insn = insn; + unsigned long table_size; /* * Backward search using the @first_jump_src links, these help avoid @@ -2021,17 +2033,17 @@ static struct reloc *find_jump_table(struct objtool_file *file, insn->jump_dest->offset > orig_insn->offset)) break; - table_reloc = arch_find_switch_table(file, insn); + table_reloc = arch_find_switch_table(file, insn, &table_size); if (!table_reloc) continue; dest_insn = find_insn(file, table_reloc->sym->sec, reloc_addend(table_reloc)); if (!dest_insn || !insn_func(dest_insn) || insn_func(dest_insn)->pfunc != func) continue; - return table_reloc; + orig_insn->_jump_table = table_reloc; + orig_insn->_jump_table_size = table_size; + break; } - - return NULL; } /* @@ -2042,7 +2054,6 @@ static void mark_func_jump_tables(struct objtool_file *file, struct symbol *func) { struct instruction *insn, *last = NULL; - struct reloc *reloc; func_for_each_insn(file, func, insn) { if (!last) @@ -2065,9 +2076,7 @@ static void mark_func_jump_tables(struct objtool_file *file, if (insn->type != INSN_JUMP_DYNAMIC) continue; - reloc = find_jump_table(file, func, insn); - if (reloc) - insn->_jump_table = reloc; + find_jump_table(file, func, insn); } } diff --git a/tools/objtool/include/objtool/check.h b/tools/objtool/include/objtool/check.h index daa46f1f0965..e1cd13cd28a3 100644 --- a/tools/objtool/include/objtool/check.h +++ b/tools/objtool/include/objtool/check.h @@ -71,7 +71,10 @@ struct instruction { struct instruction *first_jump_src; union { struct symbol *_call_dest; - struct reloc *_jump_table; + struct { + struct reloc *_jump_table; + unsigned long _jump_table_size; + }; }; struct alternative *alts; struct symbol *sym; diff --git a/tools/objtool/include/objtool/special.h b/tools/objtool/include/objtool/special.h index 86d4af9c5aa9..e7ee7ffccefd 100644 --- a/tools/objtool/include/objtool/special.h +++ b/tools/objtool/include/objtool/special.h @@ -38,5 +38,6 @@ bool arch_support_alt_relocation(struct special_alt *special_alt, struct instruction *insn, struct reloc *reloc); struct reloc *arch_find_switch_table(struct objtool_file *file, - struct instruction *insn); + struct instruction *insn, + unsigned long *table_size); #endif /* _SPECIAL_H */ -- cgit v1.2.3 From 2fe34a116c707821c99bb352cb33be277c99d491 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexis=20Lothor=C3=A9=20=28eBPF=20Foundation=29?= Date: Wed, 20 Nov 2024 08:43:11 +0100 Subject: selftests/bpf: add a macro to compare raw memory MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We sometimes need to compare whole structures in an assert. It is possible to use the existing macros on each field, but when the whole structure has to be checked, it is more convenient to simply compare the whole structure memory Add a dedicated assert macro, ASSERT_MEMEQ, to allow bare memory comparision The output generated by this new macro looks like the following: [...] 
run_tests_skb_less:FAIL:returned flow keys unexpected memory mismatch actual: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 expected: 0E 00 3E 00 DD 86 01 01 00 06 86 DD 50 00 90 1F 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 01 00 00 00 00 00 00 00 [...] Acked-by: Stanislav Fomichev Signed-off-by: Alexis Lothoré (eBPF Foundation) Link: https://lore.kernel.org/r/20241120-flow_dissector-v3-1-45b46494f937@bootlin.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/test_progs.c | 15 +++++++++++++++ tools/testing/selftests/bpf/test_progs.h | 15 +++++++++++++++ 2 files changed, 30 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index 6088d8222d59..c9e745d49493 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -1282,6 +1282,21 @@ void crash_handler(int signum) backtrace_symbols_fd(bt, sz, STDERR_FILENO); } +void hexdump(const char *prefix, const void *buf, size_t len) +{ + for (int i = 0; i < len; i++) { + if (!(i % 16)) { + if (i) + fprintf(stdout, "\n"); + fprintf(stdout, "%s", prefix); + } + if (i && !(i % 8) && (i % 16)) + fprintf(stdout, "\t"); + fprintf(stdout, "%02X ", ((uint8_t *)(buf))[i]); + } + fprintf(stdout, "\n"); +} + static void sigint_handler(int signum) { int i; diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h index 74de33ae37e5..404d0d4915d5 100644 --- a/tools/testing/selftests/bpf/test_progs.h +++ b/tools/testing/selftests/bpf/test_progs.h @@ -185,6 +185,7 @@ void test__end_subtest(void); void test__skip(void); void test__fail(void); int test__join_cgroup(const char *path); +void hexdump(const char *prefix, const void *buf, size_t len); #define PRINT_FAIL(format...) \ ({ \ @@ -344,6 +345,20 @@ int test__join_cgroup(const char *path); ___ok; \ }) +#define ASSERT_MEMEQ(actual, expected, len, name) ({ \ + static int duration = 0; \ + const void *__act = actual; \ + const void *__exp = expected; \ + int __len = len; \ + bool ___ok = memcmp(__act, __exp, __len) == 0; \ + CHECK(!___ok, (name), "unexpected memory mismatch\n"); \ + fprintf(stdout, "actual:\n"); \ + hexdump("\t", __act, __len); \ + fprintf(stdout, "expected:\n"); \ + hexdump("\t", __exp, __len); \ + ___ok; \ +}) + #define ASSERT_OK(res, name) ({ \ static int duration = 0; \ long long ___res = (res); \ -- cgit v1.2.3 From 3fed5d084fb36365ccf06b6fe20d19e0e672a47d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexis=20Lothor=C3=A9=20=28eBPF=20Foundation=29?= Date: Wed, 20 Nov 2024 08:43:12 +0100 Subject: selftests/bpf: use ASSERT_MEMEQ to compare bpf flow keys MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The flow_dissector program currently compares flow keys returned by bpf program with the expected one thanks to a custom macro using memcmp. Use the new ASSERT_MEMEQ macro to perform this comparision. 
This update also allows to get rid of the unused bpf_test_run_opts variable in run_tests_skb_less (it was only used by the CHECK macro for its duration field) Acked-by: Stanislav Fomichev Signed-off-by: Alexis Lothoré (eBPF Foundation) Link: https://lore.kernel.org/r/20241120-flow_dissector-v3-2-45b46494f937@bootlin.com Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/prog_tests/flow_dissector.c | 36 ++++------------------ 1 file changed, 6 insertions(+), 30 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c index cfcc90cb7ffb..3ea25ecdf3c9 100644 --- a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c +++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c @@ -13,33 +13,6 @@ #define IP_MF 0x2000 #endif -#define CHECK_FLOW_KEYS(desc, got, expected) \ - _CHECK(memcmp(&got, &expected, sizeof(got)) != 0, \ - desc, \ - topts.duration, \ - "nhoff=%u/%u " \ - "thoff=%u/%u " \ - "addr_proto=0x%x/0x%x " \ - "is_frag=%u/%u " \ - "is_first_frag=%u/%u " \ - "is_encap=%u/%u " \ - "ip_proto=0x%x/0x%x " \ - "n_proto=0x%x/0x%x " \ - "flow_label=0x%x/0x%x " \ - "sport=%u/%u " \ - "dport=%u/%u\n", \ - got.nhoff, expected.nhoff, \ - got.thoff, expected.thoff, \ - got.addr_proto, expected.addr_proto, \ - got.is_frag, expected.is_frag, \ - got.is_first_frag, expected.is_first_frag, \ - got.is_encap, expected.is_encap, \ - got.ip_proto, expected.ip_proto, \ - got.n_proto, expected.n_proto, \ - got.flow_label, expected.flow_label, \ - got.sport, expected.sport, \ - got.dport, expected.dport) - struct ipv4_pkt { struct ethhdr eth; struct iphdr iph; @@ -545,7 +518,6 @@ static void run_tests_skb_less(int tap_fd, struct bpf_map *keys) /* Keep in sync with 'flags' from eth_get_headlen. */ __u32 eth_get_headlen_flags = BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG; - LIBBPF_OPTS(bpf_test_run_opts, topts); struct bpf_flow_keys flow_keys = {}; __u32 key = (__u32)(tests[i].keys.sport) << 16 | tests[i].keys.dport; @@ -567,7 +539,9 @@ static void run_tests_skb_less(int tap_fd, struct bpf_map *keys) err = bpf_map_lookup_elem(keys_fd, &key, &flow_keys); ASSERT_OK(err, "bpf_map_lookup_elem"); - CHECK_FLOW_KEYS(tests[i].name, flow_keys, tests[i].keys); + ASSERT_MEMEQ(&flow_keys, &tests[i].keys, + sizeof(struct bpf_flow_keys), + "returned flow keys"); err = bpf_map_delete_elem(keys_fd, &key); ASSERT_OK(err, "bpf_map_delete_elem"); @@ -656,7 +630,9 @@ void test_flow_dissector(void) continue; ASSERT_EQ(topts.data_size_out, sizeof(flow_keys), "test_run data_size_out"); - CHECK_FLOW_KEYS(tests[i].name, flow_keys, tests[i].keys); + ASSERT_MEMEQ(&flow_keys, &tests[i].keys, + sizeof(struct bpf_flow_keys), + "returned flow keys"); } /* Do the same tests but for skb-less flow dissector. -- cgit v1.2.3 From 28494d6a277ebe5b9a5b1313b1f1ee396b890e35 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexis=20Lothor=C3=A9=20=28eBPF=20Foundation=29?= Date: Wed, 20 Nov 2024 08:43:13 +0100 Subject: selftests/bpf: replace CHECK calls with ASSERT macros in flow_dissector test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The flow dissector test currently relies on generic CHECK macros to perform tests. Update those to newer, more-specific ASSERT macros. 
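A before/after sketch of the conversion, following the pattern used throughout this file (only the error string changes per call site):

	/* before: CHECK() needs a 'duration' variable and a hand-written format */
	if (CHECK(keys_fd < 0, "bpf_map__fd", "err %d\n", keys_fd))
		return;

	/* after: the dedicated ASSERT helper does its own reporting */
	if (!ASSERT_OK_FD(keys_fd, "bpf_map__fd"))
		return;
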
This update allows to get rid of the global duration variable, which was needed by the CHECK macros Acked-by: Stanislav Fomichev Signed-off-by: Alexis Lothoré (eBPF Foundation) Link: https://lore.kernel.org/r/20241120-flow_dissector-v3-3-45b46494f937@bootlin.com Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/prog_tests/flow_dissector.c | 41 +++++++++++----------- 1 file changed, 21 insertions(+), 20 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c index 3ea25ecdf3c9..6fbe8b6dad56 100644 --- a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c +++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c @@ -79,7 +79,6 @@ struct test { #define VLAN_HLEN 4 -static __u32 duration; struct test tests[] = { { .name = "ipv4", @@ -511,7 +510,7 @@ static void run_tests_skb_less(int tap_fd, struct bpf_map *keys) int i, err, keys_fd; keys_fd = bpf_map__fd(keys); - if (CHECK(keys_fd < 0, "bpf_map__fd", "err %d\n", keys_fd)) + if (!ASSERT_OK_FD(keys_fd, "bpf_map__fd")) return; for (i = 0; i < ARRAY_SIZE(tests); i++) { @@ -530,14 +529,16 @@ static void run_tests_skb_less(int tap_fd, struct bpf_map *keys) continue; err = tx_tap(tap_fd, &tests[i].pkt, sizeof(tests[i].pkt)); - CHECK(err < 0, "tx_tap", "err %d errno %d\n", err, errno); + if (!ASSERT_EQ(err, sizeof(tests[i].pkt), "tx_tap")) + continue; /* check the stored flow_keys only if BPF_OK expected */ if (tests[i].retval != BPF_OK) continue; err = bpf_map_lookup_elem(keys_fd, &key, &flow_keys); - ASSERT_OK(err, "bpf_map_lookup_elem"); + if (!ASSERT_OK(err, "bpf_map_lookup_elem")) + continue; ASSERT_MEMEQ(&flow_keys, &tests[i].keys, sizeof(struct bpf_flow_keys), @@ -553,17 +554,17 @@ static void test_skb_less_prog_attach(struct bpf_flow *skel, int tap_fd) int err, prog_fd; prog_fd = bpf_program__fd(skel->progs._dissect); - if (CHECK(prog_fd < 0, "bpf_program__fd", "err %d\n", prog_fd)) + if (!ASSERT_OK_FD(prog_fd, "bpf_program__fd")) return; err = bpf_prog_attach(prog_fd, 0, BPF_FLOW_DISSECTOR, 0); - if (CHECK(err, "bpf_prog_attach", "err %d errno %d\n", err, errno)) + if (!ASSERT_OK(err, "bpf_prog_attach")) return; run_tests_skb_less(tap_fd, skel->maps.last_dissection); err = bpf_prog_detach2(prog_fd, 0, BPF_FLOW_DISSECTOR); - CHECK(err, "bpf_prog_detach2", "err %d errno %d\n", err, errno); + ASSERT_OK(err, "bpf_prog_detach2"); } static void test_skb_less_link_create(struct bpf_flow *skel, int tap_fd) @@ -572,7 +573,7 @@ static void test_skb_less_link_create(struct bpf_flow *skel, int tap_fd) int err, net_fd; net_fd = open("/proc/self/ns/net", O_RDONLY); - if (CHECK(net_fd < 0, "open(/proc/self/ns/net)", "err %d\n", errno)) + if (!ASSERT_OK_FD(net_fd, "open(/proc/self/ns/net")) return; link = bpf_program__attach_netns(skel->progs._dissect, net_fd); @@ -582,7 +583,7 @@ static void test_skb_less_link_create(struct bpf_flow *skel, int tap_fd) run_tests_skb_less(tap_fd, skel->maps.last_dissection); err = bpf_link__destroy(link); - CHECK(err, "bpf_link__destroy", "err %d\n", err); + ASSERT_OK(err, "bpf_link__destroy"); out_close: close(net_fd); } @@ -593,18 +594,18 @@ void test_flow_dissector(void) struct bpf_flow *skel; skel = bpf_flow__open_and_load(); - if (CHECK(!skel, "skel", "failed to open/load skeleton\n")) + if (!ASSERT_OK_PTR(skel, "open/load skeleton")) return; prog_fd = bpf_program__fd(skel->progs._dissect); - if (CHECK(prog_fd < 0, "bpf_program__fd", "err %d\n", prog_fd)) - goto out_destroy_skel; + if 
(!ASSERT_OK_FD(prog_fd, "bpf_program__fd")) + return; keys_fd = bpf_map__fd(skel->maps.last_dissection); - if (CHECK(keys_fd < 0, "bpf_map__fd", "err %d\n", keys_fd)) - goto out_destroy_skel; + if (!ASSERT_OK_FD(keys_fd, "bpf_map__fd")) + return; err = init_prog_array(skel->obj, skel->maps.jmp_table); - if (CHECK(err, "init_prog_array", "err %d\n", err)) - goto out_destroy_skel; + if (!ASSERT_OK(err, "init_prog_array")) + return; for (i = 0; i < ARRAY_SIZE(tests); i++) { struct bpf_flow_keys flow_keys; @@ -634,17 +635,17 @@ void test_flow_dissector(void) sizeof(struct bpf_flow_keys), "returned flow keys"); } - /* Do the same tests but for skb-less flow dissector. * We use a known path in the net/tun driver that calls * eth_get_headlen and we manually export bpf_flow_keys * via BPF map in this case. */ - tap_fd = create_tap("tap0"); - CHECK(tap_fd < 0, "create_tap", "tap_fd %d errno %d\n", tap_fd, errno); + if (!ASSERT_OK_FD(tap_fd, "create_tap")) + goto out_destroy_skel; err = ifup("tap0"); - CHECK(err, "ifup", "err %d errno %d\n", err, errno); + if (!ASSERT_OK(err, "ifup")) + goto out_destroy_skel; /* Test direct prog attachment */ test_skb_less_prog_attach(skel, tap_fd); -- cgit v1.2.3 From 2b044dd186f0f378894f1e590d62325cbbf9b085 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexis=20Lothor=C3=A9=20=28eBPF=20Foundation=29?= Date: Wed, 20 Nov 2024 08:43:14 +0100 Subject: selftests/bpf: re-split main function into dedicated tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The flow_dissector runs plenty of tests over diffent kind of packets, grouped into three categories: skb mode, non-skb mode with direct attach, and non-skb with indirect attach. Re-split the main function into dedicated tests. Each test now must have its own setup/teardown, but for the advantage of being able to run them separately. While at it, make sure that tests attaching the bpf programs are run in a dedicated ns. 
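The resulting tests all follow the same rough shape (sketch only; "some_mode" is a placeholder name and error paths are trimmed to the ones shown in the diff below):

	void test_flow_dissector_some_mode(void)
	{
		struct bpf_flow *skel;
		struct netns_obj *ns;

		ns = netns_new("flow_dissector_some_mode_ns", true);	/* per-test netns */
		if (!ASSERT_OK_PTR(ns, "create and open netns"))
			return;

		skel = bpf_flow__open_and_load();
		if (!ASSERT_OK_PTR(skel, "open/load skeleton"))
			goto out_clean_ns;

		/* attach the dissector, send the packet matrix, detach */

		bpf_flow__destroy(skel);
	out_clean_ns:
		netns_free(ns);
	}
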
Acked-by: Stanislav Fomichev Signed-off-by: Alexis Lothoré (eBPF Foundation) Link: https://lore.kernel.org/r/20241120-flow_dissector-v3-4-45b46494f937@bootlin.com Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/prog_tests/flow_dissector.c | 108 ++++++++++++++------- 1 file changed, 73 insertions(+), 35 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c index 6fbe8b6dad56..7e7051a85be7 100644 --- a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c +++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c @@ -549,63 +549,117 @@ static void run_tests_skb_less(int tap_fd, struct bpf_map *keys) } } -static void test_skb_less_prog_attach(struct bpf_flow *skel, int tap_fd) +void test_flow_dissector_skb_less_direct_attach(void) { - int err, prog_fd; + int err, prog_fd, tap_fd; + struct bpf_flow *skel; + struct netns_obj *ns; + + ns = netns_new("flow_dissector_skb_less_indirect_attach_ns", true); + if (!ASSERT_OK_PTR(ns, "create and open netns")) + return; + + skel = bpf_flow__open_and_load(); + if (!ASSERT_OK_PTR(skel, "open/load skeleton")) + goto out_clean_ns; + + err = init_prog_array(skel->obj, skel->maps.jmp_table); + if (!ASSERT_OK(err, "init_prog_array")) + goto out_destroy_skel; prog_fd = bpf_program__fd(skel->progs._dissect); if (!ASSERT_OK_FD(prog_fd, "bpf_program__fd")) - return; + goto out_destroy_skel; err = bpf_prog_attach(prog_fd, 0, BPF_FLOW_DISSECTOR, 0); if (!ASSERT_OK(err, "bpf_prog_attach")) - return; + goto out_destroy_skel; + + tap_fd = create_tap("tap0"); + if (!ASSERT_OK_FD(tap_fd, "create_tap")) + goto out_destroy_skel; + err = ifup("tap0"); + if (!ASSERT_OK(err, "ifup")) + goto out_close_tap; run_tests_skb_less(tap_fd, skel->maps.last_dissection); err = bpf_prog_detach2(prog_fd, 0, BPF_FLOW_DISSECTOR); ASSERT_OK(err, "bpf_prog_detach2"); + +out_close_tap: + close(tap_fd); +out_destroy_skel: + bpf_flow__destroy(skel); +out_clean_ns: + netns_free(ns); } -static void test_skb_less_link_create(struct bpf_flow *skel, int tap_fd) +void test_flow_dissector_skb_less_indirect_attach(void) { + int err, net_fd, tap_fd; + struct bpf_flow *skel; struct bpf_link *link; - int err, net_fd; + struct netns_obj *ns; + + ns = netns_new("flow_dissector_skb_less_indirect_attach_ns", true); + if (!ASSERT_OK_PTR(ns, "create and open netns")) + return; + + skel = bpf_flow__open_and_load(); + if (!ASSERT_OK_PTR(skel, "open/load skeleton")) + goto out_clean_ns; net_fd = open("/proc/self/ns/net", O_RDONLY); if (!ASSERT_OK_FD(net_fd, "open(/proc/self/ns/net")) - return; + goto out_destroy_skel; + + err = init_prog_array(skel->obj, skel->maps.jmp_table); + if (!ASSERT_OK(err, "init_prog_array")) + goto out_destroy_skel; + + tap_fd = create_tap("tap0"); + if (!ASSERT_OK_FD(tap_fd, "create_tap")) + goto out_close_ns; + err = ifup("tap0"); + if (!ASSERT_OK(err, "ifup")) + goto out_close_tap; link = bpf_program__attach_netns(skel->progs._dissect, net_fd); if (!ASSERT_OK_PTR(link, "attach_netns")) - goto out_close; + goto out_close_tap; run_tests_skb_less(tap_fd, skel->maps.last_dissection); err = bpf_link__destroy(link); ASSERT_OK(err, "bpf_link__destroy"); -out_close: + +out_close_tap: + close(tap_fd); +out_close_ns: close(net_fd); +out_destroy_skel: + bpf_flow__destroy(skel); +out_clean_ns: + netns_free(ns); } -void test_flow_dissector(void) +void test_flow_dissector_skb(void) { - int i, err, prog_fd, keys_fd = -1, tap_fd; struct bpf_flow *skel; + int i, err, prog_fd; skel = 
bpf_flow__open_and_load(); if (!ASSERT_OK_PTR(skel, "open/load skeleton")) return; - prog_fd = bpf_program__fd(skel->progs._dissect); - if (!ASSERT_OK_FD(prog_fd, "bpf_program__fd")) - return; - keys_fd = bpf_map__fd(skel->maps.last_dissection); - if (!ASSERT_OK_FD(keys_fd, "bpf_map__fd")) - return; err = init_prog_array(skel->obj, skel->maps.jmp_table); if (!ASSERT_OK(err, "init_prog_array")) - return; + goto out_destroy_skel; + + prog_fd = bpf_program__fd(skel->progs._dissect); + if (!ASSERT_OK_FD(prog_fd, "bpf_program__fd")) + goto out_destroy_skel; for (i = 0; i < ARRAY_SIZE(tests); i++) { struct bpf_flow_keys flow_keys; @@ -635,24 +689,8 @@ void test_flow_dissector(void) sizeof(struct bpf_flow_keys), "returned flow keys"); } - /* Do the same tests but for skb-less flow dissector. - * We use a known path in the net/tun driver that calls - * eth_get_headlen and we manually export bpf_flow_keys - * via BPF map in this case. - */ - tap_fd = create_tap("tap0"); - if (!ASSERT_OK_FD(tap_fd, "create_tap")) - goto out_destroy_skel; - err = ifup("tap0"); - if (!ASSERT_OK(err, "ifup")) - goto out_destroy_skel; - - /* Test direct prog attachment */ - test_skb_less_prog_attach(skel, tap_fd); - /* Test indirect prog attachment via link */ - test_skb_less_link_create(skel, tap_fd); - close(tap_fd); out_destroy_skel: bpf_flow__destroy(skel); } + -- cgit v1.2.3 From a2cc66bb937a68d22b5acf7d08f7c8cb5fa859be Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexis=20Lothor=C3=A9=20=28eBPF=20Foundation=29?= Date: Wed, 20 Nov 2024 08:43:15 +0100 Subject: selftests/bpf: expose all subtests from flow_dissector MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The flow_dissector test integrated in test_progs actually runs a wide matrix of tests over different packets types and bpf programs modes, but exposes only 3 main tests, preventing tests users from running specific subtests with a specific input only. Expose all subtests executed by flow_dissector by using test__start_subtest(). 
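Each packet case is then gated individually, along these lines (test_suffix is whatever string the caller passes to distinguish the skb and non-skb runs):

	char test_name[TEST_NAME_MAX_LEN];
	int i;

	for (i = 0; i < ARRAY_SIZE(tests); i++) {
		snprintf(test_name, TEST_NAME_MAX_LEN, "%s-%s",
			 tests[i].name, test_suffix);
		if (!test__start_subtest(test_name))
			continue;	/* not selected on the command line, skip */

		/* ... run this single packet case ... */
	}
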
Acked-by: Stanislav Fomichev Signed-off-by: Alexis Lothoré (eBPF Foundation) Link: https://lore.kernel.org/r/20241120-flow_dissector-v3-5-45b46494f937@bootlin.com Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/prog_tests/flow_dissector.c | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c index 7e7051a85be7..29182009cda9 100644 --- a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c +++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c @@ -8,6 +8,7 @@ #include "bpf_flow.skel.h" #define FLOW_CONTINUE_SADDR 0x7f00007f /* 127.0.0.127 */ +#define TEST_NAME_MAX_LEN 64 #ifndef IP_MF #define IP_MF 0x2000 @@ -505,8 +506,10 @@ static int init_prog_array(struct bpf_object *obj, struct bpf_map *prog_array) return 0; } -static void run_tests_skb_less(int tap_fd, struct bpf_map *keys) +static void run_tests_skb_less(int tap_fd, struct bpf_map *keys, + char *test_suffix) { + char test_name[TEST_NAME_MAX_LEN]; int i, err, keys_fd; keys_fd = bpf_map__fd(keys); @@ -520,6 +523,10 @@ static void run_tests_skb_less(int tap_fd, struct bpf_map *keys) struct bpf_flow_keys flow_keys = {}; __u32 key = (__u32)(tests[i].keys.sport) << 16 | tests[i].keys.dport; + snprintf(test_name, TEST_NAME_MAX_LEN, "%s-%s", tests[i].name, + test_suffix); + if (!test__start_subtest(test_name)) + continue; /* For skb-less case we can't pass input flags; run * only the tests that have a matching set of flags. @@ -582,7 +589,8 @@ void test_flow_dissector_skb_less_direct_attach(void) if (!ASSERT_OK(err, "ifup")) goto out_close_tap; - run_tests_skb_less(tap_fd, skel->maps.last_dissection); + run_tests_skb_less(tap_fd, skel->maps.last_dissection, + "non-skb-direct-attach"); err = bpf_prog_detach2(prog_fd, 0, BPF_FLOW_DISSECTOR); ASSERT_OK(err, "bpf_prog_detach2"); @@ -629,7 +637,8 @@ void test_flow_dissector_skb_less_indirect_attach(void) if (!ASSERT_OK_PTR(link, "attach_netns")) goto out_close_tap; - run_tests_skb_less(tap_fd, skel->maps.last_dissection); + run_tests_skb_less(tap_fd, skel->maps.last_dissection, + "non-skb-indirect-attach"); err = bpf_link__destroy(link); ASSERT_OK(err, "bpf_link__destroy"); @@ -646,6 +655,7 @@ out_clean_ns: void test_flow_dissector_skb(void) { + char test_name[TEST_NAME_MAX_LEN]; struct bpf_flow *skel; int i, err, prog_fd; @@ -670,6 +680,10 @@ void test_flow_dissector_skb(void) ); static struct bpf_flow_keys ctx = {}; + snprintf(test_name, TEST_NAME_MAX_LEN, "%s-skb", tests[i].name); + if (!test__start_subtest(test_name)) + continue; + if (tests[i].flags) { topts.ctx_in = &ctx; topts.ctx_size_in = sizeof(ctx); -- cgit v1.2.3 From b4940402675004a7e2a14c83824d5c15e3e80f9e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexis=20Lothor=C3=A9=20=28eBPF=20Foundation=29?= Date: Wed, 20 Nov 2024 08:43:16 +0100 Subject: selftests/bpf: add gre packets testing to flow_dissector MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The bpf_flow program is able to handle GRE headers in IP packets. 
Add a few test data input simulating those GRE packets, with 2 different cases: - parse GRE and the encapsulated packet - parse GRE only Acked-by: Stanislav Fomichev Signed-off-by: Alexis Lothoré (eBPF Foundation) Link: https://lore.kernel.org/r/20241120-flow_dissector-v3-6-45b46494f937@bootlin.com Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/prog_tests/flow_dissector.c | 76 ++++++++++++++++++++++ 1 file changed, 76 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c index 29182009cda9..1e17254376ec 100644 --- a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c +++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c @@ -63,6 +63,19 @@ struct dvlan_ipv6_pkt { struct tcphdr tcp; } __packed; +struct gre_base_hdr { + __be16 flags; + __be16 protocol; +} gre_base_hdr; + +struct gre_minimal_pkt { + struct ethhdr eth; + struct iphdr iph; + struct gre_base_hdr gre_hdr; + struct iphdr iph_inner; + struct tcphdr tcp; +} __packed; + struct test { const char *name; union { @@ -72,6 +85,7 @@ struct test { struct ipv6_pkt ipv6; struct ipv6_frag_pkt ipv6_frag; struct dvlan_ipv6_pkt dvlan_ipv6; + struct gre_minimal_pkt gre_minimal; } pkt; struct bpf_flow_keys keys; __u32 flags; @@ -417,6 +431,68 @@ struct test tests[] = { }, .retval = BPF_FLOW_DISSECTOR_CONTINUE, }, + { + .name = "ip-gre", + .pkt.gre_minimal = { + .eth.h_proto = __bpf_constant_htons(ETH_P_IP), + .iph.ihl = 5, + .iph.protocol = IPPROTO_GRE, + .iph.tot_len = __bpf_constant_htons(MAGIC_BYTES), + .gre_hdr = { + .flags = 0, + .protocol = __bpf_constant_htons(ETH_P_IP), + }, + .iph_inner.ihl = 5, + .iph_inner.protocol = IPPROTO_TCP, + .iph_inner.tot_len = + __bpf_constant_htons(MAGIC_BYTES - + sizeof(struct iphdr)), + .tcp.doff = 5, + .tcp.source = 80, + .tcp.dest = 8080, + }, + .keys = { + .nhoff = ETH_HLEN, + .thoff = ETH_HLEN + sizeof(struct iphdr) * 2 + + sizeof(struct gre_base_hdr), + .addr_proto = ETH_P_IP, + .ip_proto = IPPROTO_TCP, + .n_proto = __bpf_constant_htons(ETH_P_IP), + .is_encap = true, + .sport = 80, + .dport = 8080, + }, + .retval = BPF_OK, + }, + { + .name = "ip-gre-no-encap", + .pkt.ipip = { + .eth.h_proto = __bpf_constant_htons(ETH_P_IP), + .iph.ihl = 5, + .iph.protocol = IPPROTO_GRE, + .iph.tot_len = __bpf_constant_htons(MAGIC_BYTES), + .iph_inner.ihl = 5, + .iph_inner.protocol = IPPROTO_TCP, + .iph_inner.tot_len = + __bpf_constant_htons(MAGIC_BYTES - + sizeof(struct iphdr)), + .tcp.doff = 5, + .tcp.source = 80, + .tcp.dest = 8080, + }, + .keys = { + .flags = BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP, + .nhoff = ETH_HLEN, + .thoff = ETH_HLEN + sizeof(struct iphdr) + + sizeof(struct gre_base_hdr), + .addr_proto = ETH_P_IP, + .ip_proto = IPPROTO_GRE, + .n_proto = __bpf_constant_htons(ETH_P_IP), + .is_encap = true, + }, + .flags = BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP, + .retval = BPF_OK, + }, }; static int create_tap(const char *ifname) -- cgit v1.2.3 From 6fb5be12d1bb66e8dce6238e4387f0db99efee25 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexis=20Lothor=C3=A9=20=28eBPF=20Foundation=29?= Date: Wed, 20 Nov 2024 08:43:17 +0100 Subject: selftests/bpf: migrate flow_dissector namespace exclusivity test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit a11c397c43d5 ("bpf/flow_dissector: add mode to enforce global BPF flow dissector") is currently tested in test_flow_dissector.sh, which is not part of test_progs. 
Add the corresponding test to flow_dissector.c, which is part of test_progs. The new test reproduces the behavior implemented in its shell script counterpart: - attach a flow dissector program to the root net namespace, ensure that we can not attach another flow dissector in any non-root net namespace - attach a flow dissector program to a non-root net namespace, ensure that we can not attach another flow dissector in root namespace Since the new test is performing operations in the root net namespace, make sure to set it as a "serial" test to make sure not to conflict with any other test. Acked-by: Stanislav Fomichev Signed-off-by: Alexis Lothoré (eBPF Foundation) Link: https://lore.kernel.org/r/20241120-flow_dissector-v3-7-45b46494f937@bootlin.com Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/prog_tests/flow_dissector.c | 62 ++++++++++++++++++++++ 1 file changed, 62 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c index 1e17254376ec..8e6e483fead3 100644 --- a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c +++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c @@ -7,6 +7,7 @@ #include "bpf_flow.skel.h" +#define TEST_NS "flow_dissector_ns" #define FLOW_CONTINUE_SADDR 0x7f00007f /* 127.0.0.127 */ #define TEST_NAME_MAX_LEN 64 @@ -495,6 +496,67 @@ struct test tests[] = { }, }; +void serial_test_flow_dissector_namespace(void) +{ + struct bpf_flow *skel; + struct nstoken *ns; + int err, prog_fd; + + skel = bpf_flow__open_and_load(); + if (!ASSERT_OK_PTR(skel, "open/load skeleton")) + return; + + prog_fd = bpf_program__fd(skel->progs._dissect); + if (!ASSERT_OK_FD(prog_fd, "get dissector fd")) + goto out_destroy_skel; + + /* We must be able to attach a flow dissector to root namespace */ + err = bpf_prog_attach(prog_fd, 0, BPF_FLOW_DISSECTOR, 0); + if (!ASSERT_OK(err, "attach on root namespace ok")) + goto out_destroy_skel; + + err = make_netns(TEST_NS); + if (!ASSERT_OK(err, "create non-root net namespace")) + goto out_destroy_skel; + + /* We must not be able to additionally attach a flow dissector to a + * non-root net namespace + */ + ns = open_netns(TEST_NS); + if (!ASSERT_OK_PTR(ns, "enter non-root net namespace")) + goto out_clean_ns; + + err = bpf_prog_attach(prog_fd, 0, BPF_FLOW_DISSECTOR, 0); + close_netns(ns); + ASSERT_ERR(err, "refuse new flow dissector in non-root net namespace"); + ASSERT_EQ(errno, EEXIST, "refused because of already attached prog"); + + /* If no flow dissector is attached to the root namespace, we must + * be able to attach one to a non-root net namespace + */ + bpf_prog_detach2(prog_fd, 0, BPF_FLOW_DISSECTOR); + ns = open_netns(TEST_NS); + ASSERT_OK_PTR(ns, "enter non-root net namespace"); + err = bpf_prog_attach(prog_fd, 0, BPF_FLOW_DISSECTOR, 0); + close_netns(ns); + ASSERT_OK(err, "accept new flow dissector in non-root net namespace"); + + /* If a flow dissector is attached to non-root net namespace, attaching + * a flow dissector to root namespace must fail + */ + err = bpf_prog_attach(prog_fd, 0, BPF_FLOW_DISSECTOR, 0); + ASSERT_ERR(err, "refuse new flow dissector on root namespace"); + ASSERT_EQ(errno, EEXIST, "refused because of already attached prog"); + + ns = open_netns(TEST_NS); + bpf_prog_detach2(prog_fd, 0, BPF_FLOW_DISSECTOR); + close_netns(ns); +out_clean_ns: + remove_netns(TEST_NS); +out_destroy_skel: + bpf_flow__destroy(skel); +} + static int create_tap(const char *ifname) { struct ifreq ifr = { -- cgit v1.2.3 
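The "serial" naming used above matters: test_progs keeps serial_test_* functions out of the parallel run, so such a test may safely touch global state. A minimal skeleton (the function name here is only a placeholder):

	/* never run concurrently with other tests by test_progs */
	void serial_test_touches_global_state(void)
	{
		/* ... attach/detach flow dissectors in the root netns ... */
	}
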
From c24010821a89954a93b39354c42596d315518c9b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexis=20Lothor=C3=A9=20=28eBPF=20Foundation=29?= Date: Wed, 20 Nov 2024 08:43:18 +0100 Subject: selftests/bpf: Enable generic tc actions in selftests config MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Enable CONFIG_NET_ACT_GACT to allow adding simple actions with tc filters. This is for example needed to migrate test_flow_dissector into the automated testing performed in CI. Acked-by: Stanislav Fomichev Signed-off-by: Alexis Lothoré (eBPF Foundation) Link: https://lore.kernel.org/r/20241120-flow_dissector-v3-8-45b46494f937@bootlin.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/config | 1 + 1 file changed, 1 insertion(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config index 4ca84c8d9116..c378d5d07e02 100644 --- a/tools/testing/selftests/bpf/config +++ b/tools/testing/selftests/bpf/config @@ -58,6 +58,7 @@ CONFIG_MPLS=y CONFIG_MPLS_IPTUNNEL=y CONFIG_MPLS_ROUTING=y CONFIG_MPTCP=y +CONFIG_NET_ACT_GACT=y CONFIG_NET_ACT_SKBMOD=y CONFIG_NET_CLS=y CONFIG_NET_CLS_ACT=y -- cgit v1.2.3 From f4504af68575c1675235bacd8d36157cfc27ae5e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexis=20Lothor=C3=A9=20=28eBPF=20Foundation=29?= Date: Wed, 20 Nov 2024 08:43:19 +0100 Subject: selftests/bpf: move ip checksum helper to network helpers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit xdp_metadata test has a small helper computing ipv4 checksums to allow manually building packets. Move this helper to network_helpers to share it with other tests. Signed-off-by: Alexis Lothoré (eBPF Foundation) Acked-by: Stanislav Fomichev Link: https://lore.kernel.org/r/20241120-flow_dissector-v3-9-45b46494f937@bootlin.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/network_helpers.h | 24 ++++++++++++++++++++++ .../selftests/bpf/prog_tests/xdp_metadata.c | 19 +---------------- 2 files changed, 25 insertions(+), 18 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/network_helpers.h b/tools/testing/selftests/bpf/network_helpers.h index 5764155b6d25..0b2ed90c763f 100644 --- a/tools/testing/selftests/bpf/network_helpers.h +++ b/tools/testing/selftests/bpf/network_helpers.h @@ -105,6 +105,30 @@ static __u16 csum_fold(__u32 csum) return (__u16)~csum; } +static __wsum csum_partial(const void *buf, int len, __wsum sum) +{ + __u16 *p = (__u16 *)buf; + int num_u16 = len >> 1; + int i; + + for (i = 0; i < num_u16; i++) + sum += p[i]; + + return sum; +} + +static inline __sum16 build_ip_csum(struct iphdr *iph) +{ + __u32 sum = 0; + __u16 *p; + + iph->check = 0; + p = (void *)iph; + sum = csum_partial(p, iph->ihl << 2, 0); + + return csum_fold(sum); +} + static inline __sum16 csum_tcpudp_magic(__be32 saddr, __be32 daddr, __u32 len, __u8 proto, __wsum csum) diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c b/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c index c87ee2bf558c..7f8e16165533 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c @@ -133,23 +133,6 @@ static void close_xsk(struct xsk *xsk) munmap(xsk->umem_area, UMEM_SIZE); } -static void ip_csum(struct iphdr *iph) -{ - __u32 sum = 0; - __u16 *p; - int i; - - iph->check = 0; - p = (void *)iph; - for (i = 0; i < sizeof(*iph) / sizeof(*p); i++) - sum += p[i]; - - while (sum >> 16) - sum = (sum & 
0xffff) + (sum >> 16); - - iph->check = ~sum; -} - static int generate_packet(struct xsk *xsk, __u16 dst_port) { struct xsk_tx_metadata *meta; @@ -192,7 +175,7 @@ static int generate_packet(struct xsk *xsk, __u16 dst_port) iph->protocol = IPPROTO_UDP; ASSERT_EQ(inet_pton(FAMILY, TX_ADDR, &iph->saddr), 1, "inet_pton(TX_ADDR)"); ASSERT_EQ(inet_pton(FAMILY, RX_ADDR, &iph->daddr), 1, "inet_pton(RX_ADDR)"); - ip_csum(iph); + iph->check = build_ip_csum(iph); udph->source = htons(UDP_SOURCE_PORT); udph->dest = htons(dst_port); -- cgit v1.2.3 From 752fddc0501c540214875b26bde2538f0a831811 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexis=20Lothor=C3=A9=20=28eBPF=20Foundation=29?= Date: Wed, 20 Nov 2024 08:43:20 +0100 Subject: selftests/bpf: document pseudo-header checksum helpers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit network_helpers.h provides helpers to compute checksum for pseudo headers but no helpers to compute the global checksums. Before adding those, clarify csum_tcpudp_magic and csum_ipv6_magic purpose by adding some documentation. Signed-off-by: Alexis Lothoré (eBPF Foundation) Acked-by: Stanislav Fomichev Link: https://lore.kernel.org/r/20241120-flow_dissector-v3-10-45b46494f937@bootlin.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/network_helpers.h | 30 +++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/network_helpers.h b/tools/testing/selftests/bpf/network_helpers.h index 0b2ed90c763f..00d6e7d52545 100644 --- a/tools/testing/selftests/bpf/network_helpers.h +++ b/tools/testing/selftests/bpf/network_helpers.h @@ -129,6 +129,21 @@ static inline __sum16 build_ip_csum(struct iphdr *iph) return csum_fold(sum); } +/** + * csum_tcpudp_magic - compute IP pseudo-header checksum + * + * Compute the IPv4 pseudo header checksum. The helper can take a + * accumulated sum from the transport layer to accumulate it and directly + * return the transport layer + * + * @saddr: IP source address + * @daddr: IP dest address + * @len: IP data size + * @proto: transport layer protocol + * @csum: The accumulated partial sum to add to the computation + * + * Returns the folded sum + */ static inline __sum16 csum_tcpudp_magic(__be32 saddr, __be32 daddr, __u32 len, __u8 proto, __wsum csum) @@ -144,6 +159,21 @@ static inline __sum16 csum_tcpudp_magic(__be32 saddr, __be32 daddr, return csum_fold((__u32)s); } +/** + * csum_ipv6_magic - compute IPv6 pseudo-header checksum + * + * Compute the ipv6 pseudo header checksum. 
The helper can take a + * accumulated sum from the transport layer to accumulate it and directly + * return the transport layer + * + * @saddr: IPv6 source address + * @daddr: IPv6 dest address + * @len: IPv6 data size + * @proto: transport layer protocol + * @csum: The accumulated partial sum to add to the computation + * + * Returns the folded sum + */ static inline __sum16 csum_ipv6_magic(const struct in6_addr *saddr, const struct in6_addr *daddr, __u32 len, __u8 proto, -- cgit v1.2.3 From a2f482c34a52176ae89d143979bbc9e7a72857c8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexis=20Lothor=C3=A9=20=28eBPF=20Foundation=29?= Date: Wed, 20 Nov 2024 08:43:21 +0100 Subject: selftests/bpf: use the same udp and tcp headers in tests under test_progs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Trying to add udp-dedicated helpers in network_helpers involves including some udp header, which makes multiple test_progs tests build fail: In file included from ./progs/test_cls_redirect.h:13, from [...]/prog_tests/cls_redirect.c:15: [...]/usr/include/linux/udp.h:23:8: error: redefinition of ‘struct udphdr’ 23 | struct udphdr { | ^~~~~~ In file included from ./network_helpers.h:17, from [...]/prog_tests/cls_redirect.c:13: [...]/usr/include/netinet/udp.h:55:8: note: originally defined here 55 | struct udphdr | ^~~~~~ This error is due to struct udphdr being defined in both and . Use only in every test. While at it, perform the same for tcp.h. For some tests, the change needs to be done in the eBPF program part as well, because of some headers sharing between both sides. Signed-off-by: Alexis Lothoré (eBPF Foundation) Acked-by: Stanislav Fomichev Link: https://lore.kernel.org/r/20241120-flow_dissector-v3-11-45b46494f937@bootlin.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/network_helpers.c | 2 +- tools/testing/selftests/bpf/prog_tests/sockopt_sk.c | 2 +- tools/testing/selftests/bpf/prog_tests/xdp_bonding.c | 2 +- tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c | 2 +- tools/testing/selftests/bpf/prog_tests/xdp_flowtable.c | 2 +- tools/testing/selftests/bpf/prog_tests/xdp_metadata.c | 2 +- tools/testing/selftests/bpf/progs/test_cls_redirect.c | 2 +- tools/testing/selftests/bpf/progs/test_cls_redirect.h | 2 +- tools/testing/selftests/bpf/progs/test_cls_redirect_dynptr.c | 2 +- tools/testing/selftests/bpf/xdp_hw_metadata.c | 2 +- 10 files changed, 10 insertions(+), 10 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/network_helpers.c b/tools/testing/selftests/bpf/network_helpers.c index 27784946b01b..80844a5fb1fe 100644 --- a/tools/testing/selftests/bpf/network_helpers.c +++ b/tools/testing/selftests/bpf/network_helpers.c @@ -21,7 +21,7 @@ #include #include -#include +#include #include #include diff --git a/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c b/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c index 05d0e07da394..ba6b3ec1156a 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c +++ b/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c @@ -2,7 +2,7 @@ #include #include "cgroup_helpers.h" -#include +#include #include #include "sockopt_sk.skel.h" diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c b/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c index 6d8b54124cb3..fb952703653e 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c @@ -17,7 +17,7 @@ #include "network_helpers.h" #include #include 
-#include +#include #include #include "xdp_dummy.skel.h" diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c b/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c index bad0ea167be7..d12f926b4b8b 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c @@ -7,7 +7,7 @@ #include #include #include -#include +#include #include #include #include "test_xdp_do_redirect.skel.h" diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_flowtable.c b/tools/testing/selftests/bpf/prog_tests/xdp_flowtable.c index e1bf141d3401..3f9146d83d79 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_flowtable.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_flowtable.c @@ -3,7 +3,7 @@ #include #include #include -#include +#include #include #include diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c b/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c index 7f8e16165533..3d47878ef6bf 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c @@ -10,7 +10,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/tools/testing/selftests/bpf/progs/test_cls_redirect.c b/tools/testing/selftests/bpf/progs/test_cls_redirect.c index 683c8aaa63da..f344c6835e84 100644 --- a/tools/testing/selftests/bpf/progs/test_cls_redirect.c +++ b/tools/testing/selftests/bpf/progs/test_cls_redirect.c @@ -15,7 +15,7 @@ #include #include #include -#include +#include #include #include diff --git a/tools/testing/selftests/bpf/progs/test_cls_redirect.h b/tools/testing/selftests/bpf/progs/test_cls_redirect.h index 233b089d1fba..eb55cb8a3dbd 100644 --- a/tools/testing/selftests/bpf/progs/test_cls_redirect.h +++ b/tools/testing/selftests/bpf/progs/test_cls_redirect.h @@ -10,7 +10,7 @@ #include #include #include -#include +#include /* offsetof() is used in static asserts, and the libbpf-redefined CO-RE * friendly version breaks compilation for older clang versions <= 15 diff --git a/tools/testing/selftests/bpf/progs/test_cls_redirect_dynptr.c b/tools/testing/selftests/bpf/progs/test_cls_redirect_dynptr.c index 464515b824b9..d0f7670351e5 100644 --- a/tools/testing/selftests/bpf/progs/test_cls_redirect_dynptr.c +++ b/tools/testing/selftests/bpf/progs/test_cls_redirect_dynptr.c @@ -15,7 +15,7 @@ #include #include #include -#include +#include #include #include diff --git a/tools/testing/selftests/bpf/xdp_hw_metadata.c b/tools/testing/selftests/bpf/xdp_hw_metadata.c index 6f9956eed797..06266aad2f99 100644 --- a/tools/testing/selftests/bpf/xdp_hw_metadata.c +++ b/tools/testing/selftests/bpf/xdp_hw_metadata.c @@ -27,7 +27,7 @@ #include #include #include -#include +#include #include #include #include -- cgit v1.2.3 From bcc00987bc56faa3d0e20762ececefc831506846 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexis=20Lothor=C3=A9=20=28eBPF=20Foundation=29?= Date: Wed, 20 Nov 2024 08:43:22 +0100 Subject: selftests/bpf: add network helpers to generate udp checksums MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit network_helpers.c provides some helpers to generate ip checksums or ip pseudo-header checksums, but not for upper layers (eg: udp checksums) Add helpers for udp checksum to allow manually building udp packets. 
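A minimal caller-side sketch for the IPv4 helper (illustrative only: it assumes a buffer holding an option-less IPv4 header immediately followed by a UDP header whose ports, plus payload_len bytes of payload, are already filled in):

	static void finalize_udp_v4(void *buf, int payload_len)
	{
		struct iphdr *iph = buf;			/* ihl == 5 assumed */
		struct udphdr *udph = (void *)(iph + 1);

		udph->len = htons(sizeof(*udph) + payload_len);
		udph->check = 0;				/* must be zero while summing */
		udph->check = build_udp_v4_csum(iph, udph);
	}
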
Signed-off-by: Alexis Lothoré (eBPF Foundation) Acked-by: Stanislav Fomichev Link: https://lore.kernel.org/r/20241120-flow_dissector-v3-12-45b46494f937@bootlin.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/network_helpers.h | 42 +++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/network_helpers.h b/tools/testing/selftests/bpf/network_helpers.h index 00d6e7d52545..ebec8a8d6f81 100644 --- a/tools/testing/selftests/bpf/network_helpers.h +++ b/tools/testing/selftests/bpf/network_helpers.h @@ -14,6 +14,7 @@ typedef __u16 __sum16; #include #include #include +#include #include #include @@ -193,6 +194,47 @@ static inline __sum16 csum_ipv6_magic(const struct in6_addr *saddr, return csum_fold((__u32)s); } +/** + * build_udp_v4_csum - compute UDP checksum for UDP over IPv4 + * + * Compute the checksum to embed in UDP header, composed of the sum of IP + * pseudo-header checksum, UDP header checksum and UDP data checksum + * @iph IP header + * @udph UDP header, which must be immediately followed by UDP data + * + * Returns the total checksum + */ + +static inline __sum16 build_udp_v4_csum(const struct iphdr *iph, + const struct udphdr *udph) +{ + unsigned long sum; + + sum = csum_partial(udph, ntohs(udph->len), 0); + return csum_tcpudp_magic(iph->saddr, iph->daddr, ntohs(udph->len), + IPPROTO_UDP, sum); +} + +/** + * build_udp_v6_csum - compute UDP checksum for UDP over IPv6 + * + * Compute the checksum to embed in UDP header, composed of the sum of IPv6 + * pseudo-header checksum, UDP header checksum and UDP data checksum + * @ip6h IPv6 header + * @udph UDP header, which must be immediately followed by UDP data + * + * Returns the total checksum + */ +static inline __sum16 build_udp_v6_csum(const struct ipv6hdr *ip6h, + const struct udphdr *udph) +{ + unsigned long sum; + + sum = csum_partial(udph, ntohs(udph->len), 0); + return csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr, ntohs(udph->len), + IPPROTO_UDP, sum); +} + struct tmonitor_ctx; #ifdef TRAFFIC_MONITOR -- cgit v1.2.3 From 20203a51e3940abb1639d41bbcfe995f6463bbd0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexis=20Lothor=C3=A9=20=28eBPF=20Foundation=29?= Date: Wed, 20 Nov 2024 08:43:23 +0100 Subject: selftests/bpf: migrate bpf flow dissectors tests to test_progs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit test_flow_dissector.sh loads flow_dissector program and subprograms, creates and configured relevant tunnels and interfaces, and ensure that the bpf dissection is actually performed correctly. Similar tests exist in test_progs (thanks to flow_dissector.c) and run the same programs, but those are only executed with BPF_PROG_RUN: those tests are then missing some coverage (eg: coverage for flow keys manipulated when the configured flower uses a port range, which has a dedicated test in test_flow_dissector.sh) Convert test_flow_dissector.sh into test_progs so that the corresponding tests are also run in CI. 
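The converted test keeps the shell script's structure by describing each scenario as data, with optional setup/teardown callbacks, and driving them as subtests, roughly as follows (sketch of the loop added below; member names are the ones used in the new file):

	for (i = 0; i < ARRAY_SIZE(tests_input); i++) {
		if (!test__start_subtest(tests_input[i].name))
			continue;
		if (tests_input[i].test_setup &&
		    !ASSERT_OK(tests_input[i].test_setup(), "init filter"))
			continue;

		/* send packets from each configured source port, count receptions */

		if (tests_input[i].test_teardown)
			tests_input[i].test_teardown();
	}
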
Signed-off-by: Alexis Lothoré (eBPF Foundation) Acked-by: Stanislav Fomichev Link: https://lore.kernel.org/r/20241120-flow_dissector-v3-13-45b46494f937@bootlin.com Signed-off-by: Alexei Starovoitov --- .../bpf/prog_tests/flow_dissector_classification.c | 792 +++++++++++++++++++++ 1 file changed, 792 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/flow_dissector_classification.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector_classification.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector_classification.c new file mode 100644 index 000000000000..3729fbfd3084 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector_classification.c @@ -0,0 +1,792 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "test_progs.h" +#include "network_helpers.h" +#include "bpf_util.h" +#include "bpf_flow.skel.h" + +#define CFG_PORT_INNER 8000 +#define CFG_PORT_GUE 6080 +#define SUBTEST_NAME_MAX_LEN 32 +#define TEST_NAME_MAX_LEN (32 + SUBTEST_NAME_MAX_LEN) +#define MAX_SOURCE_PORTS 3 +#define TEST_PACKETS_COUNT 10 +#define TEST_PACKET_LEN 100 +#define TEST_PACKET_PATTERN 'a' +#define TEST_IPV4 "192.168.0.1/32" +#define TEST_IPV6 "100::a/128" +#define TEST_TUNNEL_REMOTE "127.0.0.2" +#define TEST_TUNNEL_LOCAL "127.0.0.1" + +#define INIT_ADDR4(addr4, port) \ + { \ + .sin_family = AF_INET, \ + .sin_port = __constant_htons(port), \ + .sin_addr.s_addr = __constant_htonl(addr4), \ + } + +#define INIT_ADDR6(addr6, port) \ + { \ + .sin6_family = AF_INET6, \ + .sin6_port = __constant_htons(port), \ + .sin6_addr = addr6, \ + } +#define TEST_IN4_SRC_ADDR_DEFAULT INIT_ADDR4(INADDR_LOOPBACK + 2, 0) +#define TEST_IN4_DST_ADDR_DEFAULT INIT_ADDR4(INADDR_LOOPBACK, CFG_PORT_INNER) +#define TEST_OUT4_SRC_ADDR_DEFAULT INIT_ADDR4(INADDR_LOOPBACK + 1, 0) +#define TEST_OUT4_DST_ADDR_DEFAULT INIT_ADDR4(INADDR_LOOPBACK, 0) + +#define TEST_IN6_SRC_ADDR_DEFAULT INIT_ADDR6(IN6ADDR_LOOPBACK_INIT, 0) +#define TEST_IN6_DST_ADDR_DEFAULT \ + INIT_ADDR6(IN6ADDR_LOOPBACK_INIT, CFG_PORT_INNER) +#define TEST_OUT6_SRC_ADDR_DEFAULT INIT_ADDR6(IN6ADDR_LOOPBACK_INIT, 0) +#define TEST_OUT6_DST_ADDR_DEFAULT INIT_ADDR6(IN6ADDR_LOOPBACK_INIT, 0) + +#define TEST_IN4_SRC_ADDR_DISSECT_CONTINUE INIT_ADDR4(INADDR_LOOPBACK + 126, 0) +#define TEST_IN4_SRC_ADDR_IPIP INIT_ADDR4((in_addr_t)0x01010101, 0) +#define TEST_IN4_DST_ADDR_IPIP INIT_ADDR4((in_addr_t)0xC0A80001, CFG_PORT_INNER) + +struct grehdr { + uint16_t unused; + uint16_t protocol; +} __packed; + +struct guehdr { + union { + struct { +#if defined(__LITTLE_ENDIAN_BITFIELD) + __u8 hlen : 5, control : 1, version : 2; +#elif defined(__BIG_ENDIAN_BITFIELD) + __u8 version : 2, control : 1, hlen : 5; +#else +#error "Please fix " +#endif + __u8 proto_ctype; + __be16 flags; + }; + __be32 word; + }; +}; + +static char buf[ETH_DATA_LEN]; + +struct test_configuration { + char name[SUBTEST_NAME_MAX_LEN]; + int (*test_setup)(void); + void (*test_teardown)(void); + int source_ports[MAX_SOURCE_PORTS]; + int cfg_l3_inner; + struct sockaddr_in in_saddr4; + struct sockaddr_in in_daddr4; + struct sockaddr_in6 in_saddr6; + struct sockaddr_in6 in_daddr6; + int cfg_l3_outer; + struct sockaddr_in out_saddr4; + struct sockaddr_in out_daddr4; + struct sockaddr_in6 out_saddr6; + struct sockaddr_in6 out_daddr6; + int cfg_encap_proto; + uint8_t cfg_dsfield_inner; + uint8_t 
cfg_dsfield_outer; + int cfg_l3_extra; + struct sockaddr_in extra_saddr4; + struct sockaddr_in extra_daddr4; + struct sockaddr_in6 extra_saddr6; + struct sockaddr_in6 extra_daddr6; +}; + +static unsigned long util_gettime(void) +{ + struct timeval tv; + + gettimeofday(&tv, NULL); + return (tv.tv_sec * 1000) + (tv.tv_usec / 1000); +} + +static void build_ipv4_header(void *header, uint8_t proto, uint32_t src, + uint32_t dst, int payload_len, uint8_t tos) +{ + struct iphdr *iph = header; + + iph->ihl = 5; + iph->version = 4; + iph->tos = tos; + iph->ttl = 8; + iph->tot_len = htons(sizeof(*iph) + payload_len); + iph->id = htons(1337); + iph->protocol = proto; + iph->saddr = src; + iph->daddr = dst; + iph->check = build_ip_csum((void *)iph); +} + +static void ipv6_set_dsfield(struct ipv6hdr *ip6h, uint8_t dsfield) +{ + uint16_t val, *ptr = (uint16_t *)ip6h; + + val = ntohs(*ptr); + val &= 0xF00F; + val |= ((uint16_t)dsfield) << 4; + *ptr = htons(val); +} + +static void build_ipv6_header(void *header, uint8_t proto, + const struct sockaddr_in6 *src, + const struct sockaddr_in6 *dst, int payload_len, + uint8_t dsfield) +{ + struct ipv6hdr *ip6h = header; + + ip6h->version = 6; + ip6h->payload_len = htons(payload_len); + ip6h->nexthdr = proto; + ip6h->hop_limit = 8; + ipv6_set_dsfield(ip6h, dsfield); + + memcpy(&ip6h->saddr, &src->sin6_addr, sizeof(ip6h->saddr)); + memcpy(&ip6h->daddr, &dst->sin6_addr, sizeof(ip6h->daddr)); +} + +static void build_udp_header(void *header, int payload_len, uint16_t sport, + uint16_t dport, int family) +{ + struct udphdr *udph = header; + int len = sizeof(*udph) + payload_len; + + udph->source = htons(sport); + udph->dest = htons(dport); + udph->len = htons(len); + udph->check = 0; + if (family == AF_INET) + udph->check = build_udp_v4_csum(header - sizeof(struct iphdr), + udph); + else + udph->check = build_udp_v6_csum(header - sizeof(struct ipv6hdr), + udph); +} + +static void build_gue_header(void *header, uint8_t proto) +{ + struct guehdr *gueh = header; + + gueh->proto_ctype = proto; +} + +static void build_gre_header(void *header, uint16_t proto) +{ + struct grehdr *greh = header; + + greh->protocol = htons(proto); +} + +static int l3_length(int family) +{ + if (family == AF_INET) + return sizeof(struct iphdr); + else + return sizeof(struct ipv6hdr); +} + +static int build_packet(const struct test_configuration *test, uint16_t sport) +{ + int ol3_len = 0, ol4_len = 0, il3_len = 0, il4_len = 0; + int el3_len = 0, packet_len; + + memset(buf, 0, ETH_DATA_LEN); + + if (test->cfg_l3_extra) + el3_len = l3_length(test->cfg_l3_extra); + + /* calculate header offsets */ + if (test->cfg_encap_proto) { + ol3_len = l3_length(test->cfg_l3_outer); + + if (test->cfg_encap_proto == IPPROTO_GRE) + ol4_len = sizeof(struct grehdr); + else if (test->cfg_encap_proto == IPPROTO_UDP) + ol4_len = sizeof(struct udphdr) + sizeof(struct guehdr); + } + + il3_len = l3_length(test->cfg_l3_inner); + il4_len = sizeof(struct udphdr); + + packet_len = el3_len + ol3_len + ol4_len + il3_len + il4_len + + TEST_PACKET_LEN; + if (!ASSERT_LE(packet_len, sizeof(buf), "check packet size")) + return -1; + + /* + * Fill packet from inside out, to calculate correct checksums. + * But create ip before udp headers, as udp uses ip for pseudo-sum. 
+ */ + memset(buf + el3_len + ol3_len + ol4_len + il3_len + il4_len, + TEST_PACKET_PATTERN, TEST_PACKET_LEN); + + /* add zero byte for udp csum padding */ + buf[el3_len + ol3_len + ol4_len + il3_len + il4_len + TEST_PACKET_LEN] = + 0; + + switch (test->cfg_l3_inner) { + case PF_INET: + build_ipv4_header(buf + el3_len + ol3_len + ol4_len, + IPPROTO_UDP, test->in_saddr4.sin_addr.s_addr, + test->in_daddr4.sin_addr.s_addr, + il4_len + TEST_PACKET_LEN, + test->cfg_dsfield_inner); + break; + case PF_INET6: + build_ipv6_header(buf + el3_len + ol3_len + ol4_len, + IPPROTO_UDP, &test->in_saddr6, + &test->in_daddr6, il4_len + TEST_PACKET_LEN, + test->cfg_dsfield_inner); + break; + } + + build_udp_header(buf + el3_len + ol3_len + ol4_len + il3_len, + TEST_PACKET_LEN, sport, CFG_PORT_INNER, + test->cfg_l3_inner); + + if (!test->cfg_encap_proto) + return il3_len + il4_len + TEST_PACKET_LEN; + + switch (test->cfg_l3_outer) { + case PF_INET: + build_ipv4_header(buf + el3_len, test->cfg_encap_proto, + test->out_saddr4.sin_addr.s_addr, + test->out_daddr4.sin_addr.s_addr, + ol4_len + il3_len + il4_len + TEST_PACKET_LEN, + test->cfg_dsfield_outer); + break; + case PF_INET6: + build_ipv6_header(buf + el3_len, test->cfg_encap_proto, + &test->out_saddr6, &test->out_daddr6, + ol4_len + il3_len + il4_len + TEST_PACKET_LEN, + test->cfg_dsfield_outer); + break; + } + + switch (test->cfg_encap_proto) { + case IPPROTO_UDP: + build_gue_header(buf + el3_len + ol3_len + ol4_len - + sizeof(struct guehdr), + test->cfg_l3_inner == PF_INET ? IPPROTO_IPIP : + IPPROTO_IPV6); + build_udp_header(buf + el3_len + ol3_len, + sizeof(struct guehdr) + il3_len + il4_len + + TEST_PACKET_LEN, + sport, CFG_PORT_GUE, test->cfg_l3_outer); + break; + case IPPROTO_GRE: + build_gre_header(buf + el3_len + ol3_len, + test->cfg_l3_inner == PF_INET ? ETH_P_IP : + ETH_P_IPV6); + break; + } + + switch (test->cfg_l3_extra) { + case PF_INET: + build_ipv4_header(buf, + test->cfg_l3_outer == PF_INET ? IPPROTO_IPIP : + IPPROTO_IPV6, + test->extra_saddr4.sin_addr.s_addr, + test->extra_daddr4.sin_addr.s_addr, + ol3_len + ol4_len + il3_len + il4_len + + TEST_PACKET_LEN, + 0); + break; + case PF_INET6: + build_ipv6_header(buf, + test->cfg_l3_outer == PF_INET ? 
IPPROTO_IPIP : + IPPROTO_IPV6, + &test->extra_saddr6, &test->extra_daddr6, + ol3_len + ol4_len + il3_len + il4_len + + TEST_PACKET_LEN, + 0); + break; + } + + return el3_len + ol3_len + ol4_len + il3_len + il4_len + + TEST_PACKET_LEN; +} + +/* sender transmits encapsulated over RAW or unencap'd over UDP */ +static int setup_tx(const struct test_configuration *test) +{ + int family, fd, ret; + + if (test->cfg_l3_extra) + family = test->cfg_l3_extra; + else if (test->cfg_l3_outer) + family = test->cfg_l3_outer; + else + family = test->cfg_l3_inner; + + fd = socket(family, SOCK_RAW, IPPROTO_RAW); + if (!ASSERT_OK_FD(fd, "setup tx socket")) + return fd; + + if (test->cfg_l3_extra) { + if (test->cfg_l3_extra == PF_INET) + ret = connect(fd, (void *)&test->extra_daddr4, + sizeof(test->extra_daddr4)); + else + ret = connect(fd, (void *)&test->extra_daddr6, + sizeof(test->extra_daddr6)); + if (!ASSERT_OK(ret, "connect")) { + close(fd); + return ret; + } + } else if (test->cfg_l3_outer) { + /* connect to destination if not encapsulated */ + if (test->cfg_l3_outer == PF_INET) + ret = connect(fd, (void *)&test->out_daddr4, + sizeof(test->out_daddr4)); + else + ret = connect(fd, (void *)&test->out_daddr6, + sizeof(test->out_daddr6)); + if (!ASSERT_OK(ret, "connect")) { + close(fd); + return ret; + } + } else { + /* otherwise using loopback */ + if (test->cfg_l3_inner == PF_INET) + ret = connect(fd, (void *)&test->in_daddr4, + sizeof(test->in_daddr4)); + else + ret = connect(fd, (void *)&test->in_daddr6, + sizeof(test->in_daddr6)); + if (!ASSERT_OK(ret, "connect")) { + close(fd); + return ret; + } + } + + return fd; +} + +/* receiver reads unencapsulated UDP */ +static int setup_rx(const struct test_configuration *test) +{ + int fd, ret; + + fd = socket(test->cfg_l3_inner, SOCK_DGRAM, 0); + if (!ASSERT_OK_FD(fd, "socket rx")) + return fd; + + if (test->cfg_l3_inner == PF_INET) + ret = bind(fd, (void *)&test->in_daddr4, + sizeof(test->in_daddr4)); + else + ret = bind(fd, (void *)&test->in_daddr6, + sizeof(test->in_daddr6)); + if (!ASSERT_OK(ret, "bind rx")) { + close(fd); + return ret; + } + + return fd; +} + +static int do_tx(int fd, const char *pkt, int len) +{ + int ret; + + ret = write(fd, pkt, len); + return ret != len; +} + +static int do_poll(int fd, short events, int timeout) +{ + struct pollfd pfd; + int ret; + + pfd.fd = fd; + pfd.events = events; + + ret = poll(&pfd, 1, timeout); + return ret; +} + +static int do_rx(int fd) +{ + char rbuf; + int ret, num = 0; + + while (1) { + ret = recv(fd, &rbuf, 1, MSG_DONTWAIT); + if (ret == -1 && errno == EAGAIN) + break; + if (ret < 0) + return -1; + if (!ASSERT_EQ(rbuf, TEST_PACKET_PATTERN, "check pkt pattern")) + return -1; + num++; + } + + return num; +} + +static int run_test(const struct test_configuration *test, + int source_port_index) +{ + int fdt = -1, fdr = -1, len, tx = 0, rx = 0, err; + unsigned long tstop, tcur; + + fdr = setup_rx(test); + fdt = setup_tx(test); + if (!ASSERT_OK_FD(fdr, "setup rx") || !ASSERT_OK_FD(fdt, "setup tx")) { + err = -1; + goto out_close_sockets; + } + + len = build_packet(test, + (uint16_t)test->source_ports[source_port_index]); + if (!ASSERT_GT(len, 0, "build test packet")) + return -1; + + tcur = util_gettime(); + tstop = tcur; + + while (tx < TEST_PACKETS_COUNT) { + if (!ASSERT_OK(do_tx(fdt, buf, len), "do_tx")) + break; + tx++; + err = do_rx(fdr); + if (!ASSERT_GE(err, 0, "do_rx")) + break; + rx += err; + } + + /* read straggler packets, if any */ + if (rx < tx) { + tstop = util_gettime() + 100; + while (rx < 
tx) { + tcur = util_gettime(); + if (tcur >= tstop) + break; + + err = do_poll(fdr, POLLIN, tstop - tcur); + if (err < 0) + break; + err = do_rx(fdr); + if (err >= 0) + rx += err; + } + } + +out_close_sockets: + close(fdt); + close(fdr); + return rx; +} + +static int attach_and_configure_program(struct bpf_flow *skel) +{ + struct bpf_map *prog_array = skel->maps.jmp_table; + int main_prog_fd, sub_prog_fd, map_fd, i, err; + struct bpf_program *prog; + char prog_name[32]; + + main_prog_fd = bpf_program__fd(skel->progs._dissect); + if (main_prog_fd < 0) + return main_prog_fd; + + err = bpf_prog_attach(main_prog_fd, 0, BPF_FLOW_DISSECTOR, 0); + if (err) + return err; + + map_fd = bpf_map__fd(prog_array); + if (map_fd < 0) + return map_fd; + + for (i = 0; i < bpf_map__max_entries(prog_array); i++) { + snprintf(prog_name, sizeof(prog_name), "flow_dissector_%d", i); + + prog = bpf_object__find_program_by_name(skel->obj, prog_name); + if (!prog) + return -1; + + sub_prog_fd = bpf_program__fd(prog); + if (sub_prog_fd < 0) + return -1; + + err = bpf_map_update_elem(map_fd, &i, &sub_prog_fd, BPF_ANY); + if (err) + return -1; + } + + return main_prog_fd; +} + +static void detach_program(struct bpf_flow *skel, int prog_fd) +{ + bpf_prog_detach2(prog_fd, 0, BPF_FLOW_DISSECTOR); +} + +static int set_port_drop(int pf, bool multi_port) +{ + SYS(fail, "tc qdisc add dev lo ingress"); + SYS(fail_delete_qdisc, "tc filter add %s %s %s %s %s %s %s %s %s %s", + "dev lo", + "parent FFFF:", + "protocol", pf == PF_INET6 ? "ipv6" : "ip", + "pref 1337", + "flower", + "ip_proto udp", + "src_port", multi_port ? "8-10" : "9", + "action drop"); + return 0; + +fail_delete_qdisc: + SYS_NOFAIL("tc qdisc del dev lo ingress"); +fail: + return 1; +} + +static void remove_filter(void) +{ + SYS_NOFAIL("tc filter del dev lo ingress"); + SYS_NOFAIL("tc qdisc del dev lo ingress"); +} + +static int ipv4_setup(void) +{ + return set_port_drop(PF_INET, false); +} + +static int ipv6_setup(void) +{ + return set_port_drop(PF_INET6, false); +} + +static int port_range_setup(void) +{ + return set_port_drop(PF_INET, true); +} + +static int set_addresses(void) +{ + SYS(out, "ip -4 addr add %s dev lo", TEST_IPV4); + SYS(out_remove_ipv4, "ip -6 addr add %s dev lo", TEST_IPV6); + return 0; +out_remove_ipv4: + SYS_NOFAIL("ip -4 addr del %s dev lo", TEST_IPV4); +out: + return -1; +} + +static void unset_addresses(void) +{ + SYS_NOFAIL("ip -4 addr del %s dev lo", TEST_IPV4); + SYS_NOFAIL("ip -6 addr del %s dev lo", TEST_IPV6); +} + +static int ipip_setup(void) +{ + if (!ASSERT_OK(set_addresses(), "configure addresses")) + return -1; + if (!ASSERT_OK(set_port_drop(PF_INET, false), "set filter")) + goto out_unset_addresses; + SYS(out_remove_filter, + "ip link add ipip_test type ipip remote %s local %s dev lo", + TEST_TUNNEL_REMOTE, TEST_TUNNEL_LOCAL); + SYS(out_clean_netif, "ip link set ipip_test up"); + return 0; + +out_clean_netif: + SYS_NOFAIL("ip link del ipip_test"); +out_remove_filter: + remove_filter(); +out_unset_addresses: + unset_addresses(); + return -1; +} + +static void ipip_shutdown(void) +{ + SYS_NOFAIL("ip link del ipip_test"); + remove_filter(); + unset_addresses(); +} + +static int gre_setup(void) +{ + if (!ASSERT_OK(set_addresses(), "configure addresses")) + return -1; + if (!ASSERT_OK(set_port_drop(PF_INET, false), "set filter")) + goto out_unset_addresses; + SYS(out_remove_filter, + "ip link add gre_test type gre remote %s local %s dev lo", + TEST_TUNNEL_REMOTE, TEST_TUNNEL_LOCAL); + SYS(out_clean_netif, "ip link set gre_test 
up"); + return 0; + +out_clean_netif: + SYS_NOFAIL("ip link del ipip_test"); +out_remove_filter: + remove_filter(); +out_unset_addresses: + unset_addresses(); + return -1; +} + +static void gre_shutdown(void) +{ + SYS_NOFAIL("ip link del gre_test"); + remove_filter(); + unset_addresses(); +} + +static const struct test_configuration tests_input[] = { + { + .name = "ipv4", + .test_setup = ipv4_setup, + .test_teardown = remove_filter, + .source_ports = { 8, 9, 10 }, + .cfg_l3_inner = PF_INET, + .in_saddr4 = TEST_IN4_SRC_ADDR_DEFAULT, + .in_daddr4 = TEST_IN4_DST_ADDR_DEFAULT + }, + { + .name = "ipv4_continue_dissect", + .test_setup = ipv4_setup, + .test_teardown = remove_filter, + .source_ports = { 8, 9, 10 }, + .cfg_l3_inner = PF_INET, + .in_saddr4 = TEST_IN4_SRC_ADDR_DISSECT_CONTINUE, + .in_daddr4 = TEST_IN4_DST_ADDR_DEFAULT }, + { + .name = "ipip", + .test_setup = ipip_setup, + .test_teardown = ipip_shutdown, + .source_ports = { 8, 9, 10 }, + .cfg_l3_inner = PF_INET, + .in_saddr4 = TEST_IN4_SRC_ADDR_IPIP, + .in_daddr4 = TEST_IN4_DST_ADDR_IPIP, + .out_saddr4 = TEST_OUT4_SRC_ADDR_DEFAULT, + .out_daddr4 = TEST_OUT4_DST_ADDR_DEFAULT, + .cfg_l3_outer = PF_INET, + .cfg_encap_proto = IPPROTO_IPIP, + + }, + { + .name = "gre", + .test_setup = gre_setup, + .test_teardown = gre_shutdown, + .source_ports = { 8, 9, 10 }, + .cfg_l3_inner = PF_INET, + .in_saddr4 = TEST_IN4_SRC_ADDR_IPIP, + .in_daddr4 = TEST_IN4_DST_ADDR_IPIP, + .out_saddr4 = TEST_OUT4_SRC_ADDR_DEFAULT, + .out_daddr4 = TEST_OUT4_DST_ADDR_DEFAULT, + .cfg_l3_outer = PF_INET, + .cfg_encap_proto = IPPROTO_GRE, + }, + { + .name = "port_range", + .test_setup = port_range_setup, + .test_teardown = remove_filter, + .source_ports = { 7, 9, 11 }, + .cfg_l3_inner = PF_INET, + .in_saddr4 = TEST_IN4_SRC_ADDR_DEFAULT, + .in_daddr4 = TEST_IN4_DST_ADDR_DEFAULT }, + { + .name = "ipv6", + .test_setup = ipv6_setup, + .test_teardown = remove_filter, + .source_ports = { 8, 9, 10 }, + .cfg_l3_inner = PF_INET6, + .in_saddr6 = TEST_IN6_SRC_ADDR_DEFAULT, + .in_daddr6 = TEST_IN6_DST_ADDR_DEFAULT + }, +}; + +struct test_ctx { + struct bpf_flow *skel; + struct netns_obj *ns; + int prog_fd; +}; + +static int test_global_init(struct test_ctx *ctx) +{ + int err; + + ctx->skel = bpf_flow__open_and_load(); + if (!ASSERT_OK_PTR(ctx->skel, "open and load flow_dissector")) + return -1; + + ctx->ns = netns_new("flow_dissector_classification", true); + if (!ASSERT_OK_PTR(ctx->ns, "switch ns")) + goto out_destroy_skel; + + err = write_sysctl("/proc/sys/net/ipv4/conf/default/rp_filter", "0"); + err |= write_sysctl("/proc/sys/net/ipv4/conf/all/rp_filter", "0"); + err |= write_sysctl("/proc/sys/net/ipv4/conf/lo/rp_filter", "0"); + if (!ASSERT_OK(err, "configure net tunables")) + goto out_clean_ns; + + ctx->prog_fd = attach_and_configure_program(ctx->skel); + if (!ASSERT_OK_FD(ctx->prog_fd, "attach and configure program")) + goto out_clean_ns; + return 0; +out_clean_ns: + netns_free(ctx->ns); +out_destroy_skel: + bpf_flow__destroy(ctx->skel); + return -1; +} + +static void test_global_shutdown(struct test_ctx *ctx) +{ + detach_program(ctx->skel, ctx->prog_fd); + netns_free(ctx->ns); + bpf_flow__destroy(ctx->skel); +} + +void test_flow_dissector_classification(void) +{ + struct test_ctx ctx; + const struct test_configuration *test; + int i; + + if (test_global_init(&ctx)) + return; + + for (i = 0; i < ARRAY_SIZE(tests_input); i++) { + if (!test__start_subtest(tests_input[i].name)) + continue; + test = &tests_input[i]; + /* All tests are expected to have one rx-ok port first, + * 
then a non-working rx port, and finally a rx-ok port + */ + if (test->test_setup && + !ASSERT_OK(test->test_setup(), "init filter")) + continue; + + ASSERT_EQ(run_test(test, 0), TEST_PACKETS_COUNT, + "test first port"); + ASSERT_EQ(run_test(test, 1), 0, "test second port"); + ASSERT_EQ(run_test(test, 2), TEST_PACKETS_COUNT, + "test third port"); + if (test->test_teardown) + test->test_teardown(); + } + test_global_shutdown(&ctx); +} -- cgit v1.2.3 From 63b37657c5fdd0625f5825d5f90e7bec1dfafb64 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexis=20Lothor=C3=A9=20=28eBPF=20Foundation=29?= Date: Wed, 20 Nov 2024 08:43:24 +0100 Subject: selftests/bpf: remove test_flow_dissector.sh MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that test_flow_dissector.sh has been converted to test_progs, remove the legacy test. Acked-by: Stanislav Fomichev Signed-off-by: Alexis Lothoré (eBPF Foundation) Link: https://lore.kernel.org/r/20241120-flow_dissector-v3-14-45b46494f937@bootlin.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/.gitignore | 1 - tools/testing/selftests/bpf/Makefile | 2 - tools/testing/selftests/bpf/test_flow_dissector.c | 780 --------------------- tools/testing/selftests/bpf/test_flow_dissector.sh | 178 ----- 4 files changed, 961 deletions(-) delete mode 100644 tools/testing/selftests/bpf/test_flow_dissector.c delete mode 100755 tools/testing/selftests/bpf/test_flow_dissector.sh (limited to 'tools') diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore index c2a1842c3d8b..5ad1b9f5e8e6 100644 --- a/tools/testing/selftests/bpf/.gitignore +++ b/tools/testing/selftests/bpf/.gitignore @@ -19,7 +19,6 @@ feature urandom_read test_sockmap test_lirc_mode2_user -test_flow_dissector flow_dissector_load test_tcpnotify_user test_libbpf diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 6ad3b1ba1920..a1964d40a60e 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -133,7 +133,6 @@ TEST_PROGS := test_kmod.sh \ test_tunnel.sh \ test_lwt_seg6local.sh \ test_lirc_mode2.sh \ - test_flow_dissector.sh \ test_xdp_vlan_mode_generic.sh \ test_xdp_vlan_mode_native.sh \ test_lwt_ip_encap.sh \ @@ -161,7 +160,6 @@ TEST_GEN_PROGS_EXTENDED = \ flow_dissector_load \ runqslower \ test_cpp \ - test_flow_dissector \ test_lirc_mode2_user \ veristat \ xdp_features \ diff --git a/tools/testing/selftests/bpf/test_flow_dissector.c b/tools/testing/selftests/bpf/test_flow_dissector.c deleted file mode 100644 index 571cc076dd7d..000000000000 --- a/tools/testing/selftests/bpf/test_flow_dissector.c +++ /dev/null @@ -1,780 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Inject packets with all sorts of encapsulation into the kernel. - * - * IPv4/IPv6 outer layer 3 - * GRE/GUE/BARE outer layer 4, where bare is IPIP/SIT/IPv4-in-IPv6/.. 
- * IPv4/IPv6 inner layer 3 - */ - -#define _GNU_SOURCE - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#define CFG_PORT_INNER 8000 - -/* Add some protocol definitions that do not exist in userspace */ - -struct grehdr { - uint16_t unused; - uint16_t protocol; -} __attribute__((packed)); - -struct guehdr { - union { - struct { -#if defined(__LITTLE_ENDIAN_BITFIELD) - __u8 hlen:5, - control:1, - version:2; -#elif defined (__BIG_ENDIAN_BITFIELD) - __u8 version:2, - control:1, - hlen:5; -#else -#error "Please fix " -#endif - __u8 proto_ctype; - __be16 flags; - }; - __be32 word; - }; -}; - -static uint8_t cfg_dsfield_inner; -static uint8_t cfg_dsfield_outer; -static uint8_t cfg_encap_proto; -static bool cfg_expect_failure = false; -static int cfg_l3_extra = AF_UNSPEC; /* optional SIT prefix */ -static int cfg_l3_inner = AF_UNSPEC; -static int cfg_l3_outer = AF_UNSPEC; -static int cfg_num_pkt = 10; -static int cfg_num_secs = 0; -static char cfg_payload_char = 'a'; -static int cfg_payload_len = 100; -static int cfg_port_gue = 6080; -static bool cfg_only_rx; -static bool cfg_only_tx; -static int cfg_src_port = 9; - -static char buf[ETH_DATA_LEN]; - -#define INIT_ADDR4(name, addr4, port) \ - static struct sockaddr_in name = { \ - .sin_family = AF_INET, \ - .sin_port = __constant_htons(port), \ - .sin_addr.s_addr = __constant_htonl(addr4), \ - }; - -#define INIT_ADDR6(name, addr6, port) \ - static struct sockaddr_in6 name = { \ - .sin6_family = AF_INET6, \ - .sin6_port = __constant_htons(port), \ - .sin6_addr = addr6, \ - }; - -INIT_ADDR4(in_daddr4, INADDR_LOOPBACK, CFG_PORT_INNER) -INIT_ADDR4(in_saddr4, INADDR_LOOPBACK + 2, 0) -INIT_ADDR4(out_daddr4, INADDR_LOOPBACK, 0) -INIT_ADDR4(out_saddr4, INADDR_LOOPBACK + 1, 0) -INIT_ADDR4(extra_daddr4, INADDR_LOOPBACK, 0) -INIT_ADDR4(extra_saddr4, INADDR_LOOPBACK + 1, 0) - -INIT_ADDR6(in_daddr6, IN6ADDR_LOOPBACK_INIT, CFG_PORT_INNER) -INIT_ADDR6(in_saddr6, IN6ADDR_LOOPBACK_INIT, 0) -INIT_ADDR6(out_daddr6, IN6ADDR_LOOPBACK_INIT, 0) -INIT_ADDR6(out_saddr6, IN6ADDR_LOOPBACK_INIT, 0) -INIT_ADDR6(extra_daddr6, IN6ADDR_LOOPBACK_INIT, 0) -INIT_ADDR6(extra_saddr6, IN6ADDR_LOOPBACK_INIT, 0) - -static unsigned long util_gettime(void) -{ - struct timeval tv; - - gettimeofday(&tv, NULL); - return (tv.tv_sec * 1000) + (tv.tv_usec / 1000); -} - -static void util_printaddr(const char *msg, struct sockaddr *addr) -{ - unsigned long off = 0; - char nbuf[INET6_ADDRSTRLEN]; - - switch (addr->sa_family) { - case PF_INET: - off = __builtin_offsetof(struct sockaddr_in, sin_addr); - break; - case PF_INET6: - off = __builtin_offsetof(struct sockaddr_in6, sin6_addr); - break; - default: - error(1, 0, "printaddr: unsupported family %u\n", - addr->sa_family); - } - - if (!inet_ntop(addr->sa_family, ((void *) addr) + off, nbuf, - sizeof(nbuf))) - error(1, errno, "inet_ntop"); - - fprintf(stderr, "%s: %s\n", msg, nbuf); -} - -static unsigned long add_csum_hword(const uint16_t *start, int num_u16) -{ - unsigned long sum = 0; - int i; - - for (i = 0; i < num_u16; i++) - sum += start[i]; - - return sum; -} - -static uint16_t build_ip_csum(const uint16_t *start, int num_u16, - unsigned long sum) -{ - sum += add_csum_hword(start, num_u16); - - while (sum >> 16) - sum = (sum & 0xffff) + (sum >> 16); - - return ~sum; -} - -static void build_ipv4_header(void *header, uint8_t proto, - uint32_t src, uint32_t dst, - int 
payload_len, uint8_t tos) -{ - struct iphdr *iph = header; - - iph->ihl = 5; - iph->version = 4; - iph->tos = tos; - iph->ttl = 8; - iph->tot_len = htons(sizeof(*iph) + payload_len); - iph->id = htons(1337); - iph->protocol = proto; - iph->saddr = src; - iph->daddr = dst; - iph->check = build_ip_csum((void *) iph, iph->ihl << 1, 0); -} - -static void ipv6_set_dsfield(struct ipv6hdr *ip6h, uint8_t dsfield) -{ - uint16_t val, *ptr = (uint16_t *)ip6h; - - val = ntohs(*ptr); - val &= 0xF00F; - val |= ((uint16_t) dsfield) << 4; - *ptr = htons(val); -} - -static void build_ipv6_header(void *header, uint8_t proto, - struct sockaddr_in6 *src, - struct sockaddr_in6 *dst, - int payload_len, uint8_t dsfield) -{ - struct ipv6hdr *ip6h = header; - - ip6h->version = 6; - ip6h->payload_len = htons(payload_len); - ip6h->nexthdr = proto; - ip6h->hop_limit = 8; - ipv6_set_dsfield(ip6h, dsfield); - - memcpy(&ip6h->saddr, &src->sin6_addr, sizeof(ip6h->saddr)); - memcpy(&ip6h->daddr, &dst->sin6_addr, sizeof(ip6h->daddr)); -} - -static uint16_t build_udp_v4_csum(const struct iphdr *iph, - const struct udphdr *udph, - int num_words) -{ - unsigned long pseudo_sum; - int num_u16 = sizeof(iph->saddr); /* halfwords: twice byte len */ - - pseudo_sum = add_csum_hword((void *) &iph->saddr, num_u16); - pseudo_sum += htons(IPPROTO_UDP); - pseudo_sum += udph->len; - return build_ip_csum((void *) udph, num_words, pseudo_sum); -} - -static uint16_t build_udp_v6_csum(const struct ipv6hdr *ip6h, - const struct udphdr *udph, - int num_words) -{ - unsigned long pseudo_sum; - int num_u16 = sizeof(ip6h->saddr); /* halfwords: twice byte len */ - - pseudo_sum = add_csum_hword((void *) &ip6h->saddr, num_u16); - pseudo_sum += htons(ip6h->nexthdr); - pseudo_sum += ip6h->payload_len; - return build_ip_csum((void *) udph, num_words, pseudo_sum); -} - -static void build_udp_header(void *header, int payload_len, - uint16_t dport, int family) -{ - struct udphdr *udph = header; - int len = sizeof(*udph) + payload_len; - - udph->source = htons(cfg_src_port); - udph->dest = htons(dport); - udph->len = htons(len); - udph->check = 0; - if (family == AF_INET) - udph->check = build_udp_v4_csum(header - sizeof(struct iphdr), - udph, len >> 1); - else - udph->check = build_udp_v6_csum(header - sizeof(struct ipv6hdr), - udph, len >> 1); -} - -static void build_gue_header(void *header, uint8_t proto) -{ - struct guehdr *gueh = header; - - gueh->proto_ctype = proto; -} - -static void build_gre_header(void *header, uint16_t proto) -{ - struct grehdr *greh = header; - - greh->protocol = htons(proto); -} - -static int l3_length(int family) -{ - if (family == AF_INET) - return sizeof(struct iphdr); - else - return sizeof(struct ipv6hdr); -} - -static int build_packet(void) -{ - int ol3_len = 0, ol4_len = 0, il3_len = 0, il4_len = 0; - int el3_len = 0; - - if (cfg_l3_extra) - el3_len = l3_length(cfg_l3_extra); - - /* calculate header offsets */ - if (cfg_encap_proto) { - ol3_len = l3_length(cfg_l3_outer); - - if (cfg_encap_proto == IPPROTO_GRE) - ol4_len = sizeof(struct grehdr); - else if (cfg_encap_proto == IPPROTO_UDP) - ol4_len = sizeof(struct udphdr) + sizeof(struct guehdr); - } - - il3_len = l3_length(cfg_l3_inner); - il4_len = sizeof(struct udphdr); - - if (el3_len + ol3_len + ol4_len + il3_len + il4_len + cfg_payload_len >= - sizeof(buf)) - error(1, 0, "packet too large\n"); - - /* - * Fill packet from inside out, to calculate correct checksums. - * But create ip before udp headers, as udp uses ip for pseudo-sum. 
- */ - memset(buf + el3_len + ol3_len + ol4_len + il3_len + il4_len, - cfg_payload_char, cfg_payload_len); - - /* add zero byte for udp csum padding */ - buf[el3_len + ol3_len + ol4_len + il3_len + il4_len + cfg_payload_len] = 0; - - switch (cfg_l3_inner) { - case PF_INET: - build_ipv4_header(buf + el3_len + ol3_len + ol4_len, - IPPROTO_UDP, - in_saddr4.sin_addr.s_addr, - in_daddr4.sin_addr.s_addr, - il4_len + cfg_payload_len, - cfg_dsfield_inner); - break; - case PF_INET6: - build_ipv6_header(buf + el3_len + ol3_len + ol4_len, - IPPROTO_UDP, - &in_saddr6, &in_daddr6, - il4_len + cfg_payload_len, - cfg_dsfield_inner); - break; - } - - build_udp_header(buf + el3_len + ol3_len + ol4_len + il3_len, - cfg_payload_len, CFG_PORT_INNER, cfg_l3_inner); - - if (!cfg_encap_proto) - return il3_len + il4_len + cfg_payload_len; - - switch (cfg_l3_outer) { - case PF_INET: - build_ipv4_header(buf + el3_len, cfg_encap_proto, - out_saddr4.sin_addr.s_addr, - out_daddr4.sin_addr.s_addr, - ol4_len + il3_len + il4_len + cfg_payload_len, - cfg_dsfield_outer); - break; - case PF_INET6: - build_ipv6_header(buf + el3_len, cfg_encap_proto, - &out_saddr6, &out_daddr6, - ol4_len + il3_len + il4_len + cfg_payload_len, - cfg_dsfield_outer); - break; - } - - switch (cfg_encap_proto) { - case IPPROTO_UDP: - build_gue_header(buf + el3_len + ol3_len + ol4_len - - sizeof(struct guehdr), - cfg_l3_inner == PF_INET ? IPPROTO_IPIP - : IPPROTO_IPV6); - build_udp_header(buf + el3_len + ol3_len, - sizeof(struct guehdr) + il3_len + il4_len + - cfg_payload_len, - cfg_port_gue, cfg_l3_outer); - break; - case IPPROTO_GRE: - build_gre_header(buf + el3_len + ol3_len, - cfg_l3_inner == PF_INET ? ETH_P_IP - : ETH_P_IPV6); - break; - } - - switch (cfg_l3_extra) { - case PF_INET: - build_ipv4_header(buf, - cfg_l3_outer == PF_INET ? IPPROTO_IPIP - : IPPROTO_IPV6, - extra_saddr4.sin_addr.s_addr, - extra_daddr4.sin_addr.s_addr, - ol3_len + ol4_len + il3_len + il4_len + - cfg_payload_len, 0); - break; - case PF_INET6: - build_ipv6_header(buf, - cfg_l3_outer == PF_INET ? 
IPPROTO_IPIP - : IPPROTO_IPV6, - &extra_saddr6, &extra_daddr6, - ol3_len + ol4_len + il3_len + il4_len + - cfg_payload_len, 0); - break; - } - - return el3_len + ol3_len + ol4_len + il3_len + il4_len + - cfg_payload_len; -} - -/* sender transmits encapsulated over RAW or unencap'd over UDP */ -static int setup_tx(void) -{ - int family, fd, ret; - - if (cfg_l3_extra) - family = cfg_l3_extra; - else if (cfg_l3_outer) - family = cfg_l3_outer; - else - family = cfg_l3_inner; - - fd = socket(family, SOCK_RAW, IPPROTO_RAW); - if (fd == -1) - error(1, errno, "socket tx"); - - if (cfg_l3_extra) { - if (cfg_l3_extra == PF_INET) - ret = connect(fd, (void *) &extra_daddr4, - sizeof(extra_daddr4)); - else - ret = connect(fd, (void *) &extra_daddr6, - sizeof(extra_daddr6)); - if (ret) - error(1, errno, "connect tx"); - } else if (cfg_l3_outer) { - /* connect to destination if not encapsulated */ - if (cfg_l3_outer == PF_INET) - ret = connect(fd, (void *) &out_daddr4, - sizeof(out_daddr4)); - else - ret = connect(fd, (void *) &out_daddr6, - sizeof(out_daddr6)); - if (ret) - error(1, errno, "connect tx"); - } else { - /* otherwise using loopback */ - if (cfg_l3_inner == PF_INET) - ret = connect(fd, (void *) &in_daddr4, - sizeof(in_daddr4)); - else - ret = connect(fd, (void *) &in_daddr6, - sizeof(in_daddr6)); - if (ret) - error(1, errno, "connect tx"); - } - - return fd; -} - -/* receiver reads unencapsulated UDP */ -static int setup_rx(void) -{ - int fd, ret; - - fd = socket(cfg_l3_inner, SOCK_DGRAM, 0); - if (fd == -1) - error(1, errno, "socket rx"); - - if (cfg_l3_inner == PF_INET) - ret = bind(fd, (void *) &in_daddr4, sizeof(in_daddr4)); - else - ret = bind(fd, (void *) &in_daddr6, sizeof(in_daddr6)); - if (ret) - error(1, errno, "bind rx"); - - return fd; -} - -static int do_tx(int fd, const char *pkt, int len) -{ - int ret; - - ret = write(fd, pkt, len); - if (ret == -1) - error(1, errno, "send"); - if (ret != len) - error(1, errno, "send: len (%d < %d)\n", ret, len); - - return 1; -} - -static int do_poll(int fd, short events, int timeout) -{ - struct pollfd pfd; - int ret; - - pfd.fd = fd; - pfd.events = events; - - ret = poll(&pfd, 1, timeout); - if (ret == -1) - error(1, errno, "poll"); - if (ret && !(pfd.revents & POLLIN)) - error(1, errno, "poll: unexpected event 0x%x\n", pfd.revents); - - return ret; -} - -static int do_rx(int fd) -{ - char rbuf; - int ret, num = 0; - - while (1) { - ret = recv(fd, &rbuf, 1, MSG_DONTWAIT); - if (ret == -1 && errno == EAGAIN) - break; - if (ret == -1) - error(1, errno, "recv"); - if (rbuf != cfg_payload_char) - error(1, 0, "recv: payload mismatch"); - num++; - } - - return num; -} - -static int do_main(void) -{ - unsigned long tstop, treport, tcur; - int fdt = -1, fdr = -1, len, tx = 0, rx = 0; - - if (!cfg_only_tx) - fdr = setup_rx(); - if (!cfg_only_rx) - fdt = setup_tx(); - - len = build_packet(); - - tcur = util_gettime(); - treport = tcur + 1000; - tstop = tcur + (cfg_num_secs * 1000); - - while (1) { - if (!cfg_only_rx) - tx += do_tx(fdt, buf, len); - - if (!cfg_only_tx) - rx += do_rx(fdr); - - if (cfg_num_secs) { - tcur = util_gettime(); - if (tcur >= tstop) - break; - if (tcur >= treport) { - fprintf(stderr, "pkts: tx=%u rx=%u\n", tx, rx); - tx = 0; - rx = 0; - treport = tcur + 1000; - } - } else { - if (tx == cfg_num_pkt) - break; - } - } - - /* read straggler packets, if any */ - if (rx < tx) { - tstop = util_gettime() + 100; - while (rx < tx) { - tcur = util_gettime(); - if (tcur >= tstop) - break; - - do_poll(fdr, POLLIN, tstop - tcur); - rx += 
do_rx(fdr); - } - } - - fprintf(stderr, "pkts: tx=%u rx=%u\n", tx, rx); - - if (fdr != -1 && close(fdr)) - error(1, errno, "close rx"); - if (fdt != -1 && close(fdt)) - error(1, errno, "close tx"); - - /* - * success (== 0) only if received all packets - * unless failure is expected, in which case none must arrive. - */ - if (cfg_expect_failure) - return rx != 0; - else - return rx != tx; -} - - -static void __attribute__((noreturn)) usage(const char *filepath) -{ - fprintf(stderr, "Usage: %s [-e gre|gue|bare|none] [-i 4|6] [-l len] " - "[-O 4|6] [-o 4|6] [-n num] [-t secs] [-R] [-T] " - "[-s [-d ] [-S ] [-D ] " - "[-x ] [-X ] [-f ] [-F]\n", - filepath); - exit(1); -} - -static void parse_addr(int family, void *addr, const char *optarg) -{ - int ret; - - ret = inet_pton(family, optarg, addr); - if (ret == -1) - error(1, errno, "inet_pton"); - if (ret == 0) - error(1, 0, "inet_pton: bad string"); -} - -static void parse_addr4(struct sockaddr_in *addr, const char *optarg) -{ - parse_addr(AF_INET, &addr->sin_addr, optarg); -} - -static void parse_addr6(struct sockaddr_in6 *addr, const char *optarg) -{ - parse_addr(AF_INET6, &addr->sin6_addr, optarg); -} - -static int parse_protocol_family(const char *filepath, const char *optarg) -{ - if (!strcmp(optarg, "4")) - return PF_INET; - if (!strcmp(optarg, "6")) - return PF_INET6; - - usage(filepath); -} - -static void parse_opts(int argc, char **argv) -{ - int c; - - while ((c = getopt(argc, argv, "d:D:e:f:Fhi:l:n:o:O:Rs:S:t:Tx:X:")) != -1) { - switch (c) { - case 'd': - if (cfg_l3_outer == AF_UNSPEC) - error(1, 0, "-d must be preceded by -o"); - if (cfg_l3_outer == AF_INET) - parse_addr4(&out_daddr4, optarg); - else - parse_addr6(&out_daddr6, optarg); - break; - case 'D': - if (cfg_l3_inner == AF_UNSPEC) - error(1, 0, "-D must be preceded by -i"); - if (cfg_l3_inner == AF_INET) - parse_addr4(&in_daddr4, optarg); - else - parse_addr6(&in_daddr6, optarg); - break; - case 'e': - if (!strcmp(optarg, "gre")) - cfg_encap_proto = IPPROTO_GRE; - else if (!strcmp(optarg, "gue")) - cfg_encap_proto = IPPROTO_UDP; - else if (!strcmp(optarg, "bare")) - cfg_encap_proto = IPPROTO_IPIP; - else if (!strcmp(optarg, "none")) - cfg_encap_proto = IPPROTO_IP; /* == 0 */ - else - usage(argv[0]); - break; - case 'f': - cfg_src_port = strtol(optarg, NULL, 0); - break; - case 'F': - cfg_expect_failure = true; - break; - case 'h': - usage(argv[0]); - break; - case 'i': - if (!strcmp(optarg, "4")) - cfg_l3_inner = PF_INET; - else if (!strcmp(optarg, "6")) - cfg_l3_inner = PF_INET6; - else - usage(argv[0]); - break; - case 'l': - cfg_payload_len = strtol(optarg, NULL, 0); - break; - case 'n': - cfg_num_pkt = strtol(optarg, NULL, 0); - break; - case 'o': - cfg_l3_outer = parse_protocol_family(argv[0], optarg); - break; - case 'O': - cfg_l3_extra = parse_protocol_family(argv[0], optarg); - break; - case 'R': - cfg_only_rx = true; - break; - case 's': - if (cfg_l3_outer == AF_INET) - parse_addr4(&out_saddr4, optarg); - else - parse_addr6(&out_saddr6, optarg); - break; - case 'S': - if (cfg_l3_inner == AF_INET) - parse_addr4(&in_saddr4, optarg); - else - parse_addr6(&in_saddr6, optarg); - break; - case 't': - cfg_num_secs = strtol(optarg, NULL, 0); - break; - case 'T': - cfg_only_tx = true; - break; - case 'x': - cfg_dsfield_outer = strtol(optarg, NULL, 0); - break; - case 'X': - cfg_dsfield_inner = strtol(optarg, NULL, 0); - break; - } - } - - if (cfg_only_rx && cfg_only_tx) - error(1, 0, "options: cannot combine rx-only and tx-only"); - - if (cfg_encap_proto && cfg_l3_outer == 
AF_UNSPEC) - error(1, 0, "options: must specify outer with encap"); - else if ((!cfg_encap_proto) && cfg_l3_outer != AF_UNSPEC) - error(1, 0, "options: cannot combine no-encap and outer"); - else if ((!cfg_encap_proto) && cfg_l3_extra != AF_UNSPEC) - error(1, 0, "options: cannot combine no-encap and extra"); - - if (cfg_l3_inner == AF_UNSPEC) - cfg_l3_inner = AF_INET6; - if (cfg_l3_inner == AF_INET6 && cfg_encap_proto == IPPROTO_IPIP) - cfg_encap_proto = IPPROTO_IPV6; - - /* RFC 6040 4.2: - * on decap, if outer encountered congestion (CE == 0x3), - * but inner cannot encode ECN (NoECT == 0x0), then drop packet. - */ - if (((cfg_dsfield_outer & 0x3) == 0x3) && - ((cfg_dsfield_inner & 0x3) == 0x0)) - cfg_expect_failure = true; -} - -static void print_opts(void) -{ - if (cfg_l3_inner == PF_INET6) { - util_printaddr("inner.dest6", (void *) &in_daddr6); - util_printaddr("inner.source6", (void *) &in_saddr6); - } else { - util_printaddr("inner.dest4", (void *) &in_daddr4); - util_printaddr("inner.source4", (void *) &in_saddr4); - } - - if (!cfg_l3_outer) - return; - - fprintf(stderr, "encap proto: %u\n", cfg_encap_proto); - - if (cfg_l3_outer == PF_INET6) { - util_printaddr("outer.dest6", (void *) &out_daddr6); - util_printaddr("outer.source6", (void *) &out_saddr6); - } else { - util_printaddr("outer.dest4", (void *) &out_daddr4); - util_printaddr("outer.source4", (void *) &out_saddr4); - } - - if (!cfg_l3_extra) - return; - - if (cfg_l3_outer == PF_INET6) { - util_printaddr("extra.dest6", (void *) &extra_daddr6); - util_printaddr("extra.source6", (void *) &extra_saddr6); - } else { - util_printaddr("extra.dest4", (void *) &extra_daddr4); - util_printaddr("extra.source4", (void *) &extra_saddr4); - } - -} - -int main(int argc, char **argv) -{ - parse_opts(argc, argv); - print_opts(); - return do_main(); -} diff --git a/tools/testing/selftests/bpf/test_flow_dissector.sh b/tools/testing/selftests/bpf/test_flow_dissector.sh deleted file mode 100755 index 4b298863797a..000000000000 --- a/tools/testing/selftests/bpf/test_flow_dissector.sh +++ /dev/null @@ -1,178 +0,0 @@ -#!/bin/bash -# SPDX-License-Identifier: GPL-2.0 -# -# Load BPF flow dissector and verify it correctly dissects traffic - -BPF_FILE="bpf_flow.bpf.o" -export TESTNAME=test_flow_dissector -unmount=0 - -# Kselftest framework requirement - SKIP code is 4. -ksft_skip=4 - -msg="skip all tests:" -if [ $UID != 0 ]; then - echo $msg please run this as root >&2 - exit $ksft_skip -fi - -# This test needs to be run in a network namespace with in_netns.sh. Check if -# this is the case and run it with in_netns.sh if it is being run in the root -# namespace. -if [[ -z $(ip netns identify $$) ]]; then - err=0 - if bpftool="$(which bpftool)"; then - echo "Testing global flow dissector..." - - $bpftool prog loadall $BPF_FILE /sys/fs/bpf/flow \ - type flow_dissector - - if ! unshare --net $bpftool prog attach pinned \ - /sys/fs/bpf/flow/_dissect flow_dissector; then - echo "Unexpected unsuccessful attach in namespace" >&2 - err=1 - fi - - $bpftool prog attach pinned /sys/fs/bpf/flow/_dissect \ - flow_dissector - - if unshare --net $bpftool prog attach pinned \ - /sys/fs/bpf/flow/_dissect flow_dissector; then - echo "Unexpected successful attach in namespace" >&2 - err=1 - fi - - if ! 
$bpftool prog detach pinned \ - /sys/fs/bpf/flow/_dissect flow_dissector; then - echo "Failed to detach flow dissector" >&2 - err=1 - fi - - rm -rf /sys/fs/bpf/flow - else - echo "Skipping root flow dissector test, bpftool not found" >&2 - fi - - # Run the rest of the tests in a net namespace. - ../net/in_netns.sh "$0" "$@" - err=$(( $err + $? )) - - if (( $err == 0 )); then - echo "selftests: $TESTNAME [PASS]"; - else - echo "selftests: $TESTNAME [FAILED]"; - fi - - exit $err -fi - -# Determine selftest success via shell exit code -exit_handler() -{ - set +e - - # Cleanup - tc filter del dev lo ingress pref 1337 2> /dev/null - tc qdisc del dev lo ingress 2> /dev/null - ./flow_dissector_load -d 2> /dev/null - if [ $unmount -ne 0 ]; then - umount bpffs 2> /dev/null - fi -} - -# Exit script immediately (well catched by trap handler) if any -# program/thing exits with a non-zero status. -set -e - -# (Use 'trap -l' to list meaning of numbers) -trap exit_handler 0 2 3 6 9 - -# Mount BPF file system -if /bin/mount | grep /sys/fs/bpf > /dev/null; then - echo "bpffs already mounted" -else - echo "bpffs not mounted. Mounting..." - unmount=1 - /bin/mount bpffs /sys/fs/bpf -t bpf -fi - -# Attach BPF program -./flow_dissector_load -p $BPF_FILE -s _dissect - -# Setup -tc qdisc add dev lo ingress -echo 0 > /proc/sys/net/ipv4/conf/default/rp_filter -echo 0 > /proc/sys/net/ipv4/conf/all/rp_filter -echo 0 > /proc/sys/net/ipv4/conf/lo/rp_filter - -echo "Testing IPv4..." -# Drops all IP/UDP packets coming from port 9 -tc filter add dev lo parent ffff: protocol ip pref 1337 flower ip_proto \ - udp src_port 9 action drop - -# Send 10 IPv4/UDP packets from port 8. Filter should not drop any. -./test_flow_dissector -i 4 -f 8 -# Send 10 IPv4/UDP packets from port 9. Filter should drop all. -./test_flow_dissector -i 4 -f 9 -F -# Send 10 IPv4/UDP packets from port 10. Filter should not drop any. -./test_flow_dissector -i 4 -f 10 - -echo "Testing IPv4 from 127.0.0.127 (fallback to generic dissector)..." -# Send 10 IPv4/UDP packets from port 8. Filter should not drop any. -./test_flow_dissector -i 4 -S 127.0.0.127 -f 8 -# Send 10 IPv4/UDP packets from port 9. Filter should drop all. -./test_flow_dissector -i 4 -S 127.0.0.127 -f 9 -F -# Send 10 IPv4/UDP packets from port 10. Filter should not drop any. -./test_flow_dissector -i 4 -S 127.0.0.127 -f 10 - -echo "Testing IPIP..." -# Send 10 IPv4/IPv4/UDP packets from port 8. Filter should not drop any. -./with_addr.sh ./with_tunnels.sh ./test_flow_dissector -o 4 -e bare -i 4 \ - -D 192.168.0.1 -S 1.1.1.1 -f 8 -# Send 10 IPv4/IPv4/UDP packets from port 9. Filter should drop all. -./with_addr.sh ./with_tunnels.sh ./test_flow_dissector -o 4 -e bare -i 4 \ - -D 192.168.0.1 -S 1.1.1.1 -f 9 -F -# Send 10 IPv4/IPv4/UDP packets from port 10. Filter should not drop any. -./with_addr.sh ./with_tunnels.sh ./test_flow_dissector -o 4 -e bare -i 4 \ - -D 192.168.0.1 -S 1.1.1.1 -f 10 - -echo "Testing IPv4 + GRE..." -# Send 10 IPv4/GRE/IPv4/UDP packets from port 8. Filter should not drop any. -./with_addr.sh ./with_tunnels.sh ./test_flow_dissector -o 4 -e gre -i 4 \ - -D 192.168.0.1 -S 1.1.1.1 -f 8 -# Send 10 IPv4/GRE/IPv4/UDP packets from port 9. Filter should drop all. -./with_addr.sh ./with_tunnels.sh ./test_flow_dissector -o 4 -e gre -i 4 \ - -D 192.168.0.1 -S 1.1.1.1 -f 9 -F -# Send 10 IPv4/GRE/IPv4/UDP packets from port 10. Filter should not drop any. 
-./with_addr.sh ./with_tunnels.sh ./test_flow_dissector -o 4 -e gre -i 4 \ - -D 192.168.0.1 -S 1.1.1.1 -f 10 - -tc filter del dev lo ingress pref 1337 - -echo "Testing port range..." -# Drops all IP/UDP packets coming from port 8-10 -tc filter add dev lo parent ffff: protocol ip pref 1337 flower ip_proto \ - udp src_port 8-10 action drop - -# Send 10 IPv4/UDP packets from port 7. Filter should not drop any. -./test_flow_dissector -i 4 -f 7 -# Send 10 IPv4/UDP packets from port 9. Filter should drop all. -./test_flow_dissector -i 4 -f 9 -F -# Send 10 IPv4/UDP packets from port 11. Filter should not drop any. -./test_flow_dissector -i 4 -f 11 - -tc filter del dev lo ingress pref 1337 - -echo "Testing IPv6..." -# Drops all IPv6/UDP packets coming from port 9 -tc filter add dev lo parent ffff: protocol ipv6 pref 1337 flower ip_proto \ - udp src_port 9 action drop - -# Send 10 IPv6/UDP packets from port 8. Filter should not drop any. -./test_flow_dissector -i 6 -f 8 -# Send 10 IPv6/UDP packets from port 9. Filter should drop all. -./test_flow_dissector -i 6 -f 9 -F -# Send 10 IPv6/UDP packets from port 10. Filter should not drop any. -./test_flow_dissector -i 6 -f 10 - -exit 0 -- cgit v1.2.3 From 98ebe5ef6f5c4517ba92fb3e56f95827ebea83fd Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 21 Nov 2024 14:45:58 -0800 Subject: libbpf: don't adjust USDT semaphore address if .stapsdt.base addr is missing USDT ELF note optionally can record an offset of .stapsdt.base, which is used to make adjustments to USDT target attach address. Currently, libbpf will do this address adjustment unconditionally if it finds .stapsdt.base ELF section in target binary. But there is a corner case where .stapsdt.base ELF section is present, but specific USDT note doesn't reference it. In such case, libbpf will basically just add base address and end up with absolutely incorrect USDT target address. This adjustment has to be done only if both .stapsdt.sema section is present and USDT note is recording a reference to it. Fixes: 74cc6311cec9 ("libbpf: Add USDT notes parsing and resolution logic") Signed-off-by: Andrii Nakryiko Acked-by: Jiri Olsa Link: https://lore.kernel.org/r/20241121224558.796110-1-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/usdt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/lib/bpf/usdt.c b/tools/lib/bpf/usdt.c index 5f085736c6c4..4e4a52742b01 100644 --- a/tools/lib/bpf/usdt.c +++ b/tools/lib/bpf/usdt.c @@ -661,7 +661,7 @@ static int collect_usdt_targets(struct usdt_manager *man, Elf *elf, const char * * [0] https://sourceware.org/systemtap/wiki/UserSpaceProbeImplementation */ usdt_abs_ip = note.loc_addr; - if (base_addr) + if (base_addr && note.base_addr) usdt_abs_ip += base_addr - note.base_addr; /* When attaching uprobes (which is what USDTs basically are) -- cgit v1.2.3 From 9aef3aaa7059c4dd0cc875107e05bb3198a7fc33 Mon Sep 17 00:00:00 2001 From: Mahe Tardy Date: Mon, 25 Nov 2024 15:26:03 +0000 Subject: selftests/bpf: add cgroup skb direct packet access test This verifies that programs of BPF_PROG_TYPE_CGROUP_SKB can access skb->data_end with direct packet access when being run with BPF_PROG_TEST_RUN. 
Signed-off-by: Mahe Tardy Link: https://lore.kernel.org/r/20241125152603.375898-2-mahe.tardy@gmail.com Signed-off-by: Alexei Starovoitov --- .../prog_tests/cgroup_skb_direct_packet_access.c | 28 ++++++++++++++++++++++ .../bpf/progs/cgroup_skb_direct_packet_access.c | 15 ++++++++++++ 2 files changed, 43 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/cgroup_skb_direct_packet_access.c create mode 100644 tools/testing/selftests/bpf/progs/cgroup_skb_direct_packet_access.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_skb_direct_packet_access.c b/tools/testing/selftests/bpf/prog_tests/cgroup_skb_direct_packet_access.c new file mode 100644 index 000000000000..e1a90c10db8c --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/cgroup_skb_direct_packet_access.c @@ -0,0 +1,28 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include "cgroup_skb_direct_packet_access.skel.h" + +void test_cgroup_skb_prog_run_direct_packet_access(void) +{ + int err; + struct cgroup_skb_direct_packet_access *skel; + char test_skb[64] = {}; + + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = test_skb, + .data_size_in = sizeof(test_skb), + ); + + skel = cgroup_skb_direct_packet_access__open_and_load(); + if (!ASSERT_OK_PTR(skel, "cgroup_skb_direct_packet_access__open_and_load")) + return; + + err = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.direct_packet_access), &topts); + ASSERT_OK(err, "bpf_prog_test_run_opts err"); + ASSERT_EQ(topts.retval, 1, "retval"); + + ASSERT_NEQ(skel->bss->data_end, 0, "data_end"); + + cgroup_skb_direct_packet_access__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/progs/cgroup_skb_direct_packet_access.c b/tools/testing/selftests/bpf/progs/cgroup_skb_direct_packet_access.c new file mode 100644 index 000000000000..e32b07d802bb --- /dev/null +++ b/tools/testing/selftests/bpf/progs/cgroup_skb_direct_packet_access.c @@ -0,0 +1,15 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "vmlinux.h" +#include + +__u32 data_end; + +SEC("cgroup_skb/ingress") +int direct_packet_access(struct __sk_buff *skb) +{ + data_end = skb->data_end; + return 1; +} + +char _license[] SEC("license") = "GPL"; -- cgit v1.2.3 From 9a17db586d722c0875f7a0580c438dc80afee1e7 Mon Sep 17 00:00:00 2001 From: Ben Olson Date: Tue, 26 Nov 2024 14:08:45 -0600 Subject: libbpf: Improve debug message when the base BTF cannot be found When running `bpftool` on a kernel module installed in `/lib/modules...`, this error is encountered if the user does not specify `--base-btf` to point to a valid base BTF (e.g. usually in `/sys/kernel/btf/vmlinux`). However, looking at the debug output to determine the cause of the error simply says `Invalid BTF string section`, which does not point to the actual source of the error. This just improves that debug message to tell users what happened. 
Signed-off-by: Ben Olson Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/Z0YqzQ5lNz7obQG7@bolson-desk Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/btf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index 12468ae0d573..a4ae2df68b91 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -283,7 +283,7 @@ static int btf_parse_str_sec(struct btf *btf) return -EINVAL; } if (!btf->base_btf && start[0]) { - pr_debug("Invalid BTF string section\n"); + pr_debug("Malformed BTF string section, did you forget to provide base BTF?\n"); return -EINVAL; } return 0; -- cgit v1.2.3 From c721d8f8b196285a59ed5c940e856bce9890523f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexis=20Lothor=C3=A9=20=28eBPF=20Foundation=29?= Date: Thu, 28 Nov 2024 15:38:43 +0100 Subject: selftests/bpf: ensure proper root namespace cleanup when test fail MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit serial_test_flow_dissector_namespace manipulates both the root net namespace and a dedicated non-root net namespace. If for some reason a program attach on root namespace succeeds while it was expected to fail, the unexpected program will remain attached to the root namespace, possibly affecting other runs or even other tests in the same run. Fix undesired test failure side effect by explicitly detaching programs on failing tests expecting attach to fail. As a side effect of this change, do not test errno value if the tested operation do not fail. Fixes: 284ed00a59dd ("selftests/bpf: migrate flow_dissector namespace exclusivity test") Signed-off-by: Alexis Lothoré (eBPF Foundation) Acked-by: Stanislav Fomichev Link: https://lore.kernel.org/r/20241128-small_flow_test_fix-v1-1-c12d45c98c59@bootlin.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/prog_tests/flow_dissector.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c index 8e6e483fead3..08bae13248c4 100644 --- a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c +++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c @@ -525,11 +525,14 @@ void serial_test_flow_dissector_namespace(void) ns = open_netns(TEST_NS); if (!ASSERT_OK_PTR(ns, "enter non-root net namespace")) goto out_clean_ns; - err = bpf_prog_attach(prog_fd, 0, BPF_FLOW_DISSECTOR, 0); + if (!ASSERT_ERR(err, + "refuse new flow dissector in non-root net namespace")) + bpf_prog_detach2(prog_fd, 0, BPF_FLOW_DISSECTOR); + else + ASSERT_EQ(errno, EEXIST, + "refused because of already attached prog"); close_netns(ns); - ASSERT_ERR(err, "refuse new flow dissector in non-root net namespace"); - ASSERT_EQ(errno, EEXIST, "refused because of already attached prog"); /* If no flow dissector is attached to the root namespace, we must * be able to attach one to a non-root net namespace @@ -545,8 +548,11 @@ void serial_test_flow_dissector_namespace(void) * a flow dissector to root namespace must fail */ err = bpf_prog_attach(prog_fd, 0, BPF_FLOW_DISSECTOR, 0); - ASSERT_ERR(err, "refuse new flow dissector on root namespace"); - ASSERT_EQ(errno, EEXIST, "refused because of already attached prog"); + if (!ASSERT_ERR(err, "refuse new flow dissector on root namespace")) + bpf_prog_detach2(prog_fd, 0, BPF_FLOW_DISSECTOR); + else + ASSERT_EQ(errno, EEXIST, + "refused because of already attached 
prog"); ns = open_netns(TEST_NS); bpf_prog_detach2(prog_fd, 0, BPF_FLOW_DISSECTOR); -- cgit v1.2.3 From 793baff3f24f16dab9061045e23eea67724feae6 Mon Sep 17 00:00:00 2001 From: Honglei Wang Date: Fri, 29 Nov 2024 17:10:03 +0800 Subject: sched_ext: Add __weak to fix the build errors commit 5cbb302880f5 ("sched_ext: Rename scx_bpf_dispatch[_vtime]_from_dsq*() -> scx_bpf_dsq_move[_vtime]*()") introduced several new functions which caused compilation errors when compiled with clang. Let's fix this by adding __weak markers. Signed-off-by: Honglei Wang Signed-off-by: Tejun Heo Fixes: 5cbb302880f5 ("sched_ext: Rename scx_bpf_dispatch[_vtime]_from_dsq*() -> scx_bpf_dsq_move[_vtime]*()") Acked-by: Andrii Nakryiko --- tools/sched_ext/include/scx/common.bpf.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/sched_ext/include/scx/common.bpf.h b/tools/sched_ext/include/scx/common.bpf.h index 2f36b7b6418d..625f5b046776 100644 --- a/tools/sched_ext/include/scx/common.bpf.h +++ b/tools/sched_ext/include/scx/common.bpf.h @@ -40,9 +40,9 @@ void scx_bpf_dsq_insert(struct task_struct *p, u64 dsq_id, u64 slice, u64 enq_fl void scx_bpf_dsq_insert_vtime(struct task_struct *p, u64 dsq_id, u64 slice, u64 vtime, u64 enq_flags) __ksym __weak; u32 scx_bpf_dispatch_nr_slots(void) __ksym; void scx_bpf_dispatch_cancel(void) __ksym; -bool scx_bpf_dsq_move_to_local(u64 dsq_id) __ksym; -void scx_bpf_dsq_move_set_slice(struct bpf_iter_scx_dsq *it__iter, u64 slice) __ksym; -void scx_bpf_dsq_move_set_vtime(struct bpf_iter_scx_dsq *it__iter, u64 vtime) __ksym; +bool scx_bpf_dsq_move_to_local(u64 dsq_id) __ksym __weak; +void scx_bpf_dsq_move_set_slice(struct bpf_iter_scx_dsq *it__iter, u64 slice) __ksym __weak; +void scx_bpf_dsq_move_set_vtime(struct bpf_iter_scx_dsq *it__iter, u64 vtime) __ksym __weak; bool scx_bpf_dsq_move(struct bpf_iter_scx_dsq *it__iter, struct task_struct *p, u64 dsq_id, u64 enq_flags) __ksym __weak; bool scx_bpf_dsq_move_vtime(struct bpf_iter_scx_dsq *it__iter, struct task_struct *p, u64 dsq_id, u64 enq_flags) __ksym __weak; u32 scx_bpf_reenqueue_local(void) __ksym; -- cgit v1.2.3 From e8a99af68c068865dbac7f3330e97bf8e96edf33 Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Tue, 3 Dec 2024 15:44:17 +0800 Subject: tools/power turbostat: Add initial support for PantherLake Add initial support for PantherLake. It shares the same features with Lunarlake. Signed-off-by: Zhang Rui Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 1 + 1 file changed, 1 insertion(+) (limited to 'tools') diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 58a487c225a7..540336138ce9 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -1024,6 +1024,7 @@ static const struct platform_data turbostat_pdata[] = { { INTEL_ARROWLAKE_U, &adl_features }, { INTEL_ARROWLAKE, &adl_features }, { INTEL_LUNARLAKE_M, &lnl_features }, + { INTEL_PANTHERLAKE_L, &lnl_features }, { INTEL_ATOM_SILVERMONT, &slv_features }, { INTEL_ATOM_SILVERMONT_D, &slvd_features }, { INTEL_ATOM_AIRMONT, &amt_features }, -- cgit v1.2.3 From 6b47ed23e2f1bc2c177da47437970e6208ac9ea0 Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Tue, 3 Dec 2024 15:44:18 +0800 Subject: tools/power turbostat: Add initial support for ClearwaterForest Add initial support for ClearwaterForest. It shares the same features with SierraForest. 
Signed-off-by: Zhang Rui Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 1 + 1 file changed, 1 insertion(+) (limited to 'tools') diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 540336138ce9..e203f109dd2e 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -1037,6 +1037,7 @@ static const struct platform_data turbostat_pdata[] = { { INTEL_ATOM_GRACEMONT, &adl_features }, { INTEL_ATOM_CRESTMONT_X, &srf_features }, { INTEL_ATOM_CRESTMONT, &grr_features }, + { INTEL_ATOM_DARKMONT_X, &srf_features }, { INTEL_XEON_PHI_KNL, &knl_features }, { INTEL_XEON_PHI_KNM, &knl_features }, /* -- cgit v1.2.3 From 9e47f8adb053b69e2e8310551e6fd5156704cef4 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Tue, 3 Dec 2024 12:23:22 -0500 Subject: tools/power turbostat: update turbostat(8) Clarify how to get the latest version. Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.8 | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8 index a7f7ed01421c..59b89e6b25bf 100644 --- a/tools/power/x86/turbostat/turbostat.8 +++ b/tools/power/x86/turbostat/turbostat.8 @@ -516,14 +516,40 @@ that they count at TSC rate, which is true on all processors tested to date. Volume 3B: System Programming Guide" https://www.intel.com/products/processor/manuals/ +.SH RUN THE LATEST VERSION +If turbostat complains that it doesn't recognize your processor, +please try the latest version. + +The latest version of turbostat does not require the latest version of the Linux kernel. +However, some features, such as perf(1) counters, do require kernel support. + +The latest turbostat release is available in the upstream Linux Kernel source tree. +eg. "git pull https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git" +and run make in tools/power/x86/turbostat/. + +n.b. "make install" will update your system manually, but a distro update may subsequently downgrade your turbostat to an older version. +For this reason, manually installing to /usr/local/bin may be what you want. + +Note that turbostat/Makefile has a "make snapshot" target, which will create a tar file +that can build without a local kernel source tree. + +If the upstream version isn't new enough, the development tree can be found here: +"git pull https://git.kernel.org/pub/scm/linux/kernel/git/lenb/linux.git turbostat" + +If the development tree doesn't work, please contact the author via chat, +or via email with the word "turbostat" on the Subject line. + .SH FILES .ta .nf +/sys/bus/event_source/devices/ /dev/cpu/*/msr +/sys/class/intel_pmt/ +/sys/devices/system/cpu/ .fi .SH "SEE ALSO" -msr(4), vmstat(8) +perf(1), msr(4), vmstat(8) .PP .SH AUTHOR .nf -- cgit v1.2.3 From 4133be39e216130a86382fb5cfbaf6851a6f7a45 Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Tue, 3 Dec 2024 15:51:16 +0800 Subject: tools/power turbostat: Exit on unsupported Intel models Turbostat requires per-platform enabling for Intel CPU models due to platform-specific features. When running on unsupported Intel CPU models, turbostat currently operates with limited default features, which can lead to users unknowingly using an outdated version of the tool. 
Enhance turbostat to exit by default when run on unsupported Intel CPU models, with a clear message to users, informing them that their CPU model is not supported and advising them to update to the latest version of turbostat for full functionality. [lenb: updated error message wording] Signed-off-by: Zhang Rui Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'tools') diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index e203f109dd2e..5e894b71003c 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -1079,6 +1079,10 @@ void probe_platform_features(unsigned int family, unsigned int model) return; } } + + fprintf(stderr, "Unsupported platform detected.\n" + "\tSee RUN THE LATEST VERSION on turbostat(8)\n"); + exit(1); } /* Model specific support End */ -- cgit v1.2.3 From 48c62ba1b407140229e92f5cfae6ae113fc4af8e Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Tue, 3 Dec 2024 15:51:17 +0800 Subject: tools/power turbostat: Exit on unsupported Vendors Turbostat currently supports x86 processors from Intel, AMD, and Hygon. The behavior of turbostat on CPUs from other vendors has not been evaluated and may lead to incorrect or undefined behavior. Enhance turbostat to exit by default when running on an unsupported CPU vendor. This ensures that users are aware that their CPU is not currently supported by turbostat, guiding them to seek support for their specific hardware through future patches. Signed-off-by: Zhang Rui Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 5e894b71003c..cb659b274554 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -1056,9 +1056,9 @@ void probe_platform_features(unsigned int family, unsigned int model) { int i; - platform = &default_features; if (authentic_amd || hygon_genuine) { + platform = &default_features; if (max_extended_level >= 0x80000007) { unsigned int eax, ebx, ecx, edx; @@ -1071,7 +1071,7 @@ void probe_platform_features(unsigned int family, unsigned int model) } if (!genuine_intel) - return; + goto end; for (i = 0; turbostat_pdata[i].features; i++) { if (VFM_FAMILY(turbostat_pdata[i].vfm) == family && VFM_MODEL(turbostat_pdata[i].vfm) == model) { @@ -1080,6 +1080,10 @@ void probe_platform_features(unsigned int family, unsigned int model) } } +end: + if (platform) + return; + fprintf(stderr, "Unsupported platform detected.\n" "\tSee RUN THE LATEST VERSION on turbostat(8)\n"); exit(1); -- cgit v1.2.3 From cc63f89ef9db70f74c563317d36028bb5e6196a1 Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Tue, 3 Dec 2024 15:51:18 +0800 Subject: tools/power turbostat: Improve --help output Improve the `--help` output of turbostat by standardizing the format and enhancing readability. The following changes are made to ensure consistency and clarity in the help message: 1. Use a consistent pattern for each parameter's help message: - Display the parameter and its input (if any) on the same line, separated by a space. - Provide the detailed description on a separate line. 2. Ensure that the first character of each description is in lower-case. These changes make the help output more uniform and easier to read, helping users quickly understand the available options and their usage. 
No functional change. Signed-off-by: Zhang Rui Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 41 ++++++++++++++++++++++------------- 1 file changed, 26 insertions(+), 15 deletions(-) (limited to 'tools') diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index cb659b274554..5165450a8187 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -2145,41 +2145,52 @@ void help(void) "when COMMAND completes.\n" "If no COMMAND is specified, turbostat wakes every 5-seconds\n" "to print statistics, until interrupted.\n" - " -a, --add add a counter\n" + " -a, --add counter\n" + " add a counter\n" " eg. --add msr0x10,u64,cpu,delta,MY_TSC\n" " eg. --add perf/cstate_pkg/c2-residency,package,delta,percent,perfPC2\n" " eg. --add pmt,name=XTAL,type=raw,domain=package0,offset=0,lsb=0,msb=63,guid=0x1a067102\n" - " -c, --cpu cpu-set limit output to summary plus cpu-set:\n" + " -c, --cpu cpu-set\n" + " limit output to summary plus cpu-set:\n" " {core | package | j,k,l..m,n-p }\n" - " -d, --debug displays usec, Time_Of_Day_Seconds and more debugging\n" + " -d, --debug\n" + " displays usec, Time_Of_Day_Seconds and more debugging\n" " debug messages are printed to stderr\n" - " -D, --Dump displays the raw counter values\n" - " -e, --enable [all | column]\n" + " -D, --Dump\n" + " displays the raw counter values\n" + " -e, --enable [all | column]\n" " shows all or the specified disabled column\n" - " -H, --hide [column|column,column,...]\n" + " -H, --hide [column | column,column,...]\n" " hide the specified column(s)\n" " -i, --interval sec.subsec\n" - " Override default 5-second measurement interval\n" - " -J, --Joules displays energy in Joules instead of Watts\n" - " -l, --list list column headers only\n" - " -M, --no-msr Disable all uses of the MSR driver\n" - " -P, --no-perf Disable all uses of the perf API\n" + " override default 5-second measurement interval\n" + " -J, --Joules\n" + " displays energy in Joules instead of Watts\n" + " -l, --list\n" + " list column headers only\n" + " -M, --no-msr\n" + " disable all uses of the MSR driver\n" + " -P, --no-perf\n" + " disable all uses of the perf API\n" " -n, --num_iterations num\n" " number of the measurement iterations\n" " -N, --header_iterations num\n" " print header every num iterations\n" " -o, --out file\n" " create or truncate \"file\" for all output\n" - " -q, --quiet skip decoding system configuration header\n" - " -s, --show [column|column,column,...]\n" + " -q, --quiet\n" + " skip decoding system configuration header\n" + " -s, --show [column | column,column,...]\n" " show only the specified column(s)\n" " -S, --Summary\n" " limits output to 1-line system summary per interval\n" " -T, --TCC temperature\n" " sets the Thermal Control Circuit temperature in\n" " degrees Celsius\n" - " -h, --help print this help message\n" - " -v, --version print version information\n" "\n" "For more help, run \"man turbostat\"\n"); + " -h, --help\n" + " print this help message\n" + " -v, --version\n" + " print version information\n" "\n" "For more help, run \"man turbostat\"\n"); } /* -- cgit v1.2.3 From 3d94026af328d3d355d15c1d7fe73278f77c6a42 Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Tue, 3 Dec 2024 15:51:19 +0800 Subject: tools/power turbostat: Introduce --force parameter Turbostat currently exits under the following conditions: 1. When running on non-Intel/AMD/Hygon x86 vendors. 2. When running on Intel models that lack specific platform features. 
Introduce a new `--force` parameter that allows turbostat to run on these unsupported platforms with minimal default feature support. This provides users with the flexibility to gather basic information even on unsupported systems. [lenb: updated warning message text] Signed-off-by: Zhang Rui Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 5165450a8187..7accc4a73366 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -326,6 +326,7 @@ unsigned int rapl_joules; unsigned int summary_only; unsigned int list_header_only; unsigned int dump_only; +unsigned int force_load; unsigned int has_aperf; unsigned int has_aperf_access; unsigned int has_epb; @@ -1058,7 +1059,8 @@ void probe_platform_features(unsigned int family, unsigned int model) if (authentic_amd || hygon_genuine) { - platform = &default_features; + /* fallback to default features on unsupported models */ + force_load++; if (max_extended_level >= 0x80000007) { unsigned int eax, ebx, ecx, edx; @@ -1067,7 +1069,7 @@ void probe_platform_features(unsigned int family, unsigned int model) if ((edx & (1 << 14)) && family >= 0x17) platform = &amd_features_with_rapl; } - return; + goto end; } if (!genuine_intel) @@ -1081,6 +1083,11 @@ void probe_platform_features(unsigned int family, unsigned int model) } end: + if (force_load && !platform) { + fprintf(outf, "Forced to run on unsupported platform!\n"); + platform = &default_features; + } + if (platform) return; @@ -2160,6 +2167,8 @@ void help(void) " displays the raw counter values\n" " -e, --enable [all | column]\n" " shows all or the specified disabled column\n" + " -f, --force\n" + " force load turbostat with minimum default features on unsupported platforms.\n" " -H, --hide [column | column,column,...]\n" " hide the specified column(s)\n" " -i, --interval sec.subsec\n" @@ -9942,6 +9951,7 @@ void cmdline(int argc, char **argv) { "Dump", no_argument, 0, 'D' }, { "debug", no_argument, 0, 'd' }, /* internal, not documented */ { "enable", required_argument, 0, 'e' }, + { "force", no_argument, 0, 'f' }, { "interval", required_argument, 0, 'i' }, { "IPC", no_argument, 0, 'I' }, { "num_iterations", required_argument, 0, 'n' }, @@ -10002,6 +10012,9 @@ void cmdline(int argc, char **argv) /* --enable specified counter */ bic_enabled = bic_enabled | bic_lookup(optarg, SHOW_LIST); break; + case 'f': + force_load++; + break; case 'd': debug++; ENABLE_BIC(BIC_DISABLED_BY_DEFAULT); -- cgit v1.2.3 From cbd8730aea8d79cda6b0f3c18b406dfdef0c1b80 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Tue, 3 Dec 2024 19:03:58 -0800 Subject: bpf: Improve verifier log for resource leak on exit The verifier log when leaking resources on BPF_EXIT may be a bit confusing, as it's a problem only when finally existing from the main prog, not from any of the subprogs. Hence, update the verifier error string and the corresponding selftests matching on it. 
Acked-by: Eduard Zingerman Suggested-by: Eduard Zingerman Signed-off-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20241204030400.208005-6-memxor@gmail.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/progs/exceptions_fail.c | 4 ++-- tools/testing/selftests/bpf/progs/preempt_lock.c | 14 +++++++------- tools/testing/selftests/bpf/progs/verifier_spin_lock.c | 2 +- 3 files changed, 10 insertions(+), 10 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/exceptions_fail.c b/tools/testing/selftests/bpf/progs/exceptions_fail.c index fe0f3fa5aab6..8a0fdff89927 100644 --- a/tools/testing/selftests/bpf/progs/exceptions_fail.c +++ b/tools/testing/selftests/bpf/progs/exceptions_fail.c @@ -131,7 +131,7 @@ int reject_subprog_with_lock(void *ctx) } SEC("?tc") -__failure __msg("BPF_EXIT instruction cannot be used inside bpf_rcu_read_lock-ed region") +__failure __msg("BPF_EXIT instruction in main prog cannot be used inside bpf_rcu_read_lock-ed region") int reject_with_rcu_read_lock(void *ctx) { bpf_rcu_read_lock(); @@ -147,7 +147,7 @@ __noinline static int throwing_subprog(struct __sk_buff *ctx) } SEC("?tc") -__failure __msg("BPF_EXIT instruction cannot be used inside bpf_rcu_read_lock-ed region") +__failure __msg("BPF_EXIT instruction in main prog cannot be used inside bpf_rcu_read_lock-ed region") int reject_subprog_with_rcu_read_lock(void *ctx) { bpf_rcu_read_lock(); diff --git a/tools/testing/selftests/bpf/progs/preempt_lock.c b/tools/testing/selftests/bpf/progs/preempt_lock.c index 885377e83607..5269571cf7b5 100644 --- a/tools/testing/selftests/bpf/progs/preempt_lock.c +++ b/tools/testing/selftests/bpf/progs/preempt_lock.c @@ -6,7 +6,7 @@ #include "bpf_experimental.h" SEC("?tc") -__failure __msg("BPF_EXIT instruction cannot be used inside bpf_preempt_disable-ed region") +__failure __msg("BPF_EXIT instruction in main prog cannot be used inside bpf_preempt_disable-ed region") int preempt_lock_missing_1(struct __sk_buff *ctx) { bpf_preempt_disable(); @@ -14,7 +14,7 @@ int preempt_lock_missing_1(struct __sk_buff *ctx) } SEC("?tc") -__failure __msg("BPF_EXIT instruction cannot be used inside bpf_preempt_disable-ed region") +__failure __msg("BPF_EXIT instruction in main prog cannot be used inside bpf_preempt_disable-ed region") int preempt_lock_missing_2(struct __sk_buff *ctx) { bpf_preempt_disable(); @@ -23,7 +23,7 @@ int preempt_lock_missing_2(struct __sk_buff *ctx) } SEC("?tc") -__failure __msg("BPF_EXIT instruction cannot be used inside bpf_preempt_disable-ed region") +__failure __msg("BPF_EXIT instruction in main prog cannot be used inside bpf_preempt_disable-ed region") int preempt_lock_missing_3(struct __sk_buff *ctx) { bpf_preempt_disable(); @@ -33,7 +33,7 @@ int preempt_lock_missing_3(struct __sk_buff *ctx) } SEC("?tc") -__failure __msg("BPF_EXIT instruction cannot be used inside bpf_preempt_disable-ed region") +__failure __msg("BPF_EXIT instruction in main prog cannot be used inside bpf_preempt_disable-ed region") int preempt_lock_missing_3_minus_2(struct __sk_buff *ctx) { bpf_preempt_disable(); @@ -55,7 +55,7 @@ static __noinline void preempt_enable(void) } SEC("?tc") -__failure __msg("BPF_EXIT instruction cannot be used inside bpf_preempt_disable-ed region") +__failure __msg("BPF_EXIT instruction in main prog cannot be used inside bpf_preempt_disable-ed region") int preempt_lock_missing_1_subprog(struct __sk_buff *ctx) { preempt_disable(); @@ -63,7 +63,7 @@ int preempt_lock_missing_1_subprog(struct __sk_buff *ctx) } SEC("?tc") 
-__failure __msg("BPF_EXIT instruction cannot be used inside bpf_preempt_disable-ed region") +__failure __msg("BPF_EXIT instruction in main prog cannot be used inside bpf_preempt_disable-ed region") int preempt_lock_missing_2_subprog(struct __sk_buff *ctx) { preempt_disable(); @@ -72,7 +72,7 @@ int preempt_lock_missing_2_subprog(struct __sk_buff *ctx) } SEC("?tc") -__failure __msg("BPF_EXIT instruction cannot be used inside bpf_preempt_disable-ed region") +__failure __msg("BPF_EXIT instruction in main prog cannot be used inside bpf_preempt_disable-ed region") int preempt_lock_missing_2_minus_1_subprog(struct __sk_buff *ctx) { preempt_disable(); diff --git a/tools/testing/selftests/bpf/progs/verifier_spin_lock.c b/tools/testing/selftests/bpf/progs/verifier_spin_lock.c index 3f679de73229..25599eac9a70 100644 --- a/tools/testing/selftests/bpf/progs/verifier_spin_lock.c +++ b/tools/testing/selftests/bpf/progs/verifier_spin_lock.c @@ -187,7 +187,7 @@ l0_%=: r6 = r0; \ SEC("cgroup/skb") __description("spin_lock: test6 missing unlock") -__failure __msg("BPF_EXIT instruction cannot be used inside bpf_spin_lock-ed region") +__failure __msg("BPF_EXIT instruction in main prog cannot be used inside bpf_spin_lock-ed region") __failure_unpriv __msg_unpriv("") __naked void spin_lock_test6_missing_unlock(void) { -- cgit v1.2.3 From e8c6c80b76e53632992ec345a02c05942aa8f3f2 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Tue, 3 Dec 2024 19:03:59 -0800 Subject: selftests/bpf: Expand coverage of preempt tests to sleepable kfunc For preemption-related kfuncs, we don't test their interaction with sleepable kfuncs (we do test helpers) even though the verifier has code to protect against such a pattern. Expand coverage of the selftest to include this case. Acked-by: Eduard Zingerman Signed-off-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20241204030400.208005-7-memxor@gmail.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/progs/preempt_lock.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/preempt_lock.c b/tools/testing/selftests/bpf/progs/preempt_lock.c index 5269571cf7b5..6c5797bf0ead 100644 --- a/tools/testing/selftests/bpf/progs/preempt_lock.c +++ b/tools/testing/selftests/bpf/progs/preempt_lock.c @@ -5,6 +5,8 @@ #include "bpf_misc.h" #include "bpf_experimental.h" +extern int bpf_copy_from_user_str(void *dst, u32 dst__sz, const void *unsafe_ptr__ign, u64 flags) __weak __ksym; + SEC("?tc") __failure __msg("BPF_EXIT instruction in main prog cannot be used inside bpf_preempt_disable-ed region") int preempt_lock_missing_1(struct __sk_buff *ctx) @@ -113,6 +115,18 @@ int preempt_sleepable_helper(void *ctx) return 0; } +SEC("?fentry.s/" SYS_PREFIX "sys_getpgid") +__failure __msg("kernel func bpf_copy_from_user_str is sleepable within non-preemptible region") +int preempt_sleepable_kfunc(void *ctx) +{ + u32 data; + + bpf_preempt_disable(); + bpf_copy_from_user_str(&data, sizeof(data), NULL, 0); + bpf_preempt_enable(); + return 0; +} + int __noinline preempt_global_subprog(void) { preempt_balance_subprog(); -- cgit v1.2.3 From 4fec4c22f046a64741a1ae417de718504fd2cda2 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Tue, 3 Dec 2024 19:04:00 -0800 Subject: selftests/bpf: Add IRQ save/restore tests Include tests that check for rejection in erroneous cases, like unbalanced IRQ-disabled counts, within and across subprogs, invalid IRQ flag state or input to kfuncs, behavior upon overwriting 
IRQ saved state on stack, interaction with sleepable kfuncs/helpers, global functions, and out of order restore. Include some success scenarios as well to demonstrate usage. #128/1 irq/irq_save_bad_arg:OK #128/2 irq/irq_restore_bad_arg:OK #128/3 irq/irq_restore_missing_2:OK #128/4 irq/irq_restore_missing_3:OK #128/5 irq/irq_restore_missing_3_minus_2:OK #128/6 irq/irq_restore_missing_1_subprog:OK #128/7 irq/irq_restore_missing_2_subprog:OK #128/8 irq/irq_restore_missing_3_subprog:OK #128/9 irq/irq_restore_missing_3_minus_2_subprog:OK #128/10 irq/irq_balance:OK #128/11 irq/irq_balance_n:OK #128/12 irq/irq_balance_subprog:OK #128/13 irq/irq_global_subprog:OK #128/14 irq/irq_restore_ooo:OK #128/15 irq/irq_restore_ooo_3:OK #128/16 irq/irq_restore_3_subprog:OK #128/17 irq/irq_restore_4_subprog:OK #128/18 irq/irq_restore_ooo_3_subprog:OK #128/19 irq/irq_restore_invalid:OK #128/20 irq/irq_save_invalid:OK #128/21 irq/irq_restore_iter:OK #128/22 irq/irq_save_iter:OK #128/23 irq/irq_flag_overwrite:OK #128/24 irq/irq_flag_overwrite_partial:OK #128/25 irq/irq_ooo_refs_array:OK #128/26 irq/irq_sleepable_helper:OK #128/27 irq/irq_sleepable_kfunc:OK #128 irq:OK Summary: 1/27 PASSED, 0 SKIPPED, 0 FAILED Acked-by: Eduard Zingerman Signed-off-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20241204030400.208005-8-memxor@gmail.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/prog_tests/verifier.c | 2 + tools/testing/selftests/bpf/progs/irq.c | 444 ++++++++++++++++++++++ 2 files changed, 446 insertions(+) create mode 100644 tools/testing/selftests/bpf/progs/irq.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/verifier.c b/tools/testing/selftests/bpf/prog_tests/verifier.c index d9f65adb456b..b1b4d69c407a 100644 --- a/tools/testing/selftests/bpf/prog_tests/verifier.c +++ b/tools/testing/selftests/bpf/prog_tests/verifier.c @@ -98,6 +98,7 @@ #include "verifier_xdp_direct_packet_access.skel.h" #include "verifier_bits_iter.skel.h" #include "verifier_lsm.skel.h" +#include "irq.skel.h" #define MAX_ENTRIES 11 @@ -225,6 +226,7 @@ void test_verifier_xdp(void) { RUN(verifier_xdp); } void test_verifier_xdp_direct_packet_access(void) { RUN(verifier_xdp_direct_packet_access); } void test_verifier_bits_iter(void) { RUN(verifier_bits_iter); } void test_verifier_lsm(void) { RUN(verifier_lsm); } +void test_irq(void) { RUN(irq); } void test_verifier_mtu(void) { diff --git a/tools/testing/selftests/bpf/progs/irq.c b/tools/testing/selftests/bpf/progs/irq.c new file mode 100644 index 000000000000..b0b53d980964 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/irq.c @@ -0,0 +1,444 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. 
*/ +#include +#include +#include "bpf_misc.h" +#include "bpf_experimental.h" + +unsigned long global_flags; + +extern void bpf_local_irq_save(unsigned long *) __weak __ksym; +extern void bpf_local_irq_restore(unsigned long *) __weak __ksym; +extern int bpf_copy_from_user_str(void *dst, u32 dst__sz, const void *unsafe_ptr__ign, u64 flags) __weak __ksym; + +SEC("?tc") +__failure __msg("arg#0 doesn't point to an irq flag on stack") +int irq_save_bad_arg(struct __sk_buff *ctx) +{ + bpf_local_irq_save(&global_flags); + return 0; +} + +SEC("?tc") +__failure __msg("arg#0 doesn't point to an irq flag on stack") +int irq_restore_bad_arg(struct __sk_buff *ctx) +{ + bpf_local_irq_restore(&global_flags); + return 0; +} + +SEC("?tc") +__failure __msg("BPF_EXIT instruction in main prog cannot be used inside bpf_local_irq_save-ed region") +int irq_restore_missing_2(struct __sk_buff *ctx) +{ + unsigned long flags1; + unsigned long flags2; + + bpf_local_irq_save(&flags1); + bpf_local_irq_save(&flags2); + return 0; +} + +SEC("?tc") +__failure __msg("BPF_EXIT instruction in main prog cannot be used inside bpf_local_irq_save-ed region") +int irq_restore_missing_3(struct __sk_buff *ctx) +{ + unsigned long flags1; + unsigned long flags2; + unsigned long flags3; + + bpf_local_irq_save(&flags1); + bpf_local_irq_save(&flags2); + bpf_local_irq_save(&flags3); + return 0; +} + +SEC("?tc") +__failure __msg("BPF_EXIT instruction in main prog cannot be used inside bpf_local_irq_save-ed region") +int irq_restore_missing_3_minus_2(struct __sk_buff *ctx) +{ + unsigned long flags1; + unsigned long flags2; + unsigned long flags3; + + bpf_local_irq_save(&flags1); + bpf_local_irq_save(&flags2); + bpf_local_irq_save(&flags3); + bpf_local_irq_restore(&flags3); + bpf_local_irq_restore(&flags2); + return 0; +} + +static __noinline void local_irq_save(unsigned long *flags) +{ + bpf_local_irq_save(flags); +} + +static __noinline void local_irq_restore(unsigned long *flags) +{ + bpf_local_irq_restore(flags); +} + +SEC("?tc") +__failure __msg("BPF_EXIT instruction in main prog cannot be used inside bpf_local_irq_save-ed region") +int irq_restore_missing_1_subprog(struct __sk_buff *ctx) +{ + unsigned long flags; + + local_irq_save(&flags); + return 0; +} + +SEC("?tc") +__failure __msg("BPF_EXIT instruction in main prog cannot be used inside bpf_local_irq_save-ed region") +int irq_restore_missing_2_subprog(struct __sk_buff *ctx) +{ + unsigned long flags1; + unsigned long flags2; + + local_irq_save(&flags1); + local_irq_save(&flags2); + return 0; +} + +SEC("?tc") +__failure __msg("BPF_EXIT instruction in main prog cannot be used inside bpf_local_irq_save-ed region") +int irq_restore_missing_3_subprog(struct __sk_buff *ctx) +{ + unsigned long flags1; + unsigned long flags2; + unsigned long flags3; + + local_irq_save(&flags1); + local_irq_save(&flags2); + local_irq_save(&flags3); + return 0; +} + +SEC("?tc") +__failure __msg("BPF_EXIT instruction in main prog cannot be used inside bpf_local_irq_save-ed region") +int irq_restore_missing_3_minus_2_subprog(struct __sk_buff *ctx) +{ + unsigned long flags1; + unsigned long flags2; + unsigned long flags3; + + local_irq_save(&flags1); + local_irq_save(&flags2); + local_irq_save(&flags3); + local_irq_restore(&flags3); + local_irq_restore(&flags2); + return 0; +} + +SEC("?tc") +__success +int irq_balance(struct __sk_buff *ctx) +{ + unsigned long flags; + + local_irq_save(&flags); + local_irq_restore(&flags); + return 0; +} + +SEC("?tc") +__success +int irq_balance_n(struct __sk_buff *ctx) +{ + 
unsigned long flags1; + unsigned long flags2; + unsigned long flags3; + + local_irq_save(&flags1); + local_irq_save(&flags2); + local_irq_save(&flags3); + local_irq_restore(&flags3); + local_irq_restore(&flags2); + local_irq_restore(&flags1); + return 0; +} + +static __noinline void local_irq_balance(void) +{ + unsigned long flags; + + local_irq_save(&flags); + local_irq_restore(&flags); +} + +static __noinline void local_irq_balance_n(void) +{ + unsigned long flags1; + unsigned long flags2; + unsigned long flags3; + + local_irq_save(&flags1); + local_irq_save(&flags2); + local_irq_save(&flags3); + local_irq_restore(&flags3); + local_irq_restore(&flags2); + local_irq_restore(&flags1); +} + +SEC("?tc") +__success +int irq_balance_subprog(struct __sk_buff *ctx) +{ + local_irq_balance(); + return 0; +} + +SEC("?fentry.s/" SYS_PREFIX "sys_getpgid") +__failure __msg("sleepable helper bpf_copy_from_user#") +int irq_sleepable_helper(void *ctx) +{ + unsigned long flags; + u32 data; + + local_irq_save(&flags); + bpf_copy_from_user(&data, sizeof(data), NULL); + local_irq_restore(&flags); + return 0; +} + +SEC("?fentry.s/" SYS_PREFIX "sys_getpgid") +__failure __msg("kernel func bpf_copy_from_user_str is sleepable within IRQ-disabled region") +int irq_sleepable_kfunc(void *ctx) +{ + unsigned long flags; + u32 data; + + local_irq_save(&flags); + bpf_copy_from_user_str(&data, sizeof(data), NULL, 0); + local_irq_restore(&flags); + return 0; +} + +int __noinline global_local_irq_balance(void) +{ + local_irq_balance_n(); + return 0; +} + +SEC("?tc") +__failure __msg("global function calls are not allowed with IRQs disabled") +int irq_global_subprog(struct __sk_buff *ctx) +{ + unsigned long flags; + + bpf_local_irq_save(&flags); + global_local_irq_balance(); + bpf_local_irq_restore(&flags); + return 0; +} + +SEC("?tc") +__failure __msg("cannot restore irq state out of order") +int irq_restore_ooo(struct __sk_buff *ctx) +{ + unsigned long flags1; + unsigned long flags2; + + bpf_local_irq_save(&flags1); + bpf_local_irq_save(&flags2); + bpf_local_irq_restore(&flags1); + bpf_local_irq_restore(&flags2); + return 0; +} + +SEC("?tc") +__failure __msg("cannot restore irq state out of order") +int irq_restore_ooo_3(struct __sk_buff *ctx) +{ + unsigned long flags1; + unsigned long flags2; + unsigned long flags3; + + bpf_local_irq_save(&flags1); + bpf_local_irq_save(&flags2); + bpf_local_irq_restore(&flags2); + bpf_local_irq_save(&flags3); + bpf_local_irq_restore(&flags1); + bpf_local_irq_restore(&flags3); + return 0; +} + +static __noinline void local_irq_save_3(unsigned long *flags1, unsigned long *flags2, + unsigned long *flags3) +{ + local_irq_save(flags1); + local_irq_save(flags2); + local_irq_save(flags3); +} + +SEC("?tc") +__success +int irq_restore_3_subprog(struct __sk_buff *ctx) +{ + unsigned long flags1; + unsigned long flags2; + unsigned long flags3; + + local_irq_save_3(&flags1, &flags2, &flags3); + bpf_local_irq_restore(&flags3); + bpf_local_irq_restore(&flags2); + bpf_local_irq_restore(&flags1); + return 0; +} + +SEC("?tc") +__failure __msg("cannot restore irq state out of order") +int irq_restore_4_subprog(struct __sk_buff *ctx) +{ + unsigned long flags1; + unsigned long flags2; + unsigned long flags3; + unsigned long flags4; + + local_irq_save_3(&flags1, &flags2, &flags3); + bpf_local_irq_restore(&flags3); + bpf_local_irq_save(&flags4); + bpf_local_irq_restore(&flags4); + bpf_local_irq_restore(&flags1); + return 0; +} + +SEC("?tc") +__failure __msg("cannot restore irq state out of order") +int 
irq_restore_ooo_3_subprog(struct __sk_buff *ctx) +{ + unsigned long flags1; + unsigned long flags2; + unsigned long flags3; + + local_irq_save_3(&flags1, &flags2, &flags3); + bpf_local_irq_restore(&flags3); + bpf_local_irq_restore(&flags2); + bpf_local_irq_save(&flags3); + bpf_local_irq_restore(&flags1); + return 0; +} + +SEC("?tc") +__failure __msg("expected an initialized") +int irq_restore_invalid(struct __sk_buff *ctx) +{ + unsigned long flags1; + unsigned long flags = 0xfaceb00c; + + bpf_local_irq_save(&flags1); + bpf_local_irq_restore(&flags); + return 0; +} + +SEC("?tc") +__failure __msg("expected uninitialized") +int irq_save_invalid(struct __sk_buff *ctx) +{ + unsigned long flags1; + + bpf_local_irq_save(&flags1); + bpf_local_irq_save(&flags1); + return 0; +} + +SEC("?tc") +__failure __msg("expected an initialized") +int irq_restore_iter(struct __sk_buff *ctx) +{ + struct bpf_iter_num it; + + bpf_iter_num_new(&it, 0, 42); + bpf_local_irq_restore((unsigned long *)&it); + return 0; +} + +SEC("?tc") +__failure __msg("Unreleased reference id=1") +int irq_save_iter(struct __sk_buff *ctx) +{ + struct bpf_iter_num it; + + /* Ensure same sized slot has st->ref_obj_id set, so we reject based on + * slot_type != STACK_IRQ_FLAG... + */ + _Static_assert(sizeof(it) == sizeof(unsigned long), "broken iterator size"); + + bpf_iter_num_new(&it, 0, 42); + bpf_local_irq_save((unsigned long *)&it); + bpf_local_irq_restore((unsigned long *)&it); + return 0; +} + +SEC("?tc") +__failure __msg("expected an initialized") +int irq_flag_overwrite(struct __sk_buff *ctx) +{ + unsigned long flags; + + bpf_local_irq_save(&flags); + flags = 0xdeadbeef; + bpf_local_irq_restore(&flags); + return 0; +} + +SEC("?tc") +__failure __msg("expected an initialized") +int irq_flag_overwrite_partial(struct __sk_buff *ctx) +{ + unsigned long flags; + + bpf_local_irq_save(&flags); + *(((char *)&flags) + 1) = 0xff; + bpf_local_irq_restore(&flags); + return 0; +} + +SEC("?tc") +__failure __msg("cannot restore irq state out of order") +int irq_ooo_refs_array(struct __sk_buff *ctx) +{ + unsigned long flags[4]; + struct { int i; } *p; + + /* refs=1 */ + bpf_local_irq_save(&flags[0]); + + /* refs=1,2 */ + p = bpf_obj_new(typeof(*p)); + if (!p) { + bpf_local_irq_restore(&flags[0]); + return 0; + } + + /* refs=1,2,3 */ + bpf_local_irq_save(&flags[1]); + + /* refs=1,2,3,4 */ + bpf_local_irq_save(&flags[2]); + + /* Now when we remove ref=2, the verifier must not break the ordering in + * the refs array between 1,3,4. With an older implementation, the + * verifier would swap the last element with the removed element, but to + * maintain the stack property we need to use memmove. + */ + bpf_obj_drop(p); + + /* Save and restore to reset active_irq_id to 3, as the ordering is now + * refs=1,4,3. When restoring the linear scan will find prev_id in order + * as 3 instead of 4. + */ + bpf_local_irq_save(&flags[3]); + bpf_local_irq_restore(&flags[3]); + + /* With the incorrect implementation, we can release flags[1], flags[2], + * and flags[0], i.e. in the wrong order. 
+ */ + bpf_local_irq_restore(&flags[1]); + bpf_local_irq_restore(&flags[2]); + bpf_local_irq_restore(&flags[0]); + return 0; +} + +char _license[] SEC("license") = "GPL"; -- cgit v1.2.3 From e2f0791124a1b6ca8d570110cbd487969d9d41ef Mon Sep 17 00:00:00 2001 From: Marco Leogrande Date: Mon, 2 Dec 2024 12:45:30 -0800 Subject: tools/testing/selftests/bpf/test_tc_tunnel.sh: Fix wait for server bind Commit f803bcf9208a ("selftests/bpf: Prevent client connect before server bind in test_tc_tunnel.sh") added code that waits for the netcat server to start before the netcat client attempts to connect to it. However, not all calls to 'server_listen' were guarded. This patch adds the existing 'wait_for_port' guard after the remaining call to 'server_listen'. Fixes: f803bcf9208a ("selftests/bpf: Prevent client connect before server bind in test_tc_tunnel.sh") Signed-off-by: Marco Leogrande Acked-by: Stanislav Fomichev Link: https://lore.kernel.org/r/20241202204530.1143448-1-leogrande@google.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/test_tc_tunnel.sh | 1 + 1 file changed, 1 insertion(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/test_tc_tunnel.sh b/tools/testing/selftests/bpf/test_tc_tunnel.sh index 7989ec608454..cb55a908bb0d 100755 --- a/tools/testing/selftests/bpf/test_tc_tunnel.sh +++ b/tools/testing/selftests/bpf/test_tc_tunnel.sh @@ -305,6 +305,7 @@ else client_connect verify_data server_listen + wait_for_port ${port} ${netcat_opt} fi # serverside, use BPF for decap -- cgit v1.2.3 From ef7009decc30eb2515a64253791d61b72229c119 Mon Sep 17 00:00:00 2001 From: Ihor Solodrai Date: Thu, 21 Nov 2024 21:40:17 +0000 Subject: selftests/sched_ext: fix build after renames in sched_ext API The selftests are falining to build on current tip of bpf-next and sched_ext [1]. This has broken BPF CI [2] after merge from upstream. Use appropriate function names in the selftests according to the recent changes in the sched_ext API [3]. 
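As a rough before/after sketch of what the rename means for a scheduler's callbacks (the callback names below are invented for illustration, and DSQ_ID stands in for whatever custom DSQ the scheduler created; only the kfunc names change, the argument lists stay the same):

  /* old kfunc names */
  void BPF_STRUCT_OPS(example_enqueue, struct task_struct *p, u64 enq_flags)
  {
          scx_bpf_dispatch(p, DSQ_ID, SCX_SLICE_DFL, enq_flags);
  }

  void BPF_STRUCT_OPS(example_dispatch, s32 cpu, struct task_struct *prev)
  {
          scx_bpf_consume(DSQ_ID);
  }

  /* renamed kfuncs used by the updated selftests */
  void BPF_STRUCT_OPS(example_enqueue, struct task_struct *p, u64 enq_flags)
  {
          scx_bpf_dsq_insert(p, DSQ_ID, SCX_SLICE_DFL, enq_flags);
  }

  void BPF_STRUCT_OPS(example_dispatch, s32 cpu, struct task_struct *prev)
  {
          scx_bpf_dsq_move_to_local(DSQ_ID);
  }

scx_bpf_dispatch_vtime() is likewise replaced by scx_bpf_dsq_insert_vtime() with an unchanged argument list.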
[1] https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git/commit/?id=fc39fb56917bb3cb53e99560ca3612a84456ada2 [2] https://github.com/kernel-patches/bpf/actions/runs/11959327258/job/33340923745 [3] https://lore.kernel.org/all/20241109194853.580310-1-tj@kernel.org/ Signed-off-by: Ihor Solodrai Acked-by: Andrea Righi Acked-by: David Vernet Signed-off-by: Tejun Heo --- tools/testing/selftests/sched_ext/ddsp_bogus_dsq_fail.bpf.c | 2 +- tools/testing/selftests/sched_ext/ddsp_vtimelocal_fail.bpf.c | 4 ++-- tools/testing/selftests/sched_ext/dsp_local_on.bpf.c | 2 +- tools/testing/selftests/sched_ext/enq_select_cpu_fails.bpf.c | 2 +- tools/testing/selftests/sched_ext/exit.bpf.c | 4 ++-- tools/testing/selftests/sched_ext/maximal.bpf.c | 4 ++-- tools/testing/selftests/sched_ext/select_cpu_dfl.bpf.c | 2 +- tools/testing/selftests/sched_ext/select_cpu_dfl_nodispatch.bpf.c | 2 +- tools/testing/selftests/sched_ext/select_cpu_dispatch.bpf.c | 2 +- .../testing/selftests/sched_ext/select_cpu_dispatch_bad_dsq.bpf.c | 2 +- .../testing/selftests/sched_ext/select_cpu_dispatch_dbl_dsp.bpf.c | 4 ++-- tools/testing/selftests/sched_ext/select_cpu_vtime.bpf.c | 8 ++++---- 12 files changed, 19 insertions(+), 19 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/sched_ext/ddsp_bogus_dsq_fail.bpf.c b/tools/testing/selftests/sched_ext/ddsp_bogus_dsq_fail.bpf.c index 37d9bf6fb745..6f4c3f5a1c5d 100644 --- a/tools/testing/selftests/sched_ext/ddsp_bogus_dsq_fail.bpf.c +++ b/tools/testing/selftests/sched_ext/ddsp_bogus_dsq_fail.bpf.c @@ -20,7 +20,7 @@ s32 BPF_STRUCT_OPS(ddsp_bogus_dsq_fail_select_cpu, struct task_struct *p, * If we dispatch to a bogus DSQ that will fall back to the * builtin global DSQ, we fail gracefully. */ - scx_bpf_dispatch_vtime(p, 0xcafef00d, SCX_SLICE_DFL, + scx_bpf_dsq_insert_vtime(p, 0xcafef00d, SCX_SLICE_DFL, p->scx.dsq_vtime, 0); return cpu; } diff --git a/tools/testing/selftests/sched_ext/ddsp_vtimelocal_fail.bpf.c b/tools/testing/selftests/sched_ext/ddsp_vtimelocal_fail.bpf.c index dffc97d9cdf1..e4a55027778f 100644 --- a/tools/testing/selftests/sched_ext/ddsp_vtimelocal_fail.bpf.c +++ b/tools/testing/selftests/sched_ext/ddsp_vtimelocal_fail.bpf.c @@ -17,8 +17,8 @@ s32 BPF_STRUCT_OPS(ddsp_vtimelocal_fail_select_cpu, struct task_struct *p, if (cpu >= 0) { /* Shouldn't be allowed to vtime dispatch to a builtin DSQ. 
*/ - scx_bpf_dispatch_vtime(p, SCX_DSQ_LOCAL, SCX_SLICE_DFL, - p->scx.dsq_vtime, 0); + scx_bpf_dsq_insert_vtime(p, SCX_DSQ_LOCAL, SCX_SLICE_DFL, + p->scx.dsq_vtime, 0); return cpu; } diff --git a/tools/testing/selftests/sched_ext/dsp_local_on.bpf.c b/tools/testing/selftests/sched_ext/dsp_local_on.bpf.c index 6a7db1502c29..6325bf76f47e 100644 --- a/tools/testing/selftests/sched_ext/dsp_local_on.bpf.c +++ b/tools/testing/selftests/sched_ext/dsp_local_on.bpf.c @@ -45,7 +45,7 @@ void BPF_STRUCT_OPS(dsp_local_on_dispatch, s32 cpu, struct task_struct *prev) target = bpf_get_prandom_u32() % nr_cpus; - scx_bpf_dispatch(p, SCX_DSQ_LOCAL_ON | target, SCX_SLICE_DFL, 0); + scx_bpf_dsq_insert(p, SCX_DSQ_LOCAL_ON | target, SCX_SLICE_DFL, 0); bpf_task_release(p); } diff --git a/tools/testing/selftests/sched_ext/enq_select_cpu_fails.bpf.c b/tools/testing/selftests/sched_ext/enq_select_cpu_fails.bpf.c index 1efb50d61040..a7cf868d5e31 100644 --- a/tools/testing/selftests/sched_ext/enq_select_cpu_fails.bpf.c +++ b/tools/testing/selftests/sched_ext/enq_select_cpu_fails.bpf.c @@ -31,7 +31,7 @@ void BPF_STRUCT_OPS(enq_select_cpu_fails_enqueue, struct task_struct *p, /* Can only call from ops.select_cpu() */ scx_bpf_select_cpu_dfl(p, 0, 0, &found); - scx_bpf_dispatch(p, SCX_DSQ_GLOBAL, SCX_SLICE_DFL, enq_flags); + scx_bpf_dsq_insert(p, SCX_DSQ_GLOBAL, SCX_SLICE_DFL, enq_flags); } SEC(".struct_ops.link") diff --git a/tools/testing/selftests/sched_ext/exit.bpf.c b/tools/testing/selftests/sched_ext/exit.bpf.c index d75d4faf07f6..4bc36182d3ff 100644 --- a/tools/testing/selftests/sched_ext/exit.bpf.c +++ b/tools/testing/selftests/sched_ext/exit.bpf.c @@ -33,7 +33,7 @@ void BPF_STRUCT_OPS(exit_enqueue, struct task_struct *p, u64 enq_flags) if (exit_point == EXIT_ENQUEUE) EXIT_CLEANLY(); - scx_bpf_dispatch(p, DSQ_ID, SCX_SLICE_DFL, enq_flags); + scx_bpf_dsq_insert(p, DSQ_ID, SCX_SLICE_DFL, enq_flags); } void BPF_STRUCT_OPS(exit_dispatch, s32 cpu, struct task_struct *p) @@ -41,7 +41,7 @@ void BPF_STRUCT_OPS(exit_dispatch, s32 cpu, struct task_struct *p) if (exit_point == EXIT_DISPATCH) EXIT_CLEANLY(); - scx_bpf_consume(DSQ_ID); + scx_bpf_dsq_move_to_local(DSQ_ID); } void BPF_STRUCT_OPS(exit_enable, struct task_struct *p) diff --git a/tools/testing/selftests/sched_ext/maximal.bpf.c b/tools/testing/selftests/sched_ext/maximal.bpf.c index 4d4cd8d966db..4c005fa71810 100644 --- a/tools/testing/selftests/sched_ext/maximal.bpf.c +++ b/tools/testing/selftests/sched_ext/maximal.bpf.c @@ -20,7 +20,7 @@ s32 BPF_STRUCT_OPS(maximal_select_cpu, struct task_struct *p, s32 prev_cpu, void BPF_STRUCT_OPS(maximal_enqueue, struct task_struct *p, u64 enq_flags) { - scx_bpf_dispatch(p, SCX_DSQ_GLOBAL, SCX_SLICE_DFL, enq_flags); + scx_bpf_dsq_insert(p, SCX_DSQ_GLOBAL, SCX_SLICE_DFL, enq_flags); } void BPF_STRUCT_OPS(maximal_dequeue, struct task_struct *p, u64 deq_flags) @@ -28,7 +28,7 @@ void BPF_STRUCT_OPS(maximal_dequeue, struct task_struct *p, u64 deq_flags) void BPF_STRUCT_OPS(maximal_dispatch, s32 cpu, struct task_struct *prev) { - scx_bpf_consume(SCX_DSQ_GLOBAL); + scx_bpf_dsq_move_to_local(SCX_DSQ_GLOBAL); } void BPF_STRUCT_OPS(maximal_runnable, struct task_struct *p, u64 enq_flags) diff --git a/tools/testing/selftests/sched_ext/select_cpu_dfl.bpf.c b/tools/testing/selftests/sched_ext/select_cpu_dfl.bpf.c index f171ac470970..13d0f5be788d 100644 --- a/tools/testing/selftests/sched_ext/select_cpu_dfl.bpf.c +++ b/tools/testing/selftests/sched_ext/select_cpu_dfl.bpf.c @@ -30,7 +30,7 @@ void BPF_STRUCT_OPS(select_cpu_dfl_enqueue, struct 
task_struct *p, } scx_bpf_put_idle_cpumask(idle_mask); - scx_bpf_dispatch(p, SCX_DSQ_GLOBAL, SCX_SLICE_DFL, enq_flags); + scx_bpf_dsq_insert(p, SCX_DSQ_GLOBAL, SCX_SLICE_DFL, enq_flags); } SEC(".struct_ops.link") diff --git a/tools/testing/selftests/sched_ext/select_cpu_dfl_nodispatch.bpf.c b/tools/testing/selftests/sched_ext/select_cpu_dfl_nodispatch.bpf.c index 9efdbb7da928..815f1d5d61ac 100644 --- a/tools/testing/selftests/sched_ext/select_cpu_dfl_nodispatch.bpf.c +++ b/tools/testing/selftests/sched_ext/select_cpu_dfl_nodispatch.bpf.c @@ -67,7 +67,7 @@ void BPF_STRUCT_OPS(select_cpu_dfl_nodispatch_enqueue, struct task_struct *p, saw_local = true; } - scx_bpf_dispatch(p, dsq_id, SCX_SLICE_DFL, enq_flags); + scx_bpf_dsq_insert(p, dsq_id, SCX_SLICE_DFL, enq_flags); } s32 BPF_STRUCT_OPS(select_cpu_dfl_nodispatch_init_task, diff --git a/tools/testing/selftests/sched_ext/select_cpu_dispatch.bpf.c b/tools/testing/selftests/sched_ext/select_cpu_dispatch.bpf.c index 59bfc4f36167..4bb99699e920 100644 --- a/tools/testing/selftests/sched_ext/select_cpu_dispatch.bpf.c +++ b/tools/testing/selftests/sched_ext/select_cpu_dispatch.bpf.c @@ -29,7 +29,7 @@ s32 BPF_STRUCT_OPS(select_cpu_dispatch_select_cpu, struct task_struct *p, cpu = prev_cpu; dispatch: - scx_bpf_dispatch(p, dsq_id, SCX_SLICE_DFL, 0); + scx_bpf_dsq_insert(p, dsq_id, SCX_SLICE_DFL, 0); return cpu; } diff --git a/tools/testing/selftests/sched_ext/select_cpu_dispatch_bad_dsq.bpf.c b/tools/testing/selftests/sched_ext/select_cpu_dispatch_bad_dsq.bpf.c index 3bbd5fcdfb18..2a75de11b2cf 100644 --- a/tools/testing/selftests/sched_ext/select_cpu_dispatch_bad_dsq.bpf.c +++ b/tools/testing/selftests/sched_ext/select_cpu_dispatch_bad_dsq.bpf.c @@ -18,7 +18,7 @@ s32 BPF_STRUCT_OPS(select_cpu_dispatch_bad_dsq_select_cpu, struct task_struct *p s32 prev_cpu, u64 wake_flags) { /* Dispatching to a random DSQ should fail. */ - scx_bpf_dispatch(p, 0xcafef00d, SCX_SLICE_DFL, 0); + scx_bpf_dsq_insert(p, 0xcafef00d, SCX_SLICE_DFL, 0); return prev_cpu; } diff --git a/tools/testing/selftests/sched_ext/select_cpu_dispatch_dbl_dsp.bpf.c b/tools/testing/selftests/sched_ext/select_cpu_dispatch_dbl_dsp.bpf.c index 0fda57fe0ecf..99d075695c97 100644 --- a/tools/testing/selftests/sched_ext/select_cpu_dispatch_dbl_dsp.bpf.c +++ b/tools/testing/selftests/sched_ext/select_cpu_dispatch_dbl_dsp.bpf.c @@ -18,8 +18,8 @@ s32 BPF_STRUCT_OPS(select_cpu_dispatch_dbl_dsp_select_cpu, struct task_struct *p s32 prev_cpu, u64 wake_flags) { /* Dispatching twice in a row is disallowed. */ - scx_bpf_dispatch(p, SCX_DSQ_GLOBAL, SCX_SLICE_DFL, 0); - scx_bpf_dispatch(p, SCX_DSQ_GLOBAL, SCX_SLICE_DFL, 0); + scx_bpf_dsq_insert(p, SCX_DSQ_GLOBAL, SCX_SLICE_DFL, 0); + scx_bpf_dsq_insert(p, SCX_DSQ_GLOBAL, SCX_SLICE_DFL, 0); return prev_cpu; } diff --git a/tools/testing/selftests/sched_ext/select_cpu_vtime.bpf.c b/tools/testing/selftests/sched_ext/select_cpu_vtime.bpf.c index e6c67bcf5e6e..bfcb96cd4954 100644 --- a/tools/testing/selftests/sched_ext/select_cpu_vtime.bpf.c +++ b/tools/testing/selftests/sched_ext/select_cpu_vtime.bpf.c @@ -2,8 +2,8 @@ /* * A scheduler that validates that enqueue flags are properly stored and * applied at dispatch time when a task is directly dispatched from - * ops.select_cpu(). We validate this by using scx_bpf_dispatch_vtime(), and - * making the test a very basic vtime scheduler. + * ops.select_cpu(). We validate this by using scx_bpf_dsq_insert_vtime(), + * and making the test a very basic vtime scheduler. * * Copyright (c) 2024 Meta Platforms, Inc. 
and affiliates. * Copyright (c) 2024 David Vernet @@ -47,13 +47,13 @@ s32 BPF_STRUCT_OPS(select_cpu_vtime_select_cpu, struct task_struct *p, cpu = prev_cpu; scx_bpf_test_and_clear_cpu_idle(cpu); ddsp: - scx_bpf_dispatch_vtime(p, VTIME_DSQ, SCX_SLICE_DFL, task_vtime(p), 0); + scx_bpf_dsq_insert_vtime(p, VTIME_DSQ, SCX_SLICE_DFL, task_vtime(p), 0); return cpu; } void BPF_STRUCT_OPS(select_cpu_vtime_dispatch, s32 cpu, struct task_struct *p) { - if (scx_bpf_consume(VTIME_DSQ)) + if (scx_bpf_dsq_move_to_local(VTIME_DSQ)) consumed = true; } -- cgit v1.2.3 From f24d192985cbd6782850fdbb3839039da2f0ee76 Mon Sep 17 00:00:00 2001 From: guanjing Date: Sun, 17 Nov 2024 10:51:29 +0800 Subject: sched_ext: fix application of sizeof to pointer sizeof when applied to a pointer typed expression gives the size of the pointer. The proper fix in this particular case is to code sizeof(*cpuset) instead of sizeof(cpuset). This issue was detected with the help of Coccinelle. Fixes: 22a920209ab6 ("sched_ext: Implement tickless support") Signed-off-by: guanjing Acked-by: Andrea Righi Signed-off-by: Tejun Heo --- tools/sched_ext/scx_central.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/sched_ext/scx_central.c b/tools/sched_ext/scx_central.c index 21deea320bd7..e938156ed0a0 100644 --- a/tools/sched_ext/scx_central.c +++ b/tools/sched_ext/scx_central.c @@ -97,7 +97,7 @@ restart: SCX_BUG_ON(!cpuset, "Failed to allocate cpuset"); CPU_ZERO(cpuset); CPU_SET(skel->rodata->central_cpu, cpuset); - SCX_BUG_ON(sched_setaffinity(0, sizeof(cpuset), cpuset), + SCX_BUG_ON(sched_setaffinity(0, sizeof(*cpuset), cpuset), "Failed to affinitize to central CPU %d (max %d)", skel->rodata->central_cpu, skel->rodata->nr_cpu_ids - 1); CPU_FREE(cpuset); -- cgit v1.2.3 From ac98b3132402e1b892c16f87d766f21ef18dd344 Mon Sep 17 00:00:00 2001 From: Kenjiro Nakayama Date: Wed, 4 Dec 2024 07:28:44 +0900 Subject: selftests/net: call sendmmsg via udpgso_bench.sh Currently, sendmmsg is implemented in udpgso_bench_tx.c, but it is not called by any test script. This patch adds a test for sendmmsg in udpgso_bench.sh. This allows for basic API testing and benchmarking comparisons with GSO. Signed-off-by: Kenjiro Nakayama Reviewed-by: Hangbin Liu Reviewed-by: Willem de Bruijn Link: https://patch.msgid.link/20241203222843.26983-1-nakayamakenjiro@gmail.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/udpgso_bench.sh | 3 +++ 1 file changed, 3 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/net/udpgso_bench.sh b/tools/testing/selftests/net/udpgso_bench.sh index 640bc43452fa..88fa1d53ba2b 100755 --- a/tools/testing/selftests/net/udpgso_bench.sh +++ b/tools/testing/selftests/net/udpgso_bench.sh @@ -92,6 +92,9 @@ run_udp() { echo "udp" run_in_netns ${args} + echo "udp sendmmsg" + run_in_netns ${args} -m + echo "udp gso" run_in_netns ${args} -S 0 -- cgit v1.2.3 From 523d3cc4b6d1ae18bfa516345d48332d455181e6 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Wed, 4 Dec 2024 07:55:42 -0800 Subject: ynl: support enum-cnt-name attribute in legacy definitions This is similar to existing attr-cnt-name in the attributes to allow changing the name of the 'count' enum entry. 
Signed-off-by: Stanislav Fomichev Link: https://patch.msgid.link/20241204155549.641348-2-sdf@fomichev.me Signed-off-by: Jakub Kicinski --- tools/net/ynl/ynl-gen-c.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/net/ynl/ynl-gen-c.py b/tools/net/ynl/ynl-gen-c.py index d8201c4b1520..bfe95826ae3e 100755 --- a/tools/net/ynl/ynl-gen-c.py +++ b/tools/net/ynl/ynl-gen-c.py @@ -801,6 +801,7 @@ class EnumSet(SpecEnumSet): self.user_type = 'int' self.value_pfx = yaml.get('name-prefix', f"{family.ident_name}-{yaml['name']}-") + self.enum_cnt_name = yaml.get('enum-cnt-name', None) super().__init__(family, yaml) @@ -2472,9 +2473,12 @@ def render_uapi(family, cw): max_val = f' = {enum.get_mask()},' cw.p(max_name + max_val) else: + cnt_name = enum.enum_cnt_name max_name = c_upper(name_pfx + 'max') - cw.p('__' + max_name + ',') - cw.p(max_name + ' = (__' + max_name + ' - 1)') + if not cnt_name: + cnt_name = '__' + name_pfx + 'max' + cw.p(c_upper(cnt_name) + ',') + cw.p(max_name + ' = (' + c_upper(cnt_name) + ' - 1)') cw.block_end(line=';') cw.nl() elif const['type'] == 'const': -- cgit v1.2.3 From 8c843ecde4e49e11063ad942675246ec685ea19a Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Wed, 4 Dec 2024 07:55:43 -0800 Subject: ynl: skip rendering attributes with header property in uapi mode To allow omitting some of the attributes in the final generated file. Signed-off-by: Stanislav Fomichev Link: https://patch.msgid.link/20241204155549.641348-3-sdf@fomichev.me Signed-off-by: Jakub Kicinski --- tools/net/ynl/ynl-gen-c.py | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'tools') diff --git a/tools/net/ynl/ynl-gen-c.py b/tools/net/ynl/ynl-gen-c.py index bfe95826ae3e..79829ce39139 100755 --- a/tools/net/ynl/ynl-gen-c.py +++ b/tools/net/ynl/ynl-gen-c.py @@ -801,6 +801,7 @@ class EnumSet(SpecEnumSet): self.user_type = 'int' self.value_pfx = yaml.get('name-prefix', f"{family.ident_name}-{yaml['name']}-") + self.header = yaml.get('header', None) self.enum_cnt_name = yaml.get('enum-cnt-name', None) super().__init__(family, yaml) @@ -2441,6 +2442,9 @@ def render_uapi(family, cw): if const['type'] == 'enum' or const['type'] == 'flags': enum = family.consts[const['name']] + if enum.header: + continue + if enum.has_doc(): if enum.has_entry_doc(): cw.p('/**') -- cgit v1.2.3 From 56881d07f0b4cb97f3c460dc3908eee91fc51a17 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Wed, 4 Dec 2024 07:55:44 -0800 Subject: ynl: support directional specs in ynl-gen-c.py The intent is to generate ethtool uapi headers. 
For now, some of the things are hard-coded: - _MSG_{USER,KERNEL}_MAX - the split between USER and KERNEL messages Signed-off-by: Stanislav Fomichev Link: https://patch.msgid.link/20241204155549.641348-4-sdf@fomichev.me Signed-off-by: Jakub Kicinski --- tools/net/ynl/ynl-gen-c.py | 118 +++++++++++++++++++++++++++++++++------------ 1 file changed, 87 insertions(+), 31 deletions(-) (limited to 'tools') diff --git a/tools/net/ynl/ynl-gen-c.py b/tools/net/ynl/ynl-gen-c.py index 79829ce39139..2bf4d992e54a 100755 --- a/tools/net/ynl/ynl-gen-c.py +++ b/tools/net/ynl/ynl-gen-c.py @@ -2419,6 +2419,87 @@ def uapi_enum_start(family, cw, obj, ckey='', enum_name='enum-name'): cw.block_start(line=start_line) +def render_uapi_unified(family, cw, max_by_define, separate_ntf): + max_name = c_upper(family.get('cmd-max-name', f"{family.op_prefix}MAX")) + cnt_name = c_upper(family.get('cmd-cnt-name', f"__{family.op_prefix}MAX")) + max_value = f"({cnt_name} - 1)" + + uapi_enum_start(family, cw, family['operations'], 'enum-name') + val = 0 + for op in family.msgs.values(): + if separate_ntf and ('notify' in op or 'event' in op): + continue + + suffix = ',' + if op.value != val: + suffix = f" = {op.value}," + val = op.value + cw.p(op.enum_name + suffix) + val += 1 + cw.nl() + cw.p(cnt_name + ('' if max_by_define else ',')) + if not max_by_define: + cw.p(f"{max_name} = {max_value}") + cw.block_end(line=';') + if max_by_define: + cw.p(f"#define {max_name} {max_value}") + cw.nl() + + +def render_uapi_directional(family, cw, max_by_define): + max_name = f"{family.op_prefix}USER_MAX" + cnt_name = f"__{family.op_prefix}USER_CNT" + max_value = f"({cnt_name} - 1)" + + cw.block_start(line='enum') + cw.p(c_upper(f'{family.name}_MSG_USER_NONE = 0,')) + val = 0 + for op in family.msgs.values(): + if 'do' in op and 'event' not in op: + suffix = ',' + if op.value and op.value != val: + suffix = f" = {op.value}," + val = op.value + cw.p(op.enum_name + suffix) + val += 1 + cw.nl() + cw.p(cnt_name + ('' if max_by_define else ',')) + if not max_by_define: + cw.p(f"{max_name} = {max_value}") + cw.block_end(line=';') + if max_by_define: + cw.p(f"#define {max_name} {max_value}") + cw.nl() + + max_name = f"{family.op_prefix}KERNEL_MAX" + cnt_name = f"__{family.op_prefix}KERNEL_CNT" + max_value = f"({cnt_name} - 1)" + + cw.block_start(line='enum') + cw.p(c_upper(f'{family.name}_MSG_KERNEL_NONE = 0,')) + val = 0 + for op in family.msgs.values(): + if ('do' in op and 'reply' in op['do']) or 'notify' in op or 'event' in op: + enum_name = op.enum_name + if 'event' not in op and 'notify' not in op: + enum_name = f'{enum_name}_REPLY' + + suffix = ',' + if op.value and op.value != val: + suffix = f" = {op.value}," + val = op.value + cw.p(enum_name + suffix) + val += 1 + cw.nl() + cw.p(cnt_name + ('' if max_by_define else ',')) + if not max_by_define: + cw.p(f"{max_name} = {max_value}") + cw.block_end(line=';') + if max_by_define: + cw.p(f"#define {max_name} {max_value}") + cw.nl() + + def render_uapi(family, cw): hdr_prot = f"_UAPI_LINUX_{c_upper(family.uapi_header_name)}_H" hdr_prot = hdr_prot.replace('/', '_') @@ -2523,30 +2604,12 @@ def render_uapi(family, cw): # Commands separate_ntf = 'async-prefix' in family['operations'] - max_name = c_upper(family.get('cmd-max-name', f"{family.op_prefix}MAX")) - cnt_name = c_upper(family.get('cmd-cnt-name', f"__{family.op_prefix}MAX")) - max_value = f"({cnt_name} - 1)" - - uapi_enum_start(family, cw, family['operations'], 'enum-name') - val = 0 - for op in family.msgs.values(): - if separate_ntf and 
('notify' in op or 'event' in op): - continue - - suffix = ',' - if op.value != val: - suffix = f" = {op.value}," - val = op.value - cw.p(op.enum_name + suffix) - val += 1 - cw.nl() - cw.p(cnt_name + ('' if max_by_define else ',')) - if not max_by_define: - cw.p(f"{max_name} = {max_value}") - cw.block_end(line=';') - if max_by_define: - cw.p(f"#define {max_name} {max_value}") - cw.nl() + if family.msg_id_model == 'unified': + render_uapi_unified(family, cw, max_by_define, separate_ntf) + elif family.msg_id_model == 'directional': + render_uapi_directional(family, cw, max_by_define) + else: + raise Exception(f'Unsupported message enum-model {family.msg_id_model}') if separate_ntf: uapi_enum_start(family, cw, family['operations'], enum_name='async-enum') @@ -2670,13 +2733,6 @@ def main(): os.sys.exit(1) return - supported_models = ['unified'] - if args.mode in ['user', 'kernel']: - supported_models += ['directional'] - if parsed.msg_id_model not in supported_models: - print(f'Message enum-model {parsed.msg_id_model} not supported for {args.mode} generation') - os.sys.exit(1) - cw = CodeWriter(BaseNlLib(), args.out_file, overwrite=(not args.cmp_out)) _, spec_kernel = find_kernel_root(args.spec) -- cgit v1.2.3 From 001b0b59efbbdf54126c2ae512009d4a7c9f9f88 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Wed, 4 Dec 2024 07:55:46 -0800 Subject: ynl: include uapi header after all dependencies Essentially reverse the order of headers for userspace generated files. Before (make -C tools/net/ynl/; cat tools/net/ynl/ethtool-user.h): #include #include #include #include After: #include #include While at it, make sure we track which headers we've already included and include the headers only once. Signed-off-by: Stanislav Fomichev Link: https://patch.msgid.link/20241204155549.641348-6-sdf@fomichev.me Signed-off-by: Jakub Kicinski --- tools/net/ynl/ynl-gen-c.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/net/ynl/ynl-gen-c.py b/tools/net/ynl/ynl-gen-c.py index 2bf4d992e54a..8098bcbb6f40 100755 --- a/tools/net/ynl/ynl-gen-c.py +++ b/tools/net/ynl/ynl-gen-c.py @@ -2782,12 +2782,17 @@ def main(): else: cw.p(f'#include "{hdr_file}"') cw.p('#include "ynl.h"') - headers = [parsed.uapi_header] + headers = [] for definition in parsed['definitions']: if 'header' in definition: headers.append(definition['header']) + if args.mode == 'user': + headers.append(parsed.uapi_header) + seen_header = [] for one in headers: - cw.p(f"#include <{one}>") + if one not in seen_header: + cw.p(f"#include <{one}>") + seen_header.append(one) cw.nl() if args.mode == "user": -- cgit v1.2.3 From e10500b69c3f3378f3dcfc8c2fe4cdb74fc844f5 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Thu, 5 Dec 2024 13:59:42 +0000 Subject: libbpf: Fix segfault due to libelf functions not setting errno Libelf functions do not set errno on failure. Instead, it relies on its internal _elf_errno value, that can be retrieved via elf_errno (or the corresponding message via elf_errmsg()). From "man libelf": If a libelf function encounters an error it will set an internal error code that can be retrieved with elf_errno. Each thread maintains its own separate error code. The meaning of each error code can be determined with elf_errmsg, which returns a string describing the error. As a consequence, libbpf should not return -errno when a function from libelf fails, because an empty value will not be interpreted as an error and won't prevent the program to stop. 
This is visible in bpf_linker__add_file(), for example, where we call a succession of functions that rely on libelf: err = err ?: linker_load_obj_file(linker, filename, opts, &obj); err = err ?: linker_append_sec_data(linker, &obj); err = err ?: linker_append_elf_syms(linker, &obj); err = err ?: linker_append_elf_relos(linker, &obj); err = err ?: linker_append_btf(linker, &obj); err = err ?: linker_append_btf_ext(linker, &obj); If the object file that we try to process is not, in fact, a correct object file, linker_load_obj_file() may fail with errno not being set, and return 0. In this case we attempt to run linker_append_elf_sysms() and may segfault. This can happen (and was discovered) with bpftool: $ bpftool gen object output.o sample_ret0.bpf.c libbpf: failed to get ELF header for sample_ret0.bpf.c: invalid `Elf' handle zsh: segmentation fault (core dumped) bpftool gen object output.o sample_ret0.bpf.c Fix the issue by returning a non-null error code (-EINVAL) when libelf functions fail. Fixes: faf6ed321cf6 ("libbpf: Add BPF static linker APIs") Signed-off-by: Quentin Monnet Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20241205135942.65262-1-qmo@kernel.org --- tools/lib/bpf/linker.c | 22 ++++++++-------------- 1 file changed, 8 insertions(+), 14 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/linker.c b/tools/lib/bpf/linker.c index cf71d149fe26..e56ba6e67451 100644 --- a/tools/lib/bpf/linker.c +++ b/tools/lib/bpf/linker.c @@ -566,17 +566,15 @@ static int linker_load_obj_file(struct bpf_linker *linker, const char *filename, } obj->elf = elf_begin(obj->fd, ELF_C_READ_MMAP, NULL); if (!obj->elf) { - err = -errno; pr_warn_elf("failed to parse ELF file '%s'", filename); - return err; + return -EINVAL; } /* Sanity check ELF file high-level properties */ ehdr = elf64_getehdr(obj->elf); if (!ehdr) { - err = -errno; pr_warn_elf("failed to get ELF header for %s", filename); - return err; + return -EINVAL; } /* Linker output endianness set by first input object */ @@ -606,9 +604,8 @@ static int linker_load_obj_file(struct bpf_linker *linker, const char *filename, } if (elf_getshdrstrndx(obj->elf, &obj->shstrs_sec_idx)) { - err = -errno; pr_warn_elf("failed to get SHSTRTAB section index for %s", filename); - return err; + return -EINVAL; } scn = NULL; @@ -618,26 +615,23 @@ static int linker_load_obj_file(struct bpf_linker *linker, const char *filename, shdr = elf64_getshdr(scn); if (!shdr) { - err = -errno; pr_warn_elf("failed to get section #%zu header for %s", sec_idx, filename); - return err; + return -EINVAL; } sec_name = elf_strptr(obj->elf, obj->shstrs_sec_idx, shdr->sh_name); if (!sec_name) { - err = -errno; pr_warn_elf("failed to get section #%zu name for %s", sec_idx, filename); - return err; + return -EINVAL; } data = elf_getdata(scn, 0); if (!data) { - err = -errno; pr_warn_elf("failed to get section #%zu (%s) data from %s", sec_idx, sec_name, filename); - return err; + return -EINVAL; } sec = add_src_sec(obj, sec_name); @@ -2680,14 +2674,14 @@ int bpf_linker__finalize(struct bpf_linker *linker) /* Finalize ELF layout */ if (elf_update(linker->elf, ELF_C_NULL) < 0) { - err = -errno; + err = -EINVAL; pr_warn_elf("failed to finalize ELF layout"); return libbpf_err(err); } /* Write out final ELF contents */ if (elf_update(linker->elf, ELF_C_WRITE) < 0) { - err = -errno; + err = -EINVAL; pr_warn_elf("failed to write ELF contents"); return libbpf_err(err); } -- cgit v1.2.3 From 0bee36d1a51366fa57b731f8975f26f92943b43e Mon Sep 17 00:00:00 2001 From: Song 
Yoong Siang Date: Thu, 5 Dec 2024 12:42:58 +0800 Subject: selftests/bpf: Actuate tx_metadata_len in xdp_hw_metadata set XDP_UMEM_TX_METADATA_LEN flag to reserve tx_metadata_len bytes of per-chunk metadata. Fixes: d5e726d9143c ("xsk: Require XDP_UMEM_TX_METADATA_LEN to actuate tx_metadata_len") Signed-off-by: Song Yoong Siang Signed-off-by: Martin KaFai Lau Acked-by: Stanislav Fomichev Link: https://patch.msgid.link/20241205044258.3155799-1-yoong.siang.song@intel.com --- tools/testing/selftests/bpf/xdp_hw_metadata.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/xdp_hw_metadata.c b/tools/testing/selftests/bpf/xdp_hw_metadata.c index 6f9956eed797..ad6c08dfd6c8 100644 --- a/tools/testing/selftests/bpf/xdp_hw_metadata.c +++ b/tools/testing/selftests/bpf/xdp_hw_metadata.c @@ -79,7 +79,7 @@ static int open_xsk(int ifindex, struct xsk *xsk, __u32 queue_id) .fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS, .comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS, .frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE, - .flags = XSK_UMEM__DEFAULT_FLAGS, + .flags = XDP_UMEM_TX_METADATA_LEN, .tx_metadata_len = sizeof(struct xsk_tx_metadata), }; __u32 idx = 0; -- cgit v1.2.3 From 2309132fc5d9d87deb15bda3497326aded6bfe4a Mon Sep 17 00:00:00 2001 From: Song Yoong Siang Date: Thu, 5 Dec 2024 13:19:36 +0800 Subject: selftests/bpf: Enable Tx hwtstamp in xdp_hw_metadata Currently, user needs to manually enable transmit hardware timestamp feature of certain Ethernet drivers, e.g. stmmac and igc drivers, through following command after running the xdp_hw_metadata app. sudo hwstamp_ctl -i eth0 -t 1 To simplify the step test of xdp_hw_metadata, set tx_type to HWTSTAMP_TX_ON to enable hardware timestamping for all outgoing packets, so that user no longer need to execute hwstamp_ctl command. Signed-off-by: Song Yoong Siang Signed-off-by: Martin KaFai Lau Acked-by: Stanislav Fomichev Link: https://patch.msgid.link/20241205051936.3156307-1-yoong.siang.song@intel.com --- tools/testing/selftests/bpf/xdp_hw_metadata.c | 1 + 1 file changed, 1 insertion(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/xdp_hw_metadata.c b/tools/testing/selftests/bpf/xdp_hw_metadata.c index ad6c08dfd6c8..e38675d9b118 100644 --- a/tools/testing/selftests/bpf/xdp_hw_metadata.c +++ b/tools/testing/selftests/bpf/xdp_hw_metadata.c @@ -551,6 +551,7 @@ static void hwtstamp_enable(const char *ifname) { struct hwtstamp_config cfg = { .rx_filter = HWTSTAMP_FILTER_ALL, + .tx_type = HWTSTAMP_TX_ON, }; hwtstamp_ioctl(SIOCGHWTSTAMP, ifname, &saved_hwtstamp_cfg); -- cgit v1.2.3 From 1e7e1f0e8be147ae98fe88ec82150c97265965a6 Mon Sep 17 00:00:00 2001 From: Octavian Purdila Date: Tue, 3 Dec 2024 19:05:20 -0800 Subject: selftests/tc-testing: sfq: test that kernel rejects limit of 1 Add test to check that the kernel rejects a configuration with the limit set to 1. 
Signed-off-by: Octavian Purdila Link: https://patch.msgid.link/20241204030520.2084663-3-tavip@google.com Signed-off-by: Jakub Kicinski --- .../tc-testing/scripts/sfq_rejects_limit_1.py | 21 +++++++++++++++++++++ .../selftests/tc-testing/tc-tests/qdiscs/sfq.json | 20 ++++++++++++++++++++ 2 files changed, 41 insertions(+) create mode 100755 tools/testing/selftests/tc-testing/scripts/sfq_rejects_limit_1.py (limited to 'tools') diff --git a/tools/testing/selftests/tc-testing/scripts/sfq_rejects_limit_1.py b/tools/testing/selftests/tc-testing/scripts/sfq_rejects_limit_1.py new file mode 100755 index 000000000000..0f44a6199495 --- /dev/null +++ b/tools/testing/selftests/tc-testing/scripts/sfq_rejects_limit_1.py @@ -0,0 +1,21 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 +# +# Script that checks that SFQ rejects a limit of 1 at the kernel +# level. We can't use iproute2's tc because it does not accept a limit +# of 1. + +import sys +import os + +from pyroute2 import IPRoute +from pyroute2.netlink.exceptions import NetlinkError + +ip = IPRoute() +ifidx = ip.link_lookup(ifname=sys.argv[1]) + +try: + ip.tc('add', 'sfq', ifidx, limit=1) + sys.exit(1) +except NetlinkError: + sys.exit(0) diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/sfq.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/sfq.json index 16d51936b385..50e8d72781cb 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/sfq.json +++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/sfq.json @@ -208,5 +208,25 @@ "teardown": [ "$TC qdisc del dev $DUMMY handle 1: root" ] + }, + { + "id": "4d6f", + "name": "Check that limit of 1 is rejected", + "category": [ + "qdisc", + "sfq" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + ], + "cmdUnderTest": "./scripts/sfq_rejects_limit_1.py $DUMMY", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "sfq", + "matchCount": "0", + "teardown": [ + ] } ] -- cgit v1.2.3 From d6212d82bf26f3cbd30b84df064080dd98051ae9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Wed, 4 Dec 2024 14:28:26 +0100 Subject: selftests/bpf: Consolidate kernel modules into common directory MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The selftests build four kernel modules which use copy-pasted Makefile targets. This is a bit messy, and doesn't scale so well when we add more modules, so let's consolidate these rules into a single rule generated for each module name, and move the module sources into a single directory. To avoid parallel builds of the different modules stepping on each other's toes during the 'modpost' phase of the Kbuild 'make modules', the module files should really be a grouped target. However, make only added explicit support for grouped targets in version 4.3, which is newer than the minimum version supported by the kernel. However, make implicitly treats pattern matching rules with multiple targets as a grouped target, so we can work around this by turning the rule into a pattern matching target. We do this by replacing '.ko' with '%ko' in the targets with subst(). 
Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: Andrii Nakryiko Acked-by: Viktor Malik Link: https://lore.kernel.org/bpf/20241204-bpf-selftests-mod-compile-v5-1-b96231134a49@redhat.com --- tools/testing/selftests/bpf/Makefile | 64 +- .../selftests/bpf/bpf_test_modorder_x/Makefile | 19 - .../bpf/bpf_test_modorder_x/bpf_test_modorder_x.c | 39 - .../selftests/bpf/bpf_test_modorder_y/Makefile | 19 - .../bpf/bpf_test_modorder_y/bpf_test_modorder_y.c | 39 - .../testing/selftests/bpf/bpf_test_no_cfi/Makefile | 19 - .../bpf/bpf_test_no_cfi/bpf_test_no_cfi.c | 84 -- tools/testing/selftests/bpf/bpf_testmod/.gitignore | 6 - tools/testing/selftests/bpf/bpf_testmod/Makefile | 20 - .../selftests/bpf/bpf_testmod/bpf_testmod-events.h | 71 - .../selftests/bpf/bpf_testmod/bpf_testmod.c | 1487 -------------------- .../selftests/bpf/bpf_testmod/bpf_testmod.h | 113 -- .../selftests/bpf/bpf_testmod/bpf_testmod_kfunc.h | 162 --- .../testing/selftests/bpf/prog_tests/core_reloc.c | 2 +- tools/testing/selftests/bpf/progs/bad_struct_ops.c | 2 +- tools/testing/selftests/bpf/progs/cb_refs.c | 2 +- tools/testing/selftests/bpf/progs/epilogue_exit.c | 4 +- .../selftests/bpf/progs/epilogue_tailcall.c | 4 +- tools/testing/selftests/bpf/progs/iters_testmod.c | 2 +- tools/testing/selftests/bpf/progs/jit_probe_mem.c | 2 +- .../selftests/bpf/progs/kfunc_call_destructive.c | 2 +- .../testing/selftests/bpf/progs/kfunc_call_fail.c | 2 +- .../testing/selftests/bpf/progs/kfunc_call_race.c | 2 +- .../testing/selftests/bpf/progs/kfunc_call_test.c | 2 +- .../selftests/bpf/progs/kfunc_call_test_subprog.c | 2 +- .../testing/selftests/bpf/progs/local_kptr_stash.c | 2 +- tools/testing/selftests/bpf/progs/map_kptr.c | 2 +- tools/testing/selftests/bpf/progs/map_kptr_fail.c | 2 +- tools/testing/selftests/bpf/progs/missed_kprobe.c | 2 +- .../selftests/bpf/progs/missed_kprobe_recursion.c | 2 +- tools/testing/selftests/bpf/progs/nested_acquire.c | 2 +- tools/testing/selftests/bpf/progs/pro_epilogue.c | 4 +- .../selftests/bpf/progs/pro_epilogue_goto_start.c | 4 +- tools/testing/selftests/bpf/progs/sock_addr_kern.c | 2 +- .../selftests/bpf/progs/struct_ops_detach.c | 2 +- .../selftests/bpf/progs/struct_ops_forgotten_cb.c | 2 +- .../selftests/bpf/progs/struct_ops_maybe_null.c | 2 +- .../bpf/progs/struct_ops_maybe_null_fail.c | 2 +- .../selftests/bpf/progs/struct_ops_module.c | 2 +- .../selftests/bpf/progs/struct_ops_multi_pages.c | 2 +- .../selftests/bpf/progs/struct_ops_nulled_out_cb.c | 2 +- .../selftests/bpf/progs/struct_ops_private_stack.c | 2 +- .../bpf/progs/struct_ops_private_stack_fail.c | 2 +- .../bpf/progs/struct_ops_private_stack_recur.c | 2 +- .../bpf/progs/test_kfunc_param_nullable.c | 2 +- .../selftests/bpf/progs/test_module_attach.c | 2 +- .../selftests/bpf/progs/test_tp_btf_nullable.c | 2 +- .../testing/selftests/bpf/progs/unsupported_ops.c | 2 +- tools/testing/selftests/bpf/progs/wq.c | 2 +- tools/testing/selftests/bpf/progs/wq_failures.c | 2 +- tools/testing/selftests/bpf/test_kmods/.gitignore | 6 + tools/testing/selftests/bpf/test_kmods/Makefile | 21 + .../selftests/bpf/test_kmods/bpf_test_modorder_x.c | 39 + .../selftests/bpf/test_kmods/bpf_test_modorder_y.c | 39 + .../selftests/bpf/test_kmods/bpf_test_no_cfi.c | 84 ++ .../selftests/bpf/test_kmods/bpf_testmod-events.h | 71 + .../testing/selftests/bpf/test_kmods/bpf_testmod.c | 1487 ++++++++++++++++++++ .../testing/selftests/bpf/test_kmods/bpf_testmod.h | 113 ++ .../selftests/bpf/test_kmods/bpf_testmod_kfunc.h | 162 +++ 59 files changed, 2084 insertions(+), 2162 
deletions(-) delete mode 100644 tools/testing/selftests/bpf/bpf_test_modorder_x/Makefile delete mode 100644 tools/testing/selftests/bpf/bpf_test_modorder_x/bpf_test_modorder_x.c delete mode 100644 tools/testing/selftests/bpf/bpf_test_modorder_y/Makefile delete mode 100644 tools/testing/selftests/bpf/bpf_test_modorder_y/bpf_test_modorder_y.c delete mode 100644 tools/testing/selftests/bpf/bpf_test_no_cfi/Makefile delete mode 100644 tools/testing/selftests/bpf/bpf_test_no_cfi/bpf_test_no_cfi.c delete mode 100644 tools/testing/selftests/bpf/bpf_testmod/.gitignore delete mode 100644 tools/testing/selftests/bpf/bpf_testmod/Makefile delete mode 100644 tools/testing/selftests/bpf/bpf_testmod/bpf_testmod-events.h delete mode 100644 tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c delete mode 100644 tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h delete mode 100644 tools/testing/selftests/bpf/bpf_testmod/bpf_testmod_kfunc.h create mode 100644 tools/testing/selftests/bpf/test_kmods/.gitignore create mode 100644 tools/testing/selftests/bpf/test_kmods/Makefile create mode 100644 tools/testing/selftests/bpf/test_kmods/bpf_test_modorder_x.c create mode 100644 tools/testing/selftests/bpf/test_kmods/bpf_test_modorder_y.c create mode 100644 tools/testing/selftests/bpf/test_kmods/bpf_test_no_cfi.c create mode 100644 tools/testing/selftests/bpf/test_kmods/bpf_testmod-events.h create mode 100644 tools/testing/selftests/bpf/test_kmods/bpf_testmod.c create mode 100644 tools/testing/selftests/bpf/test_kmods/bpf_testmod.h create mode 100644 tools/testing/selftests/bpf/test_kmods/bpf_testmod_kfunc.h (limited to 'tools') diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index a1964d40a60e..b03df5d295c8 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -150,13 +150,13 @@ TEST_PROGS_EXTENDED := with_addr.sh \ with_tunnels.sh ima_setup.sh verify_sig_setup.sh \ test_xdp_vlan.sh test_bpftool.py +TEST_KMODS := bpf_testmod.ko bpf_test_no_cfi.ko bpf_test_modorder_x.ko \ + bpf_test_modorder_y.ko +TEST_KMOD_TARGETS = $(addprefix $(OUTPUT)/,$(TEST_KMODS)) + # Compile but not part of 'make run_tests' TEST_GEN_PROGS_EXTENDED = \ bench \ - bpf_testmod.ko \ - bpf_test_modorder_x.ko \ - bpf_test_modorder_y.ko \ - bpf_test_no_cfi.ko \ flow_dissector_load \ runqslower \ test_cpp \ @@ -182,8 +182,9 @@ override define CLEAN $(Q)$(RM) -r $(TEST_GEN_PROGS) $(Q)$(RM) -r $(TEST_GEN_PROGS_EXTENDED) $(Q)$(RM) -r $(TEST_GEN_FILES) + $(Q)$(RM) -r $(TEST_KMODS) $(Q)$(RM) -r $(EXTRA_CLEAN) - $(Q)$(MAKE) -C bpf_testmod clean + $(Q)$(MAKE) -C test_kmods clean $(Q)$(MAKE) docs-clean endef @@ -249,7 +250,7 @@ endif # to build individual tests. # NOTE: Semicolon at the end is critical to override lib.mk's default static # rule for binaries. 
-$(notdir $(TEST_GEN_PROGS) \ +$(notdir $(TEST_GEN_PROGS) $(TEST_KMODS) \ $(TEST_GEN_PROGS_EXTENDED)): %: $(OUTPUT)/% ; # sort removes libbpf duplicates when not cross-building @@ -303,37 +304,19 @@ $(OUTPUT)/sign-file: ../../../../scripts/sign-file.c $< -o $@ \ $(shell $(PKG_CONFIG) --libs libcrypto 2> /dev/null || echo -lcrypto) -$(OUTPUT)/bpf_testmod.ko: $(VMLINUX_BTF) $(RESOLVE_BTFIDS) $(wildcard bpf_testmod/Makefile bpf_testmod/*.[ch]) - $(call msg,MOD,,$@) - $(Q)$(RM) bpf_testmod/bpf_testmod.ko # force re-compilation - $(Q)$(MAKE) $(submake_extras) -C bpf_testmod \ - RESOLVE_BTFIDS=$(RESOLVE_BTFIDS) \ - EXTRA_CFLAGS='' EXTRA_LDFLAGS='' - $(Q)cp bpf_testmod/bpf_testmod.ko $@ - -$(OUTPUT)/bpf_test_no_cfi.ko: $(VMLINUX_BTF) $(RESOLVE_BTFIDS) $(wildcard bpf_test_no_cfi/Makefile bpf_test_no_cfi/*.[ch]) - $(call msg,MOD,,$@) - $(Q)$(RM) bpf_test_no_cfi/bpf_test_no_cfi.ko # force re-compilation - $(Q)$(MAKE) $(submake_extras) -C bpf_test_no_cfi \ - RESOLVE_BTFIDS=$(RESOLVE_BTFIDS) \ +# This should really be a grouped target, but make versions before 4.3 don't +# support that for regular rules. However, pattern matching rules are implicitly +# treated as grouped even with older versions of make, so as a workaround, the +# subst() turns the rule into a pattern matching rule +$(addprefix test_kmods/,$(subst .ko,%ko,$(TEST_KMODS))): $(VMLINUX_BTF) $(RESOLVE_BTFIDS) $(wildcard test_kmods/Makefile test_kmods/*.[ch]) + $(Q)$(RM) test_kmods/*.ko test_kmods/*.mod.o # force re-compilation + $(Q)$(MAKE) $(submake_extras) -C test_kmods \ + RESOLVE_BTFIDS=$(RESOLVE_BTFIDS) \ EXTRA_CFLAGS='' EXTRA_LDFLAGS='' - $(Q)cp bpf_test_no_cfi/bpf_test_no_cfi.ko $@ -$(OUTPUT)/bpf_test_modorder_x.ko: $(VMLINUX_BTF) $(RESOLVE_BTFIDS) $(wildcard bpf_test_modorder_x/Makefile bpf_test_modorder_x/*.[ch]) +$(TEST_KMOD_TARGETS): $(addprefix test_kmods/,$(TEST_KMODS)) $(call msg,MOD,,$@) - $(Q)$(RM) bpf_test_modorder_x/bpf_test_modorder_x.ko # force re-compilation - $(Q)$(MAKE) $(submake_extras) -C bpf_test_modorder_x \ - RESOLVE_BTFIDS=$(RESOLVE_BTFIDS) \ - EXTRA_CFLAGS='' EXTRA_LDFLAGS='' - $(Q)cp bpf_test_modorder_x/bpf_test_modorder_x.ko $@ - -$(OUTPUT)/bpf_test_modorder_y.ko: $(VMLINUX_BTF) $(RESOLVE_BTFIDS) $(wildcard bpf_test_modorder_y/Makefile bpf_test_modorder_y/*.[ch]) - $(call msg,MOD,,$@) - $(Q)$(RM) bpf_test_modorder_y/bpf_test_modorder_y.ko # force re-compilation - $(Q)$(MAKE) $(submake_extras) -C bpf_test_modorder_y \ - RESOLVE_BTFIDS=$(RESOLVE_BTFIDS) \ - EXTRA_CFLAGS='' EXTRA_LDFLAGS='' - $(Q)cp bpf_test_modorder_y/bpf_test_modorder_y.ko $@ + $(Q)cp test_kmods/$(@F) $@ DEFAULT_BPFTOOL := $(HOST_SCRATCH_DIR)/sbin/bpftool @@ -758,14 +741,12 @@ TRUNNER_EXTRA_SOURCES := test_progs.c \ json_writer.c \ flow_dissector_load.h \ ip_check_defrag_frags.h -TRUNNER_EXTRA_FILES := $(OUTPUT)/urandom_read $(OUTPUT)/bpf_testmod.ko \ - $(OUTPUT)/bpf_test_no_cfi.ko \ - $(OUTPUT)/bpf_test_modorder_x.ko \ - $(OUTPUT)/bpf_test_modorder_y.ko \ +TRUNNER_EXTRA_FILES := $(OUTPUT)/urandom_read \ $(OUTPUT)/liburandom_read.so \ $(OUTPUT)/xdp_synproxy \ $(OUTPUT)/sign-file \ $(OUTPUT)/uprobe_multi \ + $(TEST_KMOD_TARGETS) \ ima_setup.sh \ verify_sig_setup.sh \ $(wildcard progs/btf_dump_test_case_*.c) \ @@ -892,12 +873,9 @@ $(OUTPUT)/uprobe_multi: uprobe_multi.c uprobe_multi.ld EXTRA_CLEAN := $(SCRATCH_DIR) $(HOST_SCRATCH_DIR) \ prog_tests/tests.h map_tests/tests.h verifier/tests.h \ - feature bpftool \ + feature bpftool $(TEST_KMOD_TARGETS) \ $(addprefix $(OUTPUT)/,*.o *.d *.skel.h *.lskel.h *.subskel.h \ - no_alu32 cpuv4 bpf_gcc 
bpf_testmod.ko \ - bpf_test_no_cfi.ko \ - bpf_test_modorder_x.ko \ - bpf_test_modorder_y.ko \ + no_alu32 cpuv4 bpf_gcc \ liburandom_read.so) \ $(OUTPUT)/FEATURE-DUMP.selftests diff --git a/tools/testing/selftests/bpf/bpf_test_modorder_x/Makefile b/tools/testing/selftests/bpf/bpf_test_modorder_x/Makefile deleted file mode 100644 index 40b25b98ad1b..000000000000 --- a/tools/testing/selftests/bpf/bpf_test_modorder_x/Makefile +++ /dev/null @@ -1,19 +0,0 @@ -BPF_TESTMOD_DIR := $(realpath $(dir $(abspath $(lastword $(MAKEFILE_LIST))))) -KDIR ?= $(abspath $(BPF_TESTMOD_DIR)/../../../../..) - -ifeq ($(V),1) -Q = -else -Q = @ -endif - -MODULES = bpf_test_modorder_x.ko - -obj-m += bpf_test_modorder_x.o - -all: - +$(Q)make -C $(KDIR) M=$(BPF_TESTMOD_DIR) modules - -clean: - +$(Q)make -C $(KDIR) M=$(BPF_TESTMOD_DIR) clean - diff --git a/tools/testing/selftests/bpf/bpf_test_modorder_x/bpf_test_modorder_x.c b/tools/testing/selftests/bpf/bpf_test_modorder_x/bpf_test_modorder_x.c deleted file mode 100644 index 0cc747fa912f..000000000000 --- a/tools/testing/selftests/bpf/bpf_test_modorder_x/bpf_test_modorder_x.c +++ /dev/null @@ -1,39 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include -#include -#include -#include - -__bpf_kfunc_start_defs(); - -__bpf_kfunc int bpf_test_modorder_retx(void) -{ - return 'x'; -} - -__bpf_kfunc_end_defs(); - -BTF_KFUNCS_START(bpf_test_modorder_kfunc_x_ids) -BTF_ID_FLAGS(func, bpf_test_modorder_retx); -BTF_KFUNCS_END(bpf_test_modorder_kfunc_x_ids) - -static const struct btf_kfunc_id_set bpf_test_modorder_x_set = { - .owner = THIS_MODULE, - .set = &bpf_test_modorder_kfunc_x_ids, -}; - -static int __init bpf_test_modorder_x_init(void) -{ - return register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, - &bpf_test_modorder_x_set); -} - -static void __exit bpf_test_modorder_x_exit(void) -{ -} - -module_init(bpf_test_modorder_x_init); -module_exit(bpf_test_modorder_x_exit); - -MODULE_DESCRIPTION("BPF selftest ordertest module X"); -MODULE_LICENSE("GPL"); diff --git a/tools/testing/selftests/bpf/bpf_test_modorder_y/Makefile b/tools/testing/selftests/bpf/bpf_test_modorder_y/Makefile deleted file mode 100644 index 52c3ab9d84e2..000000000000 --- a/tools/testing/selftests/bpf/bpf_test_modorder_y/Makefile +++ /dev/null @@ -1,19 +0,0 @@ -BPF_TESTMOD_DIR := $(realpath $(dir $(abspath $(lastword $(MAKEFILE_LIST))))) -KDIR ?= $(abspath $(BPF_TESTMOD_DIR)/../../../../..) 
- -ifeq ($(V),1) -Q = -else -Q = @ -endif - -MODULES = bpf_test_modorder_y.ko - -obj-m += bpf_test_modorder_y.o - -all: - +$(Q)make -C $(KDIR) M=$(BPF_TESTMOD_DIR) modules - -clean: - +$(Q)make -C $(KDIR) M=$(BPF_TESTMOD_DIR) clean - diff --git a/tools/testing/selftests/bpf/bpf_test_modorder_y/bpf_test_modorder_y.c b/tools/testing/selftests/bpf/bpf_test_modorder_y/bpf_test_modorder_y.c deleted file mode 100644 index c627ee085d13..000000000000 --- a/tools/testing/selftests/bpf/bpf_test_modorder_y/bpf_test_modorder_y.c +++ /dev/null @@ -1,39 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include -#include -#include -#include - -__bpf_kfunc_start_defs(); - -__bpf_kfunc int bpf_test_modorder_rety(void) -{ - return 'y'; -} - -__bpf_kfunc_end_defs(); - -BTF_KFUNCS_START(bpf_test_modorder_kfunc_y_ids) -BTF_ID_FLAGS(func, bpf_test_modorder_rety); -BTF_KFUNCS_END(bpf_test_modorder_kfunc_y_ids) - -static const struct btf_kfunc_id_set bpf_test_modorder_y_set = { - .owner = THIS_MODULE, - .set = &bpf_test_modorder_kfunc_y_ids, -}; - -static int __init bpf_test_modorder_y_init(void) -{ - return register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, - &bpf_test_modorder_y_set); -} - -static void __exit bpf_test_modorder_y_exit(void) -{ -} - -module_init(bpf_test_modorder_y_init); -module_exit(bpf_test_modorder_y_exit); - -MODULE_DESCRIPTION("BPF selftest ordertest module Y"); -MODULE_LICENSE("GPL"); diff --git a/tools/testing/selftests/bpf/bpf_test_no_cfi/Makefile b/tools/testing/selftests/bpf/bpf_test_no_cfi/Makefile deleted file mode 100644 index ed5143b79edf..000000000000 --- a/tools/testing/selftests/bpf/bpf_test_no_cfi/Makefile +++ /dev/null @@ -1,19 +0,0 @@ -BPF_TEST_NO_CFI_DIR := $(realpath $(dir $(abspath $(lastword $(MAKEFILE_LIST))))) -KDIR ?= $(abspath $(BPF_TEST_NO_CFI_DIR)/../../../../..) - -ifeq ($(V),1) -Q = -else -Q = @ -endif - -MODULES = bpf_test_no_cfi.ko - -obj-m += bpf_test_no_cfi.o - -all: - +$(Q)make -C $(KDIR) M=$(BPF_TEST_NO_CFI_DIR) modules - -clean: - +$(Q)make -C $(KDIR) M=$(BPF_TEST_NO_CFI_DIR) clean - diff --git a/tools/testing/selftests/bpf/bpf_test_no_cfi/bpf_test_no_cfi.c b/tools/testing/selftests/bpf/bpf_test_no_cfi/bpf_test_no_cfi.c deleted file mode 100644 index 948eb3962732..000000000000 --- a/tools/testing/selftests/bpf/bpf_test_no_cfi/bpf_test_no_cfi.c +++ /dev/null @@ -1,84 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. 
*/ -#include -#include -#include -#include - -struct bpf_test_no_cfi_ops { - void (*fn_1)(void); - void (*fn_2)(void); -}; - -static int dummy_init(struct btf *btf) -{ - return 0; -} - -static int dummy_init_member(const struct btf_type *t, - const struct btf_member *member, - void *kdata, const void *udata) -{ - return 0; -} - -static int dummy_reg(void *kdata, struct bpf_link *link) -{ - return 0; -} - -static void dummy_unreg(void *kdata, struct bpf_link *link) -{ -} - -static const struct bpf_verifier_ops dummy_verifier_ops; - -static void bpf_test_no_cfi_ops__fn_1(void) -{ -} - -static void bpf_test_no_cfi_ops__fn_2(void) -{ -} - -static struct bpf_test_no_cfi_ops __test_no_cif_ops = { - .fn_1 = bpf_test_no_cfi_ops__fn_1, - .fn_2 = bpf_test_no_cfi_ops__fn_2, -}; - -static struct bpf_struct_ops test_no_cif_ops = { - .verifier_ops = &dummy_verifier_ops, - .init = dummy_init, - .init_member = dummy_init_member, - .reg = dummy_reg, - .unreg = dummy_unreg, - .name = "bpf_test_no_cfi_ops", - .owner = THIS_MODULE, -}; - -static int bpf_test_no_cfi_init(void) -{ - int ret; - - ret = register_bpf_struct_ops(&test_no_cif_ops, - bpf_test_no_cfi_ops); - if (!ret) - return -EINVAL; - - test_no_cif_ops.cfi_stubs = &__test_no_cif_ops; - ret = register_bpf_struct_ops(&test_no_cif_ops, - bpf_test_no_cfi_ops); - return ret; -} - -static void bpf_test_no_cfi_exit(void) -{ -} - -module_init(bpf_test_no_cfi_init); -module_exit(bpf_test_no_cfi_exit); - -MODULE_AUTHOR("Kuifeng Lee"); -MODULE_DESCRIPTION("BPF no cfi_stubs test module"); -MODULE_LICENSE("Dual BSD/GPL"); - diff --git a/tools/testing/selftests/bpf/bpf_testmod/.gitignore b/tools/testing/selftests/bpf/bpf_testmod/.gitignore deleted file mode 100644 index ded513777281..000000000000 --- a/tools/testing/selftests/bpf/bpf_testmod/.gitignore +++ /dev/null @@ -1,6 +0,0 @@ -*.mod -*.mod.c -*.o -.ko -/Module.symvers -/modules.order diff --git a/tools/testing/selftests/bpf/bpf_testmod/Makefile b/tools/testing/selftests/bpf/bpf_testmod/Makefile deleted file mode 100644 index 15cb36c4483a..000000000000 --- a/tools/testing/selftests/bpf/bpf_testmod/Makefile +++ /dev/null @@ -1,20 +0,0 @@ -BPF_TESTMOD_DIR := $(realpath $(dir $(abspath $(lastword $(MAKEFILE_LIST))))) -KDIR ?= $(abspath $(BPF_TESTMOD_DIR)/../../../../..) 
- -ifeq ($(V),1) -Q = -else -Q = @ -endif - -MODULES = bpf_testmod.ko - -obj-m += bpf_testmod.o -CFLAGS_bpf_testmod.o = -I$(src) - -all: - +$(Q)make -C $(KDIR) M=$(BPF_TESTMOD_DIR) modules - -clean: - +$(Q)make -C $(KDIR) M=$(BPF_TESTMOD_DIR) clean - diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod-events.h b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod-events.h deleted file mode 100644 index aeef86b3da74..000000000000 --- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod-events.h +++ /dev/null @@ -1,71 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* Copyright (c) 2020 Facebook */ -#undef TRACE_SYSTEM -#define TRACE_SYSTEM bpf_testmod - -#if !defined(_BPF_TESTMOD_EVENTS_H) || defined(TRACE_HEADER_MULTI_READ) -#define _BPF_TESTMOD_EVENTS_H - -#include -#include "bpf_testmod.h" - -TRACE_EVENT(bpf_testmod_test_read, - TP_PROTO(struct task_struct *task, struct bpf_testmod_test_read_ctx *ctx), - TP_ARGS(task, ctx), - TP_STRUCT__entry( - __field(pid_t, pid) - __array(char, comm, TASK_COMM_LEN) - __field(loff_t, off) - __field(size_t, len) - ), - TP_fast_assign( - __entry->pid = task->pid; - memcpy(__entry->comm, task->comm, TASK_COMM_LEN); - __entry->off = ctx->off; - __entry->len = ctx->len; - ), - TP_printk("pid=%d comm=%s off=%llu len=%zu", - __entry->pid, __entry->comm, __entry->off, __entry->len) -); - -/* A bare tracepoint with no event associated with it */ -DECLARE_TRACE(bpf_testmod_test_write_bare, - TP_PROTO(struct task_struct *task, struct bpf_testmod_test_write_ctx *ctx), - TP_ARGS(task, ctx) -); - -/* Used in bpf_testmod_test_read() to test __nullable suffix */ -DECLARE_TRACE(bpf_testmod_test_nullable_bare, - TP_PROTO(struct bpf_testmod_test_read_ctx *ctx__nullable), - TP_ARGS(ctx__nullable) -); - -struct sk_buff; - -DECLARE_TRACE(bpf_testmod_test_raw_tp_null, - TP_PROTO(struct sk_buff *skb), - TP_ARGS(skb) -); - - -#undef BPF_TESTMOD_DECLARE_TRACE -#ifdef DECLARE_TRACE_WRITABLE -#define BPF_TESTMOD_DECLARE_TRACE(call, proto, args, size) \ - DECLARE_TRACE_WRITABLE(call, PARAMS(proto), PARAMS(args), size) -#else -#define BPF_TESTMOD_DECLARE_TRACE(call, proto, args, size) \ - DECLARE_TRACE(call, PARAMS(proto), PARAMS(args)) -#endif - -BPF_TESTMOD_DECLARE_TRACE(bpf_testmod_test_writable_bare, - TP_PROTO(struct bpf_testmod_test_writable_ctx *ctx), - TP_ARGS(ctx), - sizeof(struct bpf_testmod_test_writable_ctx) -); - -#endif /* _BPF_TESTMOD_EVENTS_H */ - -#undef TRACE_INCLUDE_PATH -#define TRACE_INCLUDE_PATH . 
-#define TRACE_INCLUDE_FILE bpf_testmod-events -#include diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c deleted file mode 100644 index cc9dde507aba..000000000000 --- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c +++ /dev/null @@ -1,1487 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* Copyright (c) 2020 Facebook */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "bpf_testmod.h" -#include "bpf_testmod_kfunc.h" - -#define CREATE_TRACE_POINTS -#include "bpf_testmod-events.h" - -#define CONNECT_TIMEOUT_SEC 1 - -typedef int (*func_proto_typedef)(long); -typedef int (*func_proto_typedef_nested1)(func_proto_typedef); -typedef int (*func_proto_typedef_nested2)(func_proto_typedef_nested1); - -DEFINE_PER_CPU(int, bpf_testmod_ksym_percpu) = 123; -long bpf_testmod_test_struct_arg_result; -static DEFINE_MUTEX(sock_lock); -static struct socket *sock; - -struct bpf_testmod_struct_arg_1 { - int a; -}; -struct bpf_testmod_struct_arg_2 { - long a; - long b; -}; - -struct bpf_testmod_struct_arg_3 { - int a; - int b[]; -}; - -struct bpf_testmod_struct_arg_4 { - u64 a; - int b; -}; - -struct bpf_testmod_struct_arg_5 { - char a; - short b; - int c; - long d; -}; - -__bpf_hook_start(); - -noinline int -bpf_testmod_test_struct_arg_1(struct bpf_testmod_struct_arg_2 a, int b, int c) { - bpf_testmod_test_struct_arg_result = a.a + a.b + b + c; - return bpf_testmod_test_struct_arg_result; -} - -noinline int -bpf_testmod_test_struct_arg_2(int a, struct bpf_testmod_struct_arg_2 b, int c) { - bpf_testmod_test_struct_arg_result = a + b.a + b.b + c; - return bpf_testmod_test_struct_arg_result; -} - -noinline int -bpf_testmod_test_struct_arg_3(int a, int b, struct bpf_testmod_struct_arg_2 c) { - bpf_testmod_test_struct_arg_result = a + b + c.a + c.b; - return bpf_testmod_test_struct_arg_result; -} - -noinline int -bpf_testmod_test_struct_arg_4(struct bpf_testmod_struct_arg_1 a, int b, - int c, int d, struct bpf_testmod_struct_arg_2 e) { - bpf_testmod_test_struct_arg_result = a.a + b + c + d + e.a + e.b; - return bpf_testmod_test_struct_arg_result; -} - -noinline int -bpf_testmod_test_struct_arg_5(void) { - bpf_testmod_test_struct_arg_result = 1; - return bpf_testmod_test_struct_arg_result; -} - -noinline int -bpf_testmod_test_struct_arg_6(struct bpf_testmod_struct_arg_3 *a) { - bpf_testmod_test_struct_arg_result = a->b[0]; - return bpf_testmod_test_struct_arg_result; -} - -noinline int -bpf_testmod_test_struct_arg_7(u64 a, void *b, short c, int d, void *e, - struct bpf_testmod_struct_arg_4 f) -{ - bpf_testmod_test_struct_arg_result = a + (long)b + c + d + - (long)e + f.a + f.b; - return bpf_testmod_test_struct_arg_result; -} - -noinline int -bpf_testmod_test_struct_arg_8(u64 a, void *b, short c, int d, void *e, - struct bpf_testmod_struct_arg_4 f, int g) -{ - bpf_testmod_test_struct_arg_result = a + (long)b + c + d + - (long)e + f.a + f.b + g; - return bpf_testmod_test_struct_arg_result; -} - -noinline int -bpf_testmod_test_struct_arg_9(u64 a, void *b, short c, int d, void *e, char f, - short g, struct bpf_testmod_struct_arg_5 h, long i) -{ - bpf_testmod_test_struct_arg_result = a + (long)b + c + d + (long)e + - f + g + h.a + h.b + h.c + h.d + i; - return bpf_testmod_test_struct_arg_result; -} - -noinline int -bpf_testmod_test_arg_ptr_to_struct(struct bpf_testmod_struct_arg_1 *a) 
{ - bpf_testmod_test_struct_arg_result = a->a; - return bpf_testmod_test_struct_arg_result; -} - -__bpf_kfunc void -bpf_testmod_test_mod_kfunc(int i) -{ - *(int *)this_cpu_ptr(&bpf_testmod_ksym_percpu) = i; -} - -__bpf_kfunc int bpf_iter_testmod_seq_new(struct bpf_iter_testmod_seq *it, s64 value, int cnt) -{ - it->cnt = cnt; - - if (cnt < 0) - return -EINVAL; - - it->value = value; - - return 0; -} - -__bpf_kfunc s64 *bpf_iter_testmod_seq_next(struct bpf_iter_testmod_seq* it) -{ - if (it->cnt <= 0) - return NULL; - - it->cnt--; - - return &it->value; -} - -__bpf_kfunc s64 bpf_iter_testmod_seq_value(int val, struct bpf_iter_testmod_seq* it__iter) -{ - if (it__iter->cnt < 0) - return 0; - - return val + it__iter->value; -} - -__bpf_kfunc void bpf_iter_testmod_seq_destroy(struct bpf_iter_testmod_seq *it) -{ - it->cnt = 0; -} - -__bpf_kfunc void bpf_kfunc_common_test(void) -{ -} - -__bpf_kfunc void bpf_kfunc_dynptr_test(struct bpf_dynptr *ptr, - struct bpf_dynptr *ptr__nullable) -{ -} - -__bpf_kfunc struct sk_buff *bpf_kfunc_nested_acquire_nonzero_offset_test(struct sk_buff_head *ptr) -{ - return NULL; -} - -__bpf_kfunc struct sk_buff *bpf_kfunc_nested_acquire_zero_offset_test(struct sock_common *ptr) -{ - return NULL; -} - -__bpf_kfunc void bpf_kfunc_nested_release_test(struct sk_buff *ptr) -{ -} - -__bpf_kfunc void bpf_kfunc_trusted_vma_test(struct vm_area_struct *ptr) -{ -} - -__bpf_kfunc void bpf_kfunc_trusted_task_test(struct task_struct *ptr) -{ -} - -__bpf_kfunc void bpf_kfunc_trusted_num_test(int *ptr) -{ -} - -__bpf_kfunc void bpf_kfunc_rcu_task_test(struct task_struct *ptr) -{ -} - -__bpf_kfunc struct bpf_testmod_ctx * -bpf_testmod_ctx_create(int *err) -{ - struct bpf_testmod_ctx *ctx; - - ctx = kzalloc(sizeof(*ctx), GFP_ATOMIC); - if (!ctx) { - *err = -ENOMEM; - return NULL; - } - refcount_set(&ctx->usage, 1); - - return ctx; -} - -static void testmod_free_cb(struct rcu_head *head) -{ - struct bpf_testmod_ctx *ctx; - - ctx = container_of(head, struct bpf_testmod_ctx, rcu); - kfree(ctx); -} - -__bpf_kfunc void bpf_testmod_ctx_release(struct bpf_testmod_ctx *ctx) -{ - if (!ctx) - return; - if (refcount_dec_and_test(&ctx->usage)) - call_rcu(&ctx->rcu, testmod_free_cb); -} - -static struct bpf_testmod_ops3 *st_ops3; - -static int bpf_testmod_test_3(void) -{ - return 0; -} - -static int bpf_testmod_test_4(void) -{ - return 0; -} - -static struct bpf_testmod_ops3 __bpf_testmod_ops3 = { - .test_1 = bpf_testmod_test_3, - .test_2 = bpf_testmod_test_4, -}; - -static void bpf_testmod_test_struct_ops3(void) -{ - if (st_ops3) - st_ops3->test_1(); -} - -__bpf_kfunc void bpf_testmod_ops3_call_test_1(void) -{ - st_ops3->test_1(); -} - -__bpf_kfunc void bpf_testmod_ops3_call_test_2(void) -{ - st_ops3->test_2(); -} - -struct bpf_testmod_btf_type_tag_1 { - int a; -}; - -struct bpf_testmod_btf_type_tag_2 { - struct bpf_testmod_btf_type_tag_1 __user *p; -}; - -struct bpf_testmod_btf_type_tag_3 { - struct bpf_testmod_btf_type_tag_1 __percpu *p; -}; - -noinline int -bpf_testmod_test_btf_type_tag_user_1(struct bpf_testmod_btf_type_tag_1 __user *arg) { - BTF_TYPE_EMIT(func_proto_typedef); - BTF_TYPE_EMIT(func_proto_typedef_nested1); - BTF_TYPE_EMIT(func_proto_typedef_nested2); - return arg->a; -} - -noinline int -bpf_testmod_test_btf_type_tag_user_2(struct bpf_testmod_btf_type_tag_2 *arg) { - return arg->p->a; -} - -noinline int -bpf_testmod_test_btf_type_tag_percpu_1(struct bpf_testmod_btf_type_tag_1 __percpu *arg) { - return arg->a; -} - -noinline int -bpf_testmod_test_btf_type_tag_percpu_2(struct 
bpf_testmod_btf_type_tag_3 *arg) { - return arg->p->a; -} - -noinline int bpf_testmod_loop_test(int n) -{ - /* Make sum volatile, so smart compilers, such as clang, will not - * optimize the code by removing the loop. - */ - volatile int sum = 0; - int i; - - /* the primary goal of this test is to test LBR. Create a lot of - * branches in the function, so we can catch it easily. - */ - for (i = 0; i < n; i++) - sum += i; - return sum; -} - -__weak noinline struct file *bpf_testmod_return_ptr(int arg) -{ - static struct file f = {}; - - switch (arg) { - case 1: return (void *)EINVAL; /* user addr */ - case 2: return (void *)0xcafe4a11; /* user addr */ - case 3: return (void *)-EINVAL; /* canonical, but invalid */ - case 4: return (void *)(1ull << 60); /* non-canonical and invalid */ - case 5: return (void *)~(1ull << 30); /* trigger extable */ - case 6: return &f; /* valid addr */ - case 7: return (void *)((long)&f | 1); /* kernel tricks */ -#ifdef CONFIG_X86_64 - case 8: return (void *)VSYSCALL_ADDR; /* vsyscall page address */ -#endif - default: return NULL; - } -} - -noinline int bpf_testmod_fentry_test1(int a) -{ - return a + 1; -} - -noinline int bpf_testmod_fentry_test2(int a, u64 b) -{ - return a + b; -} - -noinline int bpf_testmod_fentry_test3(char a, int b, u64 c) -{ - return a + b + c; -} - -noinline int bpf_testmod_fentry_test7(u64 a, void *b, short c, int d, - void *e, char f, int g) -{ - return a + (long)b + c + d + (long)e + f + g; -} - -noinline int bpf_testmod_fentry_test11(u64 a, void *b, short c, int d, - void *e, char f, int g, - unsigned int h, long i, __u64 j, - unsigned long k) -{ - return a + (long)b + c + d + (long)e + f + g + h + i + j + k; -} - -int bpf_testmod_fentry_ok; - -noinline ssize_t -bpf_testmod_test_read(struct file *file, struct kobject *kobj, - struct bin_attribute *bin_attr, - char *buf, loff_t off, size_t len) -{ - struct bpf_testmod_test_read_ctx ctx = { - .buf = buf, - .off = off, - .len = len, - }; - struct bpf_testmod_struct_arg_1 struct_arg1 = {10}, struct_arg1_2 = {-1}; - struct bpf_testmod_struct_arg_2 struct_arg2 = {2, 3}; - struct bpf_testmod_struct_arg_3 *struct_arg3; - struct bpf_testmod_struct_arg_4 struct_arg4 = {21, 22}; - struct bpf_testmod_struct_arg_5 struct_arg5 = {23, 24, 25, 26}; - int i = 1; - - while (bpf_testmod_return_ptr(i)) - i++; - - (void)bpf_testmod_test_struct_arg_1(struct_arg2, 1, 4); - (void)bpf_testmod_test_struct_arg_2(1, struct_arg2, 4); - (void)bpf_testmod_test_struct_arg_3(1, 4, struct_arg2); - (void)bpf_testmod_test_struct_arg_4(struct_arg1, 1, 2, 3, struct_arg2); - (void)bpf_testmod_test_struct_arg_5(); - (void)bpf_testmod_test_struct_arg_7(16, (void *)17, 18, 19, - (void *)20, struct_arg4); - (void)bpf_testmod_test_struct_arg_8(16, (void *)17, 18, 19, - (void *)20, struct_arg4, 23); - (void)bpf_testmod_test_struct_arg_9(16, (void *)17, 18, 19, (void *)20, - 21, 22, struct_arg5, 27); - - (void)bpf_testmod_test_arg_ptr_to_struct(&struct_arg1_2); - - (void)trace_bpf_testmod_test_raw_tp_null(NULL); - - bpf_testmod_test_struct_ops3(); - - struct_arg3 = kmalloc((sizeof(struct bpf_testmod_struct_arg_3) + - sizeof(int)), GFP_KERNEL); - if (struct_arg3 != NULL) { - struct_arg3->b[0] = 1; - (void)bpf_testmod_test_struct_arg_6(struct_arg3); - kfree(struct_arg3); - } - - /* This is always true. Use the check to make sure the compiler - * doesn't remove bpf_testmod_loop_test. 
- */ - if (bpf_testmod_loop_test(101) > 100) - trace_bpf_testmod_test_read(current, &ctx); - - trace_bpf_testmod_test_nullable_bare(NULL); - - /* Magic number to enable writable tp */ - if (len == 64) { - struct bpf_testmod_test_writable_ctx writable = { - .val = 1024, - }; - trace_bpf_testmod_test_writable_bare(&writable); - if (writable.early_ret) - return snprintf(buf, len, "%d\n", writable.val); - } - - if (bpf_testmod_fentry_test1(1) != 2 || - bpf_testmod_fentry_test2(2, 3) != 5 || - bpf_testmod_fentry_test3(4, 5, 6) != 15 || - bpf_testmod_fentry_test7(16, (void *)17, 18, 19, (void *)20, - 21, 22) != 133 || - bpf_testmod_fentry_test11(16, (void *)17, 18, 19, (void *)20, - 21, 22, 23, 24, 25, 26) != 231) - goto out; - - bpf_testmod_fentry_ok = 1; -out: - return -EIO; /* always fail */ -} -EXPORT_SYMBOL(bpf_testmod_test_read); -ALLOW_ERROR_INJECTION(bpf_testmod_test_read, ERRNO); - -noinline ssize_t -bpf_testmod_test_write(struct file *file, struct kobject *kobj, - struct bin_attribute *bin_attr, - char *buf, loff_t off, size_t len) -{ - struct bpf_testmod_test_write_ctx ctx = { - .buf = buf, - .off = off, - .len = len, - }; - - trace_bpf_testmod_test_write_bare(current, &ctx); - - return -EIO; /* always fail */ -} -EXPORT_SYMBOL(bpf_testmod_test_write); -ALLOW_ERROR_INJECTION(bpf_testmod_test_write, ERRNO); - -noinline int bpf_fentry_shadow_test(int a) -{ - return a + 2; -} -EXPORT_SYMBOL_GPL(bpf_fentry_shadow_test); - -__bpf_hook_end(); - -static struct bin_attribute bin_attr_bpf_testmod_file __ro_after_init = { - .attr = { .name = "bpf_testmod", .mode = 0666, }, - .read = bpf_testmod_test_read, - .write = bpf_testmod_test_write, -}; - -/* bpf_testmod_uprobe sysfs attribute is so far enabled for x86_64 only, - * please see test_uretprobe_regs_change test - */ -#ifdef __x86_64__ - -static int -uprobe_ret_handler(struct uprobe_consumer *self, unsigned long func, - struct pt_regs *regs, __u64 *data) - -{ - regs->ax = 0x12345678deadbeef; - regs->cx = 0x87654321feebdaed; - regs->r11 = (u64) -1; - return true; -} - -struct testmod_uprobe { - struct path path; - struct uprobe *uprobe; - struct uprobe_consumer consumer; -}; - -static DEFINE_MUTEX(testmod_uprobe_mutex); - -static struct testmod_uprobe uprobe = { - .consumer.ret_handler = uprobe_ret_handler, -}; - -static int testmod_register_uprobe(loff_t offset) -{ - int err = -EBUSY; - - if (uprobe.uprobe) - return -EBUSY; - - mutex_lock(&testmod_uprobe_mutex); - - if (uprobe.uprobe) - goto out; - - err = kern_path("/proc/self/exe", LOOKUP_FOLLOW, &uprobe.path); - if (err) - goto out; - - uprobe.uprobe = uprobe_register(d_real_inode(uprobe.path.dentry), - offset, 0, &uprobe.consumer); - if (IS_ERR(uprobe.uprobe)) { - err = PTR_ERR(uprobe.uprobe); - path_put(&uprobe.path); - uprobe.uprobe = NULL; - } -out: - mutex_unlock(&testmod_uprobe_mutex); - return err; -} - -static void testmod_unregister_uprobe(void) -{ - mutex_lock(&testmod_uprobe_mutex); - - if (uprobe.uprobe) { - uprobe_unregister_nosync(uprobe.uprobe, &uprobe.consumer); - uprobe_unregister_sync(); - path_put(&uprobe.path); - uprobe.uprobe = NULL; - } - - mutex_unlock(&testmod_uprobe_mutex); -} - -static ssize_t -bpf_testmod_uprobe_write(struct file *file, struct kobject *kobj, - struct bin_attribute *bin_attr, - char *buf, loff_t off, size_t len) -{ - unsigned long offset = 0; - int err = 0; - - if (kstrtoul(buf, 0, &offset)) - return -EINVAL; - - if (offset) - err = testmod_register_uprobe(offset); - else - testmod_unregister_uprobe(); - - return err ?: strlen(buf); -} - -static 
struct bin_attribute bin_attr_bpf_testmod_uprobe_file __ro_after_init = { - .attr = { .name = "bpf_testmod_uprobe", .mode = 0666, }, - .write = bpf_testmod_uprobe_write, -}; - -static int register_bpf_testmod_uprobe(void) -{ - return sysfs_create_bin_file(kernel_kobj, &bin_attr_bpf_testmod_uprobe_file); -} - -static void unregister_bpf_testmod_uprobe(void) -{ - testmod_unregister_uprobe(); - sysfs_remove_bin_file(kernel_kobj, &bin_attr_bpf_testmod_uprobe_file); -} - -#else -static int register_bpf_testmod_uprobe(void) -{ - return 0; -} - -static void unregister_bpf_testmod_uprobe(void) { } -#endif - -BTF_KFUNCS_START(bpf_testmod_common_kfunc_ids) -BTF_ID_FLAGS(func, bpf_iter_testmod_seq_new, KF_ITER_NEW) -BTF_ID_FLAGS(func, bpf_iter_testmod_seq_next, KF_ITER_NEXT | KF_RET_NULL) -BTF_ID_FLAGS(func, bpf_iter_testmod_seq_destroy, KF_ITER_DESTROY) -BTF_ID_FLAGS(func, bpf_iter_testmod_seq_value) -BTF_ID_FLAGS(func, bpf_kfunc_common_test) -BTF_ID_FLAGS(func, bpf_kfunc_dynptr_test) -BTF_ID_FLAGS(func, bpf_kfunc_nested_acquire_nonzero_offset_test, KF_ACQUIRE) -BTF_ID_FLAGS(func, bpf_kfunc_nested_acquire_zero_offset_test, KF_ACQUIRE) -BTF_ID_FLAGS(func, bpf_kfunc_nested_release_test, KF_RELEASE) -BTF_ID_FLAGS(func, bpf_kfunc_trusted_vma_test, KF_TRUSTED_ARGS) -BTF_ID_FLAGS(func, bpf_kfunc_trusted_task_test, KF_TRUSTED_ARGS) -BTF_ID_FLAGS(func, bpf_kfunc_trusted_num_test, KF_TRUSTED_ARGS) -BTF_ID_FLAGS(func, bpf_kfunc_rcu_task_test, KF_RCU) -BTF_ID_FLAGS(func, bpf_testmod_ctx_create, KF_ACQUIRE | KF_RET_NULL) -BTF_ID_FLAGS(func, bpf_testmod_ctx_release, KF_RELEASE) -BTF_ID_FLAGS(func, bpf_testmod_ops3_call_test_1) -BTF_ID_FLAGS(func, bpf_testmod_ops3_call_test_2) -BTF_KFUNCS_END(bpf_testmod_common_kfunc_ids) - -BTF_ID_LIST(bpf_testmod_dtor_ids) -BTF_ID(struct, bpf_testmod_ctx) -BTF_ID(func, bpf_testmod_ctx_release) - -static const struct btf_kfunc_id_set bpf_testmod_common_kfunc_set = { - .owner = THIS_MODULE, - .set = &bpf_testmod_common_kfunc_ids, -}; - -__bpf_kfunc u64 bpf_kfunc_call_test1(struct sock *sk, u32 a, u64 b, u32 c, u64 d) -{ - return a + b + c + d; -} - -__bpf_kfunc int bpf_kfunc_call_test2(struct sock *sk, u32 a, u32 b) -{ - return a + b; -} - -__bpf_kfunc struct sock *bpf_kfunc_call_test3(struct sock *sk) -{ - return sk; -} - -__bpf_kfunc long noinline bpf_kfunc_call_test4(signed char a, short b, int c, long d) -{ - /* Provoke the compiler to assume that the caller has sign-extended a, - * b and c on platforms where this is required (e.g. s390x). 
- */ - return (long)a + (long)b + (long)c + d; -} - -static struct prog_test_ref_kfunc prog_test_struct = { - .a = 42, - .b = 108, - .next = &prog_test_struct, - .cnt = REFCOUNT_INIT(1), -}; - -__bpf_kfunc struct prog_test_ref_kfunc * -bpf_kfunc_call_test_acquire(unsigned long *scalar_ptr) -{ - refcount_inc(&prog_test_struct.cnt); - return &prog_test_struct; -} - -__bpf_kfunc void bpf_kfunc_call_test_offset(struct prog_test_ref_kfunc *p) -{ - WARN_ON_ONCE(1); -} - -__bpf_kfunc struct prog_test_member * -bpf_kfunc_call_memb_acquire(void) -{ - WARN_ON_ONCE(1); - return NULL; -} - -__bpf_kfunc void bpf_kfunc_call_memb1_release(struct prog_test_member1 *p) -{ - WARN_ON_ONCE(1); -} - -static int *__bpf_kfunc_call_test_get_mem(struct prog_test_ref_kfunc *p, const int size) -{ - if (size > 2 * sizeof(int)) - return NULL; - - return (int *)p; -} - -__bpf_kfunc int *bpf_kfunc_call_test_get_rdwr_mem(struct prog_test_ref_kfunc *p, - const int rdwr_buf_size) -{ - return __bpf_kfunc_call_test_get_mem(p, rdwr_buf_size); -} - -__bpf_kfunc int *bpf_kfunc_call_test_get_rdonly_mem(struct prog_test_ref_kfunc *p, - const int rdonly_buf_size) -{ - return __bpf_kfunc_call_test_get_mem(p, rdonly_buf_size); -} - -/* the next 2 ones can't be really used for testing expect to ensure - * that the verifier rejects the call. - * Acquire functions must return struct pointers, so these ones are - * failing. - */ -__bpf_kfunc int *bpf_kfunc_call_test_acq_rdonly_mem(struct prog_test_ref_kfunc *p, - const int rdonly_buf_size) -{ - return __bpf_kfunc_call_test_get_mem(p, rdonly_buf_size); -} - -__bpf_kfunc void bpf_kfunc_call_int_mem_release(int *p) -{ -} - -__bpf_kfunc void bpf_kfunc_call_test_pass_ctx(struct __sk_buff *skb) -{ -} - -__bpf_kfunc void bpf_kfunc_call_test_pass1(struct prog_test_pass1 *p) -{ -} - -__bpf_kfunc void bpf_kfunc_call_test_pass2(struct prog_test_pass2 *p) -{ -} - -__bpf_kfunc void bpf_kfunc_call_test_fail1(struct prog_test_fail1 *p) -{ -} - -__bpf_kfunc void bpf_kfunc_call_test_fail2(struct prog_test_fail2 *p) -{ -} - -__bpf_kfunc void bpf_kfunc_call_test_fail3(struct prog_test_fail3 *p) -{ -} - -__bpf_kfunc void bpf_kfunc_call_test_mem_len_pass1(void *mem, int mem__sz) -{ -} - -__bpf_kfunc void bpf_kfunc_call_test_mem_len_fail1(void *mem, int len) -{ -} - -__bpf_kfunc void bpf_kfunc_call_test_mem_len_fail2(u64 *mem, int len) -{ -} - -__bpf_kfunc void bpf_kfunc_call_test_ref(struct prog_test_ref_kfunc *p) -{ - /* p != NULL, but p->cnt could be 0 */ -} - -__bpf_kfunc void bpf_kfunc_call_test_destructive(void) -{ -} - -__bpf_kfunc static u32 bpf_kfunc_call_test_static_unused_arg(u32 arg, u32 unused) -{ - return arg; -} - -__bpf_kfunc void bpf_kfunc_call_test_sleepable(void) -{ -} - -__bpf_kfunc int bpf_kfunc_init_sock(struct init_sock_args *args) -{ - int proto; - int err; - - mutex_lock(&sock_lock); - - if (sock) { - pr_err("%s called without releasing old sock", __func__); - err = -EPERM; - goto out; - } - - switch (args->af) { - case AF_INET: - case AF_INET6: - proto = args->type == SOCK_STREAM ? IPPROTO_TCP : IPPROTO_UDP; - break; - case AF_UNIX: - proto = PF_UNIX; - break; - default: - pr_err("invalid address family %d\n", args->af); - err = -EINVAL; - goto out; - } - - err = sock_create_kern(current->nsproxy->net_ns, args->af, args->type, - proto, &sock); - - if (!err) - /* Set timeout for call to kernel_connect() to prevent it from hanging, - * and consider the connection attempt failed if it returns - * -EINPROGRESS. 
- */ - sock->sk->sk_sndtimeo = CONNECT_TIMEOUT_SEC * HZ; -out: - mutex_unlock(&sock_lock); - - return err; -} - -__bpf_kfunc void bpf_kfunc_close_sock(void) -{ - mutex_lock(&sock_lock); - - if (sock) { - sock_release(sock); - sock = NULL; - } - - mutex_unlock(&sock_lock); -} - -__bpf_kfunc int bpf_kfunc_call_kernel_connect(struct addr_args *args) -{ - int err; - - if (args->addrlen > sizeof(args->addr)) - return -EINVAL; - - mutex_lock(&sock_lock); - - if (!sock) { - pr_err("%s called without initializing sock", __func__); - err = -EPERM; - goto out; - } - - err = kernel_connect(sock, (struct sockaddr *)&args->addr, - args->addrlen, 0); -out: - mutex_unlock(&sock_lock); - - return err; -} - -__bpf_kfunc int bpf_kfunc_call_kernel_bind(struct addr_args *args) -{ - int err; - - if (args->addrlen > sizeof(args->addr)) - return -EINVAL; - - mutex_lock(&sock_lock); - - if (!sock) { - pr_err("%s called without initializing sock", __func__); - err = -EPERM; - goto out; - } - - err = kernel_bind(sock, (struct sockaddr *)&args->addr, args->addrlen); -out: - mutex_unlock(&sock_lock); - - return err; -} - -__bpf_kfunc int bpf_kfunc_call_kernel_listen(void) -{ - int err; - - mutex_lock(&sock_lock); - - if (!sock) { - pr_err("%s called without initializing sock", __func__); - err = -EPERM; - goto out; - } - - err = kernel_listen(sock, 128); -out: - mutex_unlock(&sock_lock); - - return err; -} - -__bpf_kfunc int bpf_kfunc_call_kernel_sendmsg(struct sendmsg_args *args) -{ - struct msghdr msg = { - .msg_name = &args->addr.addr, - .msg_namelen = args->addr.addrlen, - }; - struct kvec iov; - int err; - - if (args->addr.addrlen > sizeof(args->addr.addr) || - args->msglen > sizeof(args->msg)) - return -EINVAL; - - iov.iov_base = args->msg; - iov.iov_len = args->msglen; - - mutex_lock(&sock_lock); - - if (!sock) { - pr_err("%s called without initializing sock", __func__); - err = -EPERM; - goto out; - } - - err = kernel_sendmsg(sock, &msg, &iov, 1, args->msglen); - args->addr.addrlen = msg.msg_namelen; -out: - mutex_unlock(&sock_lock); - - return err; -} - -__bpf_kfunc int bpf_kfunc_call_sock_sendmsg(struct sendmsg_args *args) -{ - struct msghdr msg = { - .msg_name = &args->addr.addr, - .msg_namelen = args->addr.addrlen, - }; - struct kvec iov; - int err; - - if (args->addr.addrlen > sizeof(args->addr.addr) || - args->msglen > sizeof(args->msg)) - return -EINVAL; - - iov.iov_base = args->msg; - iov.iov_len = args->msglen; - - iov_iter_kvec(&msg.msg_iter, ITER_SOURCE, &iov, 1, args->msglen); - mutex_lock(&sock_lock); - - if (!sock) { - pr_err("%s called without initializing sock", __func__); - err = -EPERM; - goto out; - } - - err = sock_sendmsg(sock, &msg); - args->addr.addrlen = msg.msg_namelen; -out: - mutex_unlock(&sock_lock); - - return err; -} - -__bpf_kfunc int bpf_kfunc_call_kernel_getsockname(struct addr_args *args) -{ - int err; - - mutex_lock(&sock_lock); - - if (!sock) { - pr_err("%s called without initializing sock", __func__); - err = -EPERM; - goto out; - } - - err = kernel_getsockname(sock, (struct sockaddr *)&args->addr); - if (err < 0) - goto out; - - args->addrlen = err; - err = 0; -out: - mutex_unlock(&sock_lock); - - return err; -} - -__bpf_kfunc int bpf_kfunc_call_kernel_getpeername(struct addr_args *args) -{ - int err; - - mutex_lock(&sock_lock); - - if (!sock) { - pr_err("%s called without initializing sock", __func__); - err = -EPERM; - goto out; - } - - err = kernel_getpeername(sock, (struct sockaddr *)&args->addr); - if (err < 0) - goto out; - - args->addrlen = err; - err = 0; -out: - 
mutex_unlock(&sock_lock); - - return err; -} - -static DEFINE_MUTEX(st_ops_mutex); -static struct bpf_testmod_st_ops *st_ops; - -__bpf_kfunc int bpf_kfunc_st_ops_test_prologue(struct st_ops_args *args) -{ - int ret = -1; - - mutex_lock(&st_ops_mutex); - if (st_ops && st_ops->test_prologue) - ret = st_ops->test_prologue(args); - mutex_unlock(&st_ops_mutex); - - return ret; -} - -__bpf_kfunc int bpf_kfunc_st_ops_test_epilogue(struct st_ops_args *args) -{ - int ret = -1; - - mutex_lock(&st_ops_mutex); - if (st_ops && st_ops->test_epilogue) - ret = st_ops->test_epilogue(args); - mutex_unlock(&st_ops_mutex); - - return ret; -} - -__bpf_kfunc int bpf_kfunc_st_ops_test_pro_epilogue(struct st_ops_args *args) -{ - int ret = -1; - - mutex_lock(&st_ops_mutex); - if (st_ops && st_ops->test_pro_epilogue) - ret = st_ops->test_pro_epilogue(args); - mutex_unlock(&st_ops_mutex); - - return ret; -} - -__bpf_kfunc int bpf_kfunc_st_ops_inc10(struct st_ops_args *args) -{ - args->a += 10; - return args->a; -} - -BTF_KFUNCS_START(bpf_testmod_check_kfunc_ids) -BTF_ID_FLAGS(func, bpf_testmod_test_mod_kfunc) -BTF_ID_FLAGS(func, bpf_kfunc_call_test1) -BTF_ID_FLAGS(func, bpf_kfunc_call_test2) -BTF_ID_FLAGS(func, bpf_kfunc_call_test3) -BTF_ID_FLAGS(func, bpf_kfunc_call_test4) -BTF_ID_FLAGS(func, bpf_kfunc_call_test_mem_len_pass1) -BTF_ID_FLAGS(func, bpf_kfunc_call_test_mem_len_fail1) -BTF_ID_FLAGS(func, bpf_kfunc_call_test_mem_len_fail2) -BTF_ID_FLAGS(func, bpf_kfunc_call_test_acquire, KF_ACQUIRE | KF_RET_NULL) -BTF_ID_FLAGS(func, bpf_kfunc_call_memb_acquire, KF_ACQUIRE | KF_RET_NULL) -BTF_ID_FLAGS(func, bpf_kfunc_call_memb1_release, KF_RELEASE) -BTF_ID_FLAGS(func, bpf_kfunc_call_test_get_rdwr_mem, KF_RET_NULL) -BTF_ID_FLAGS(func, bpf_kfunc_call_test_get_rdonly_mem, KF_RET_NULL) -BTF_ID_FLAGS(func, bpf_kfunc_call_test_acq_rdonly_mem, KF_ACQUIRE | KF_RET_NULL) -BTF_ID_FLAGS(func, bpf_kfunc_call_int_mem_release, KF_RELEASE) -BTF_ID_FLAGS(func, bpf_kfunc_call_test_pass_ctx) -BTF_ID_FLAGS(func, bpf_kfunc_call_test_pass1) -BTF_ID_FLAGS(func, bpf_kfunc_call_test_pass2) -BTF_ID_FLAGS(func, bpf_kfunc_call_test_fail1) -BTF_ID_FLAGS(func, bpf_kfunc_call_test_fail2) -BTF_ID_FLAGS(func, bpf_kfunc_call_test_fail3) -BTF_ID_FLAGS(func, bpf_kfunc_call_test_ref, KF_TRUSTED_ARGS | KF_RCU) -BTF_ID_FLAGS(func, bpf_kfunc_call_test_destructive, KF_DESTRUCTIVE) -BTF_ID_FLAGS(func, bpf_kfunc_call_test_static_unused_arg) -BTF_ID_FLAGS(func, bpf_kfunc_call_test_offset) -BTF_ID_FLAGS(func, bpf_kfunc_call_test_sleepable, KF_SLEEPABLE) -BTF_ID_FLAGS(func, bpf_kfunc_init_sock, KF_SLEEPABLE) -BTF_ID_FLAGS(func, bpf_kfunc_close_sock, KF_SLEEPABLE) -BTF_ID_FLAGS(func, bpf_kfunc_call_kernel_connect, KF_SLEEPABLE) -BTF_ID_FLAGS(func, bpf_kfunc_call_kernel_bind, KF_SLEEPABLE) -BTF_ID_FLAGS(func, bpf_kfunc_call_kernel_listen, KF_SLEEPABLE) -BTF_ID_FLAGS(func, bpf_kfunc_call_kernel_sendmsg, KF_SLEEPABLE) -BTF_ID_FLAGS(func, bpf_kfunc_call_sock_sendmsg, KF_SLEEPABLE) -BTF_ID_FLAGS(func, bpf_kfunc_call_kernel_getsockname, KF_SLEEPABLE) -BTF_ID_FLAGS(func, bpf_kfunc_call_kernel_getpeername, KF_SLEEPABLE) -BTF_ID_FLAGS(func, bpf_kfunc_st_ops_test_prologue, KF_TRUSTED_ARGS | KF_SLEEPABLE) -BTF_ID_FLAGS(func, bpf_kfunc_st_ops_test_epilogue, KF_TRUSTED_ARGS | KF_SLEEPABLE) -BTF_ID_FLAGS(func, bpf_kfunc_st_ops_test_pro_epilogue, KF_TRUSTED_ARGS | KF_SLEEPABLE) -BTF_ID_FLAGS(func, bpf_kfunc_st_ops_inc10, KF_TRUSTED_ARGS) -BTF_KFUNCS_END(bpf_testmod_check_kfunc_ids) - -static int bpf_testmod_ops_init(struct btf *btf) -{ - return 0; -} - -static bool 
bpf_testmod_ops_is_valid_access(int off, int size, - enum bpf_access_type type, - const struct bpf_prog *prog, - struct bpf_insn_access_aux *info) -{ - return bpf_tracing_btf_ctx_access(off, size, type, prog, info); -} - -static int bpf_testmod_ops_init_member(const struct btf_type *t, - const struct btf_member *member, - void *kdata, const void *udata) -{ - if (member->offset == offsetof(struct bpf_testmod_ops, data) * 8) { - /* For data fields, this function has to copy it and return - * 1 to indicate that the data has been handled by the - * struct_ops type, or the verifier will reject the map if - * the value of the data field is not zero. - */ - ((struct bpf_testmod_ops *)kdata)->data = ((struct bpf_testmod_ops *)udata)->data; - return 1; - } - return 0; -} - -static const struct btf_kfunc_id_set bpf_testmod_kfunc_set = { - .owner = THIS_MODULE, - .set = &bpf_testmod_check_kfunc_ids, -}; - -static const struct bpf_verifier_ops bpf_testmod_verifier_ops = { - .is_valid_access = bpf_testmod_ops_is_valid_access, -}; - -static const struct bpf_verifier_ops bpf_testmod_verifier_ops3 = { - .is_valid_access = bpf_testmod_ops_is_valid_access, -}; - -static int bpf_dummy_reg(void *kdata, struct bpf_link *link) -{ - struct bpf_testmod_ops *ops = kdata; - - if (ops->test_1) - ops->test_1(); - /* Some test cases (ex. struct_ops_maybe_null) may not have test_2 - * initialized, so we need to check for NULL. - */ - if (ops->test_2) - ops->test_2(4, ops->data); - - return 0; -} - -static void bpf_dummy_unreg(void *kdata, struct bpf_link *link) -{ -} - -static int bpf_testmod_test_1(void) -{ - return 0; -} - -static void bpf_testmod_test_2(int a, int b) -{ -} - -static int bpf_testmod_tramp(int value) -{ - return 0; -} - -static int bpf_testmod_ops__test_maybe_null(int dummy, - struct task_struct *task__nullable) -{ - return 0; -} - -static struct bpf_testmod_ops __bpf_testmod_ops = { - .test_1 = bpf_testmod_test_1, - .test_2 = bpf_testmod_test_2, - .test_maybe_null = bpf_testmod_ops__test_maybe_null, -}; - -struct bpf_struct_ops bpf_bpf_testmod_ops = { - .verifier_ops = &bpf_testmod_verifier_ops, - .init = bpf_testmod_ops_init, - .init_member = bpf_testmod_ops_init_member, - .reg = bpf_dummy_reg, - .unreg = bpf_dummy_unreg, - .cfi_stubs = &__bpf_testmod_ops, - .name = "bpf_testmod_ops", - .owner = THIS_MODULE, -}; - -static int bpf_dummy_reg2(void *kdata, struct bpf_link *link) -{ - struct bpf_testmod_ops2 *ops = kdata; - - ops->test_1(); - return 0; -} - -static struct bpf_testmod_ops2 __bpf_testmod_ops2 = { - .test_1 = bpf_testmod_test_1, -}; - -struct bpf_struct_ops bpf_testmod_ops2 = { - .verifier_ops = &bpf_testmod_verifier_ops, - .init = bpf_testmod_ops_init, - .init_member = bpf_testmod_ops_init_member, - .reg = bpf_dummy_reg2, - .unreg = bpf_dummy_unreg, - .cfi_stubs = &__bpf_testmod_ops2, - .name = "bpf_testmod_ops2", - .owner = THIS_MODULE, -}; - -static int st_ops3_reg(void *kdata, struct bpf_link *link) -{ - int err = 0; - - mutex_lock(&st_ops_mutex); - if (st_ops3) { - pr_err("st_ops has already been registered\n"); - err = -EEXIST; - goto unlock; - } - st_ops3 = kdata; - -unlock: - mutex_unlock(&st_ops_mutex); - return err; -} - -static void st_ops3_unreg(void *kdata, struct bpf_link *link) -{ - mutex_lock(&st_ops_mutex); - st_ops3 = NULL; - mutex_unlock(&st_ops_mutex); -} - -static void test_1_recursion_detected(struct bpf_prog *prog) -{ - struct bpf_prog_stats *stats; - - stats = this_cpu_ptr(prog->stats); - printk("bpf_testmod: oh no, recursing into test_1, recursion_misses %llu", - 
u64_stats_read(&stats->misses)); -} - -static int st_ops3_check_member(const struct btf_type *t, - const struct btf_member *member, - const struct bpf_prog *prog) -{ - u32 moff = __btf_member_bit_offset(t, member) / 8; - - switch (moff) { - case offsetof(struct bpf_testmod_ops3, test_1): - prog->aux->priv_stack_requested = true; - prog->aux->recursion_detected = test_1_recursion_detected; - fallthrough; - default: - break; - } - return 0; -} - -struct bpf_struct_ops bpf_testmod_ops3 = { - .verifier_ops = &bpf_testmod_verifier_ops3, - .init = bpf_testmod_ops_init, - .init_member = bpf_testmod_ops_init_member, - .reg = st_ops3_reg, - .unreg = st_ops3_unreg, - .check_member = st_ops3_check_member, - .cfi_stubs = &__bpf_testmod_ops3, - .name = "bpf_testmod_ops3", - .owner = THIS_MODULE, -}; - -static int bpf_test_mod_st_ops__test_prologue(struct st_ops_args *args) -{ - return 0; -} - -static int bpf_test_mod_st_ops__test_epilogue(struct st_ops_args *args) -{ - return 0; -} - -static int bpf_test_mod_st_ops__test_pro_epilogue(struct st_ops_args *args) -{ - return 0; -} - -static int st_ops_gen_prologue(struct bpf_insn *insn_buf, bool direct_write, - const struct bpf_prog *prog) -{ - struct bpf_insn *insn = insn_buf; - - if (strcmp(prog->aux->attach_func_name, "test_prologue") && - strcmp(prog->aux->attach_func_name, "test_pro_epilogue")) - return 0; - - /* r6 = r1[0]; // r6 will be "struct st_ops *args". r1 is "u64 *ctx". - * r7 = r6->a; - * r7 += 1000; - * r6->a = r7; - */ - *insn++ = BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0); - *insn++ = BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_6, offsetof(struct st_ops_args, a)); - *insn++ = BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, 1000); - *insn++ = BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_7, offsetof(struct st_ops_args, a)); - *insn++ = prog->insnsi[0]; - - return insn - insn_buf; -} - -static int st_ops_gen_epilogue(struct bpf_insn *insn_buf, const struct bpf_prog *prog, - s16 ctx_stack_off) -{ - struct bpf_insn *insn = insn_buf; - - if (strcmp(prog->aux->attach_func_name, "test_epilogue") && - strcmp(prog->aux->attach_func_name, "test_pro_epilogue")) - return 0; - - /* r1 = stack[ctx_stack_off]; // r1 will be "u64 *ctx" - * r1 = r1[0]; // r1 will be "struct st_ops *args" - * r6 = r1->a; - * r6 += 10000; - * r1->a = r6; - * r0 = r6; - * r0 *= 2; - * BPF_EXIT; - */ - *insn++ = BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_FP, ctx_stack_off); - *insn++ = BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, 0); - *insn++ = BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, offsetof(struct st_ops_args, a)); - *insn++ = BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 10000); - *insn++ = BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6, offsetof(struct st_ops_args, a)); - *insn++ = BPF_MOV64_REG(BPF_REG_0, BPF_REG_6); - *insn++ = BPF_ALU64_IMM(BPF_MUL, BPF_REG_0, 2); - *insn++ = BPF_EXIT_INSN(); - - return insn - insn_buf; -} - -static int st_ops_btf_struct_access(struct bpf_verifier_log *log, - const struct bpf_reg_state *reg, - int off, int size) -{ - if (off < 0 || off + size > sizeof(struct st_ops_args)) - return -EACCES; - return 0; -} - -static const struct bpf_verifier_ops st_ops_verifier_ops = { - .is_valid_access = bpf_testmod_ops_is_valid_access, - .btf_struct_access = st_ops_btf_struct_access, - .gen_prologue = st_ops_gen_prologue, - .gen_epilogue = st_ops_gen_epilogue, - .get_func_proto = bpf_base_func_proto, -}; - -static struct bpf_testmod_st_ops st_ops_cfi_stubs = { - .test_prologue = bpf_test_mod_st_ops__test_prologue, - .test_epilogue = bpf_test_mod_st_ops__test_epilogue, - .test_pro_epilogue = 
bpf_test_mod_st_ops__test_pro_epilogue, -}; - -static int st_ops_reg(void *kdata, struct bpf_link *link) -{ - int err = 0; - - mutex_lock(&st_ops_mutex); - if (st_ops) { - pr_err("st_ops has already been registered\n"); - err = -EEXIST; - goto unlock; - } - st_ops = kdata; - -unlock: - mutex_unlock(&st_ops_mutex); - return err; -} - -static void st_ops_unreg(void *kdata, struct bpf_link *link) -{ - mutex_lock(&st_ops_mutex); - st_ops = NULL; - mutex_unlock(&st_ops_mutex); -} - -static int st_ops_init(struct btf *btf) -{ - return 0; -} - -static int st_ops_init_member(const struct btf_type *t, - const struct btf_member *member, - void *kdata, const void *udata) -{ - return 0; -} - -static struct bpf_struct_ops testmod_st_ops = { - .verifier_ops = &st_ops_verifier_ops, - .init = st_ops_init, - .init_member = st_ops_init_member, - .reg = st_ops_reg, - .unreg = st_ops_unreg, - .cfi_stubs = &st_ops_cfi_stubs, - .name = "bpf_testmod_st_ops", - .owner = THIS_MODULE, -}; - -extern int bpf_fentry_test1(int a); - -static int bpf_testmod_init(void) -{ - const struct btf_id_dtor_kfunc bpf_testmod_dtors[] = { - { - .btf_id = bpf_testmod_dtor_ids[0], - .kfunc_btf_id = bpf_testmod_dtor_ids[1] - }, - }; - void **tramp; - int ret; - - ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_UNSPEC, &bpf_testmod_common_kfunc_set); - ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &bpf_testmod_kfunc_set); - ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING, &bpf_testmod_kfunc_set); - ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SYSCALL, &bpf_testmod_kfunc_set); - ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, &bpf_testmod_kfunc_set); - ret = ret ?: register_bpf_struct_ops(&bpf_bpf_testmod_ops, bpf_testmod_ops); - ret = ret ?: register_bpf_struct_ops(&bpf_testmod_ops2, bpf_testmod_ops2); - ret = ret ?: register_bpf_struct_ops(&bpf_testmod_ops3, bpf_testmod_ops3); - ret = ret ?: register_bpf_struct_ops(&testmod_st_ops, bpf_testmod_st_ops); - ret = ret ?: register_btf_id_dtor_kfuncs(bpf_testmod_dtors, - ARRAY_SIZE(bpf_testmod_dtors), - THIS_MODULE); - if (ret < 0) - return ret; - if (bpf_fentry_test1(0) < 0) - return -EINVAL; - sock = NULL; - mutex_init(&sock_lock); - ret = sysfs_create_bin_file(kernel_kobj, &bin_attr_bpf_testmod_file); - if (ret < 0) - return ret; - ret = register_bpf_testmod_uprobe(); - if (ret < 0) - return ret; - - /* Ensure nothing is between tramp_1..tramp_40 */ - BUILD_BUG_ON(offsetof(struct bpf_testmod_ops, tramp_1) + 40 * sizeof(long) != - offsetofend(struct bpf_testmod_ops, tramp_40)); - tramp = (void **)&__bpf_testmod_ops.tramp_1; - while (tramp <= (void **)&__bpf_testmod_ops.tramp_40) - *tramp++ = bpf_testmod_tramp; - - return 0; -} - -static void bpf_testmod_exit(void) -{ - /* Need to wait for all references to be dropped because - * bpf_kfunc_call_test_release() which currently resides in kernel can - * be called after bpf_testmod is unloaded. Once release function is - * moved into the module this wait can be removed. 
- */ - while (refcount_read(&prog_test_struct.cnt) > 1) - msleep(20); - - bpf_kfunc_close_sock(); - sysfs_remove_bin_file(kernel_kobj, &bin_attr_bpf_testmod_file); - unregister_bpf_testmod_uprobe(); -} - -module_init(bpf_testmod_init); -module_exit(bpf_testmod_exit); - -MODULE_AUTHOR("Andrii Nakryiko"); -MODULE_DESCRIPTION("BPF selftests module"); -MODULE_LICENSE("Dual BSD/GPL"); diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h deleted file mode 100644 index 356803d1c10e..000000000000 --- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h +++ /dev/null @@ -1,113 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* Copyright (c) 2020 Facebook */ -#ifndef _BPF_TESTMOD_H -#define _BPF_TESTMOD_H - -#include - -struct task_struct; - -struct bpf_testmod_test_read_ctx { - char *buf; - loff_t off; - size_t len; -}; - -struct bpf_testmod_test_write_ctx { - char *buf; - loff_t off; - size_t len; -}; - -struct bpf_testmod_test_writable_ctx { - bool early_ret; - int val; -}; - -/* BPF iter that returns *value* *n* times in a row */ -struct bpf_iter_testmod_seq { - s64 value; - int cnt; -}; - -struct bpf_testmod_ops { - int (*test_1)(void); - void (*test_2)(int a, int b); - /* Used to test nullable arguments. */ - int (*test_maybe_null)(int dummy, struct task_struct *task); - int (*unsupported_ops)(void); - - /* The following fields are used to test shadow copies. */ - char onebyte; - struct { - int a; - int b; - } unsupported; - int data; - - /* The following pointers are used to test the maps having multiple - * pages of trampolines. - */ - int (*tramp_1)(int value); - int (*tramp_2)(int value); - int (*tramp_3)(int value); - int (*tramp_4)(int value); - int (*tramp_5)(int value); - int (*tramp_6)(int value); - int (*tramp_7)(int value); - int (*tramp_8)(int value); - int (*tramp_9)(int value); - int (*tramp_10)(int value); - int (*tramp_11)(int value); - int (*tramp_12)(int value); - int (*tramp_13)(int value); - int (*tramp_14)(int value); - int (*tramp_15)(int value); - int (*tramp_16)(int value); - int (*tramp_17)(int value); - int (*tramp_18)(int value); - int (*tramp_19)(int value); - int (*tramp_20)(int value); - int (*tramp_21)(int value); - int (*tramp_22)(int value); - int (*tramp_23)(int value); - int (*tramp_24)(int value); - int (*tramp_25)(int value); - int (*tramp_26)(int value); - int (*tramp_27)(int value); - int (*tramp_28)(int value); - int (*tramp_29)(int value); - int (*tramp_30)(int value); - int (*tramp_31)(int value); - int (*tramp_32)(int value); - int (*tramp_33)(int value); - int (*tramp_34)(int value); - int (*tramp_35)(int value); - int (*tramp_36)(int value); - int (*tramp_37)(int value); - int (*tramp_38)(int value); - int (*tramp_39)(int value); - int (*tramp_40)(int value); -}; - -struct bpf_testmod_ops2 { - int (*test_1)(void); -}; - -struct bpf_testmod_ops3 { - int (*test_1)(void); - int (*test_2)(void); -}; - -struct st_ops_args { - u64 a; -}; - -struct bpf_testmod_st_ops { - int (*test_prologue)(struct st_ops_args *args); - int (*test_epilogue)(struct st_ops_args *args); - int (*test_pro_epilogue)(struct st_ops_args *args); - struct module *owner; -}; - -#endif /* _BPF_TESTMOD_H */ diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod_kfunc.h b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod_kfunc.h deleted file mode 100644 index b58817938deb..000000000000 --- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod_kfunc.h +++ /dev/null @@ -1,162 +0,0 @@ -/* 
SPDX-License-Identifier: GPL-2.0 */ - -#ifndef _BPF_TESTMOD_KFUNC_H -#define _BPF_TESTMOD_KFUNC_H - -#ifndef __KERNEL__ -#include -#include -#else -#define __ksym -struct prog_test_member1 { - int a; -}; - -struct prog_test_member { - struct prog_test_member1 m; - int c; -}; - -struct prog_test_ref_kfunc { - int a; - int b; - struct prog_test_member memb; - struct prog_test_ref_kfunc *next; - refcount_t cnt; -}; -#endif - -struct prog_test_pass1 { - int x0; - struct { - int x1; - struct { - int x2; - struct { - int x3; - }; - }; - }; -}; - -struct prog_test_pass2 { - int len; - short arr1[4]; - struct { - char arr2[4]; - unsigned long arr3[8]; - } x; -}; - -struct prog_test_fail1 { - void *p; - int x; -}; - -struct prog_test_fail2 { - int x8; - struct prog_test_pass1 x; -}; - -struct prog_test_fail3 { - int len; - char arr1[2]; - char arr2[]; -}; - -struct init_sock_args { - int af; - int type; -}; - -struct addr_args { - char addr[sizeof(struct __kernel_sockaddr_storage)]; - int addrlen; -}; - -struct sendmsg_args { - struct addr_args addr; - char msg[10]; - int msglen; -}; - -struct bpf_testmod_ctx { - struct callback_head rcu; - refcount_t usage; -}; - -struct prog_test_ref_kfunc * -bpf_kfunc_call_test_acquire(unsigned long *scalar_ptr) __ksym; -void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p) __ksym; -void bpf_kfunc_call_test_ref(struct prog_test_ref_kfunc *p) __ksym; - -void bpf_kfunc_call_test_mem_len_pass1(void *mem, int len) __ksym; -int *bpf_kfunc_call_test_get_rdwr_mem(struct prog_test_ref_kfunc *p, const int rdwr_buf_size) __ksym; -int *bpf_kfunc_call_test_get_rdonly_mem(struct prog_test_ref_kfunc *p, const int rdonly_buf_size) __ksym; -int *bpf_kfunc_call_test_acq_rdonly_mem(struct prog_test_ref_kfunc *p, const int rdonly_buf_size) __ksym; -void bpf_kfunc_call_int_mem_release(int *p) __ksym; - -/* The bpf_kfunc_call_test_static_unused_arg is defined as static, - * but bpf program compilation needs to see it as global symbol. 
- */ -#ifndef __KERNEL__ -u32 bpf_kfunc_call_test_static_unused_arg(u32 arg, u32 unused) __ksym; -#endif - -void bpf_testmod_test_mod_kfunc(int i) __ksym; - -__u64 bpf_kfunc_call_test1(struct sock *sk, __u32 a, __u64 b, - __u32 c, __u64 d) __ksym; -int bpf_kfunc_call_test2(struct sock *sk, __u32 a, __u32 b) __ksym; -struct sock *bpf_kfunc_call_test3(struct sock *sk) __ksym; -long bpf_kfunc_call_test4(signed char a, short b, int c, long d) __ksym; - -void bpf_kfunc_call_test_pass_ctx(struct __sk_buff *skb) __ksym; -void bpf_kfunc_call_test_pass1(struct prog_test_pass1 *p) __ksym; -void bpf_kfunc_call_test_pass2(struct prog_test_pass2 *p) __ksym; -void bpf_kfunc_call_test_mem_len_fail2(__u64 *mem, int len) __ksym; - -void bpf_kfunc_call_test_destructive(void) __ksym; -void bpf_kfunc_call_test_sleepable(void) __ksym; - -void bpf_kfunc_call_test_offset(struct prog_test_ref_kfunc *p); -struct prog_test_member *bpf_kfunc_call_memb_acquire(void); -void bpf_kfunc_call_memb1_release(struct prog_test_member1 *p); -void bpf_kfunc_call_test_fail1(struct prog_test_fail1 *p); -void bpf_kfunc_call_test_fail2(struct prog_test_fail2 *p); -void bpf_kfunc_call_test_fail3(struct prog_test_fail3 *p); -void bpf_kfunc_call_test_mem_len_fail1(void *mem, int len); - -void bpf_kfunc_common_test(void) __ksym; - -int bpf_kfunc_init_sock(struct init_sock_args *args) __ksym; -void bpf_kfunc_close_sock(void) __ksym; -int bpf_kfunc_call_kernel_connect(struct addr_args *args) __ksym; -int bpf_kfunc_call_kernel_bind(struct addr_args *args) __ksym; -int bpf_kfunc_call_kernel_listen(void) __ksym; -int bpf_kfunc_call_kernel_sendmsg(struct sendmsg_args *args) __ksym; -int bpf_kfunc_call_sock_sendmsg(struct sendmsg_args *args) __ksym; -int bpf_kfunc_call_kernel_getsockname(struct addr_args *args) __ksym; -int bpf_kfunc_call_kernel_getpeername(struct addr_args *args) __ksym; - -void bpf_kfunc_dynptr_test(struct bpf_dynptr *ptr, struct bpf_dynptr *ptr__nullable) __ksym; - -struct bpf_testmod_ctx *bpf_testmod_ctx_create(int *err) __ksym; -void bpf_testmod_ctx_release(struct bpf_testmod_ctx *ctx) __ksym; - -struct sk_buff *bpf_kfunc_nested_acquire_nonzero_offset_test(struct sk_buff_head *ptr) __ksym; -struct sk_buff *bpf_kfunc_nested_acquire_zero_offset_test(struct sock_common *ptr) __ksym; -void bpf_kfunc_nested_release_test(struct sk_buff *ptr) __ksym; - -struct st_ops_args; -int bpf_kfunc_st_ops_test_prologue(struct st_ops_args *args) __ksym; -int bpf_kfunc_st_ops_test_epilogue(struct st_ops_args *args) __ksym; -int bpf_kfunc_st_ops_test_pro_epilogue(struct st_ops_args *args) __ksym; -int bpf_kfunc_st_ops_inc10(struct st_ops_args *args) __ksym; - -void bpf_kfunc_trusted_vma_test(struct vm_area_struct *ptr) __ksym; -void bpf_kfunc_trusted_task_test(struct task_struct *ptr) __ksym; -void bpf_kfunc_trusted_num_test(int *ptr) __ksym; -void bpf_kfunc_rcu_task_test(struct task_struct *ptr) __ksym; - -#endif /* _BPF_TESTMOD_KFUNC_H */ diff --git a/tools/testing/selftests/bpf/prog_tests/core_reloc.c b/tools/testing/selftests/bpf/prog_tests/core_reloc.c index 1c682550e0e7..e10ea92c3fe2 100644 --- a/tools/testing/selftests/bpf/prog_tests/core_reloc.c +++ b/tools/testing/selftests/bpf/prog_tests/core_reloc.c @@ -2,7 +2,7 @@ #define _GNU_SOURCE #include #include "progs/core_reloc_types.h" -#include "bpf_testmod/bpf_testmod.h" +#include "test_kmods/bpf_testmod.h" #include #include #include diff --git a/tools/testing/selftests/bpf/progs/bad_struct_ops.c b/tools/testing/selftests/bpf/progs/bad_struct_ops.c index b7e175cd0af0..b3f77b4561c8 
100644 --- a/tools/testing/selftests/bpf/progs/bad_struct_ops.c +++ b/tools/testing/selftests/bpf/progs/bad_struct_ops.c @@ -3,7 +3,7 @@ #include #include #include -#include "../bpf_testmod/bpf_testmod.h" +#include "../test_kmods/bpf_testmod.h" char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/cb_refs.c b/tools/testing/selftests/bpf/progs/cb_refs.c index 56c764df8196..5d6fc7f01ebb 100644 --- a/tools/testing/selftests/bpf/progs/cb_refs.c +++ b/tools/testing/selftests/bpf/progs/cb_refs.c @@ -2,7 +2,7 @@ #include #include #include -#include "../bpf_testmod/bpf_testmod_kfunc.h" +#include "../test_kmods/bpf_testmod_kfunc.h" struct map_value { struct prog_test_ref_kfunc __kptr *ptr; diff --git a/tools/testing/selftests/bpf/progs/epilogue_exit.c b/tools/testing/selftests/bpf/progs/epilogue_exit.c index 33d3a57bee90..35fec7c75bef 100644 --- a/tools/testing/selftests/bpf/progs/epilogue_exit.c +++ b/tools/testing/selftests/bpf/progs/epilogue_exit.c @@ -4,8 +4,8 @@ #include #include #include "bpf_misc.h" -#include "../bpf_testmod/bpf_testmod.h" -#include "../bpf_testmod/bpf_testmod_kfunc.h" +#include "../test_kmods/bpf_testmod.h" +#include "../test_kmods/bpf_testmod_kfunc.h" char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/epilogue_tailcall.c b/tools/testing/selftests/bpf/progs/epilogue_tailcall.c index 7275dd594de0..153514691ba4 100644 --- a/tools/testing/selftests/bpf/progs/epilogue_tailcall.c +++ b/tools/testing/selftests/bpf/progs/epilogue_tailcall.c @@ -4,8 +4,8 @@ #include #include #include "bpf_misc.h" -#include "../bpf_testmod/bpf_testmod.h" -#include "../bpf_testmod/bpf_testmod_kfunc.h" +#include "../test_kmods/bpf_testmod.h" +#include "../test_kmods/bpf_testmod_kfunc.h" char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/iters_testmod.c b/tools/testing/selftests/bpf/progs/iters_testmod.c index df1d3db60b1b..9e4b45201e69 100644 --- a/tools/testing/selftests/bpf/progs/iters_testmod.c +++ b/tools/testing/selftests/bpf/progs/iters_testmod.c @@ -4,7 +4,7 @@ #include "bpf_experimental.h" #include #include "bpf_misc.h" -#include "../bpf_testmod/bpf_testmod_kfunc.h" +#include "../test_kmods/bpf_testmod_kfunc.h" char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/jit_probe_mem.c b/tools/testing/selftests/bpf/progs/jit_probe_mem.c index f9789e668297..82190d79de37 100644 --- a/tools/testing/selftests/bpf/progs/jit_probe_mem.c +++ b/tools/testing/selftests/bpf/progs/jit_probe_mem.c @@ -3,7 +3,7 @@ #include #include #include -#include "../bpf_testmod/bpf_testmod_kfunc.h" +#include "../test_kmods/bpf_testmod_kfunc.h" static struct prog_test_ref_kfunc __kptr *v; long total_sum = -1; diff --git a/tools/testing/selftests/bpf/progs/kfunc_call_destructive.c b/tools/testing/selftests/bpf/progs/kfunc_call_destructive.c index 7632d9ecb253..b9670e9a6e3d 100644 --- a/tools/testing/selftests/bpf/progs/kfunc_call_destructive.c +++ b/tools/testing/selftests/bpf/progs/kfunc_call_destructive.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include #include -#include "../bpf_testmod/bpf_testmod_kfunc.h" +#include "../test_kmods/bpf_testmod_kfunc.h" SEC("tc") int kfunc_destructive_test(void) diff --git a/tools/testing/selftests/bpf/progs/kfunc_call_fail.c b/tools/testing/selftests/bpf/progs/kfunc_call_fail.c index 08fae306539c..a1963497f0bf 100644 --- a/tools/testing/selftests/bpf/progs/kfunc_call_fail.c +++ b/tools/testing/selftests/bpf/progs/kfunc_call_fail.c @@ -2,7 
+2,7 @@ /* Copyright (c) 2021 Facebook */ #include #include -#include "../bpf_testmod/bpf_testmod_kfunc.h" +#include "../test_kmods/bpf_testmod_kfunc.h" struct syscall_test_args { __u8 data[16]; diff --git a/tools/testing/selftests/bpf/progs/kfunc_call_race.c b/tools/testing/selftests/bpf/progs/kfunc_call_race.c index d532af07decf..48f64827cd93 100644 --- a/tools/testing/selftests/bpf/progs/kfunc_call_race.c +++ b/tools/testing/selftests/bpf/progs/kfunc_call_race.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include #include -#include "../bpf_testmod/bpf_testmod_kfunc.h" +#include "../test_kmods/bpf_testmod_kfunc.h" SEC("tc") int kfunc_call_fail(struct __sk_buff *ctx) diff --git a/tools/testing/selftests/bpf/progs/kfunc_call_test.c b/tools/testing/selftests/bpf/progs/kfunc_call_test.c index f502f755f567..8b86113a0126 100644 --- a/tools/testing/selftests/bpf/progs/kfunc_call_test.c +++ b/tools/testing/selftests/bpf/progs/kfunc_call_test.c @@ -2,7 +2,7 @@ /* Copyright (c) 2021 Facebook */ #include #include -#include "../bpf_testmod/bpf_testmod_kfunc.h" +#include "../test_kmods/bpf_testmod_kfunc.h" SEC("tc") int kfunc_call_test4(struct __sk_buff *skb) diff --git a/tools/testing/selftests/bpf/progs/kfunc_call_test_subprog.c b/tools/testing/selftests/bpf/progs/kfunc_call_test_subprog.c index 2380c75e74ce..8e150e85b50d 100644 --- a/tools/testing/selftests/bpf/progs/kfunc_call_test_subprog.c +++ b/tools/testing/selftests/bpf/progs/kfunc_call_test_subprog.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2021 Facebook */ -#include "../bpf_testmod/bpf_testmod_kfunc.h" +#include "../test_kmods/bpf_testmod_kfunc.h" extern const int bpf_prog_active __ksym; int active_res = -1; diff --git a/tools/testing/selftests/bpf/progs/local_kptr_stash.c b/tools/testing/selftests/bpf/progs/local_kptr_stash.c index b092a72b2c9d..d736506a4c80 100644 --- a/tools/testing/selftests/bpf/progs/local_kptr_stash.c +++ b/tools/testing/selftests/bpf/progs/local_kptr_stash.c @@ -6,7 +6,7 @@ #include #include #include "../bpf_experimental.h" -#include "../bpf_testmod/bpf_testmod_kfunc.h" +#include "../test_kmods/bpf_testmod_kfunc.h" struct plain_local; diff --git a/tools/testing/selftests/bpf/progs/map_kptr.c b/tools/testing/selftests/bpf/progs/map_kptr.c index ab0ce1d01a4a..edaba481db9d 100644 --- a/tools/testing/selftests/bpf/progs/map_kptr.c +++ b/tools/testing/selftests/bpf/progs/map_kptr.c @@ -2,7 +2,7 @@ #include #include #include -#include "../bpf_testmod/bpf_testmod_kfunc.h" +#include "../test_kmods/bpf_testmod_kfunc.h" struct map_value { struct prog_test_ref_kfunc __kptr_untrusted *unref_ptr; diff --git a/tools/testing/selftests/bpf/progs/map_kptr_fail.c b/tools/testing/selftests/bpf/progs/map_kptr_fail.c index 450bb373b179..c2a6bd392e48 100644 --- a/tools/testing/selftests/bpf/progs/map_kptr_fail.c +++ b/tools/testing/selftests/bpf/progs/map_kptr_fail.c @@ -4,7 +4,7 @@ #include #include #include "bpf_misc.h" -#include "../bpf_testmod/bpf_testmod_kfunc.h" +#include "../test_kmods/bpf_testmod_kfunc.h" struct map_value { char buf[8]; diff --git a/tools/testing/selftests/bpf/progs/missed_kprobe.c b/tools/testing/selftests/bpf/progs/missed_kprobe.c index 7f9ef701f5de..51a4fe64c917 100644 --- a/tools/testing/selftests/bpf/progs/missed_kprobe.c +++ b/tools/testing/selftests/bpf/progs/missed_kprobe.c @@ -2,7 +2,7 @@ #include "vmlinux.h" #include #include -#include "../bpf_testmod/bpf_testmod_kfunc.h" +#include "../test_kmods/bpf_testmod_kfunc.h" char _license[] SEC("license") = "GPL"; diff --git 
a/tools/testing/selftests/bpf/progs/missed_kprobe_recursion.c b/tools/testing/selftests/bpf/progs/missed_kprobe_recursion.c index 8ea71cbd6c45..c4bf679a9876 100644 --- a/tools/testing/selftests/bpf/progs/missed_kprobe_recursion.c +++ b/tools/testing/selftests/bpf/progs/missed_kprobe_recursion.c @@ -2,7 +2,7 @@ #include "vmlinux.h" #include #include -#include "../bpf_testmod/bpf_testmod_kfunc.h" +#include "../test_kmods/bpf_testmod_kfunc.h" char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/nested_acquire.c b/tools/testing/selftests/bpf/progs/nested_acquire.c index 8e521a21d995..49ad7b9adf56 100644 --- a/tools/testing/selftests/bpf/progs/nested_acquire.c +++ b/tools/testing/selftests/bpf/progs/nested_acquire.c @@ -4,7 +4,7 @@ #include #include #include "bpf_misc.h" -#include "../bpf_testmod/bpf_testmod_kfunc.h" +#include "../test_kmods/bpf_testmod_kfunc.h" char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/pro_epilogue.c b/tools/testing/selftests/bpf/progs/pro_epilogue.c index 44bc3f06b4b6..d97d6e07ef5c 100644 --- a/tools/testing/selftests/bpf/progs/pro_epilogue.c +++ b/tools/testing/selftests/bpf/progs/pro_epilogue.c @@ -4,8 +4,8 @@ #include #include #include "bpf_misc.h" -#include "../bpf_testmod/bpf_testmod.h" -#include "../bpf_testmod/bpf_testmod_kfunc.h" +#include "../test_kmods/bpf_testmod.h" +#include "../test_kmods/bpf_testmod_kfunc.h" char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/pro_epilogue_goto_start.c b/tools/testing/selftests/bpf/progs/pro_epilogue_goto_start.c index 3529e53be355..6048d79be48b 100644 --- a/tools/testing/selftests/bpf/progs/pro_epilogue_goto_start.c +++ b/tools/testing/selftests/bpf/progs/pro_epilogue_goto_start.c @@ -4,8 +4,8 @@ #include #include #include "bpf_misc.h" -#include "../bpf_testmod/bpf_testmod.h" -#include "../bpf_testmod/bpf_testmod_kfunc.h" +#include "../test_kmods/bpf_testmod.h" +#include "../test_kmods/bpf_testmod_kfunc.h" char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/sock_addr_kern.c b/tools/testing/selftests/bpf/progs/sock_addr_kern.c index 8386bb15ccdc..84ad515eafd6 100644 --- a/tools/testing/selftests/bpf/progs/sock_addr_kern.c +++ b/tools/testing/selftests/bpf/progs/sock_addr_kern.c @@ -2,7 +2,7 @@ /* Copyright (c) 2024 Google LLC */ #include #include -#include "../bpf_testmod/bpf_testmod_kfunc.h" +#include "../test_kmods/bpf_testmod_kfunc.h" SEC("syscall") int init_sock(struct init_sock_args *args) diff --git a/tools/testing/selftests/bpf/progs/struct_ops_detach.c b/tools/testing/selftests/bpf/progs/struct_ops_detach.c index d7fdcabe7d90..284a5b008e0c 100644 --- a/tools/testing/selftests/bpf/progs/struct_ops_detach.c +++ b/tools/testing/selftests/bpf/progs/struct_ops_detach.c @@ -2,7 +2,7 @@ /* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */ #include #include -#include "../bpf_testmod/bpf_testmod.h" +#include "../test_kmods/bpf_testmod.h" char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/struct_ops_forgotten_cb.c b/tools/testing/selftests/bpf/progs/struct_ops_forgotten_cb.c index 3c822103bd40..d8cc99f5c2e2 100644 --- a/tools/testing/selftests/bpf/progs/struct_ops_forgotten_cb.c +++ b/tools/testing/selftests/bpf/progs/struct_ops_forgotten_cb.c @@ -2,7 +2,7 @@ /* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. 
*/ #include #include -#include "../bpf_testmod/bpf_testmod.h" +#include "../test_kmods/bpf_testmod.h" char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/struct_ops_maybe_null.c b/tools/testing/selftests/bpf/progs/struct_ops_maybe_null.c index b450f72e744a..ccab3935aa42 100644 --- a/tools/testing/selftests/bpf/progs/struct_ops_maybe_null.c +++ b/tools/testing/selftests/bpf/progs/struct_ops_maybe_null.c @@ -2,7 +2,7 @@ /* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */ #include #include -#include "../bpf_testmod/bpf_testmod.h" +#include "../test_kmods/bpf_testmod.h" char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/struct_ops_maybe_null_fail.c b/tools/testing/selftests/bpf/progs/struct_ops_maybe_null_fail.c index 6283099ec383..8b5515f4f724 100644 --- a/tools/testing/selftests/bpf/progs/struct_ops_maybe_null_fail.c +++ b/tools/testing/selftests/bpf/progs/struct_ops_maybe_null_fail.c @@ -2,7 +2,7 @@ /* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */ #include #include -#include "../bpf_testmod/bpf_testmod.h" +#include "../test_kmods/bpf_testmod.h" char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/struct_ops_module.c b/tools/testing/selftests/bpf/progs/struct_ops_module.c index 4c56d4a9d9f4..71c420c3a5a6 100644 --- a/tools/testing/selftests/bpf/progs/struct_ops_module.c +++ b/tools/testing/selftests/bpf/progs/struct_ops_module.c @@ -3,7 +3,7 @@ #include #include #include -#include "../bpf_testmod/bpf_testmod.h" +#include "../test_kmods/bpf_testmod.h" char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/struct_ops_multi_pages.c b/tools/testing/selftests/bpf/progs/struct_ops_multi_pages.c index 9efcc6e4d356..5b23ea817f1f 100644 --- a/tools/testing/selftests/bpf/progs/struct_ops_multi_pages.c +++ b/tools/testing/selftests/bpf/progs/struct_ops_multi_pages.c @@ -3,7 +3,7 @@ #include #include #include -#include "../bpf_testmod/bpf_testmod.h" +#include "../test_kmods/bpf_testmod.h" char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/struct_ops_nulled_out_cb.c b/tools/testing/selftests/bpf/progs/struct_ops_nulled_out_cb.c index fa2021388485..5d0937fa07be 100644 --- a/tools/testing/selftests/bpf/progs/struct_ops_nulled_out_cb.c +++ b/tools/testing/selftests/bpf/progs/struct_ops_nulled_out_cb.c @@ -2,7 +2,7 @@ /* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. 
*/ #include #include -#include "../bpf_testmod/bpf_testmod.h" +#include "../test_kmods/bpf_testmod.h" char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/struct_ops_private_stack.c b/tools/testing/selftests/bpf/progs/struct_ops_private_stack.c index 8ea57e5348ab..0e4d2ff63ab8 100644 --- a/tools/testing/selftests/bpf/progs/struct_ops_private_stack.c +++ b/tools/testing/selftests/bpf/progs/struct_ops_private_stack.c @@ -3,7 +3,7 @@ #include #include #include -#include "../bpf_testmod/bpf_testmod.h" +#include "../test_kmods/bpf_testmod.h" char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/struct_ops_private_stack_fail.c b/tools/testing/selftests/bpf/progs/struct_ops_private_stack_fail.c index 1f55ec4cee37..58d5d8dc2235 100644 --- a/tools/testing/selftests/bpf/progs/struct_ops_private_stack_fail.c +++ b/tools/testing/selftests/bpf/progs/struct_ops_private_stack_fail.c @@ -3,7 +3,7 @@ #include #include #include -#include "../bpf_testmod/bpf_testmod.h" +#include "../test_kmods/bpf_testmod.h" char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/struct_ops_private_stack_recur.c b/tools/testing/selftests/bpf/progs/struct_ops_private_stack_recur.c index f2f300d50988..31e58389bb8b 100644 --- a/tools/testing/selftests/bpf/progs/struct_ops_private_stack_recur.c +++ b/tools/testing/selftests/bpf/progs/struct_ops_private_stack_recur.c @@ -3,7 +3,7 @@ #include #include #include -#include "../bpf_testmod/bpf_testmod.h" +#include "../test_kmods/bpf_testmod.h" char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_kfunc_param_nullable.c b/tools/testing/selftests/bpf/progs/test_kfunc_param_nullable.c index 7ac7e1de34d8..0ad1bf1ede8d 100644 --- a/tools/testing/selftests/bpf/progs/test_kfunc_param_nullable.c +++ b/tools/testing/selftests/bpf/progs/test_kfunc_param_nullable.c @@ -4,7 +4,7 @@ #include #include "bpf_misc.h" #include "bpf_kfuncs.h" -#include "../bpf_testmod/bpf_testmod_kfunc.h" +#include "../test_kmods/bpf_testmod_kfunc.h" SEC("tc") int kfunc_dynptr_nullable_test1(struct __sk_buff *skb) diff --git a/tools/testing/selftests/bpf/progs/test_module_attach.c b/tools/testing/selftests/bpf/progs/test_module_attach.c index cc1a012d038f..fb07f5773888 100644 --- a/tools/testing/selftests/bpf/progs/test_module_attach.c +++ b/tools/testing/selftests/bpf/progs/test_module_attach.c @@ -5,7 +5,7 @@ #include #include #include -#include "../bpf_testmod/bpf_testmod.h" +#include "../test_kmods/bpf_testmod.h" __u32 raw_tp_read_sz = 0; diff --git a/tools/testing/selftests/bpf/progs/test_tp_btf_nullable.c b/tools/testing/selftests/bpf/progs/test_tp_btf_nullable.c index 5aaf2b065f86..3bce838e92d5 100644 --- a/tools/testing/selftests/bpf/progs/test_tp_btf_nullable.c +++ b/tools/testing/selftests/bpf/progs/test_tp_btf_nullable.c @@ -3,7 +3,7 @@ #include "vmlinux.h" #include #include -#include "../bpf_testmod/bpf_testmod.h" +#include "../test_kmods/bpf_testmod.h" #include "bpf_misc.h" SEC("tp_btf/bpf_testmod_test_nullable_bare") diff --git a/tools/testing/selftests/bpf/progs/unsupported_ops.c b/tools/testing/selftests/bpf/progs/unsupported_ops.c index 9180365a3568..8aa2e0dd624e 100644 --- a/tools/testing/selftests/bpf/progs/unsupported_ops.c +++ b/tools/testing/selftests/bpf/progs/unsupported_ops.c @@ -4,7 +4,7 @@ #include #include #include "bpf_misc.h" -#include "../bpf_testmod/bpf_testmod.h" +#include "../test_kmods/bpf_testmod.h" char _license[] SEC("license") = "GPL"; diff --git 
a/tools/testing/selftests/bpf/progs/wq.c b/tools/testing/selftests/bpf/progs/wq.c index f8d3ae0c29ae..2f1ba08c293e 100644 --- a/tools/testing/selftests/bpf/progs/wq.c +++ b/tools/testing/selftests/bpf/progs/wq.c @@ -5,7 +5,7 @@ #include "bpf_experimental.h" #include #include "bpf_misc.h" -#include "../bpf_testmod/bpf_testmod_kfunc.h" +#include "../test_kmods/bpf_testmod_kfunc.h" char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/wq_failures.c b/tools/testing/selftests/bpf/progs/wq_failures.c index 25b51a72fe0f..4240211a1900 100644 --- a/tools/testing/selftests/bpf/progs/wq_failures.c +++ b/tools/testing/selftests/bpf/progs/wq_failures.c @@ -5,7 +5,7 @@ #include "bpf_experimental.h" #include #include "bpf_misc.h" -#include "../bpf_testmod/bpf_testmod_kfunc.h" +#include "../test_kmods/bpf_testmod_kfunc.h" char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/test_kmods/.gitignore b/tools/testing/selftests/bpf/test_kmods/.gitignore new file mode 100644 index 000000000000..ded513777281 --- /dev/null +++ b/tools/testing/selftests/bpf/test_kmods/.gitignore @@ -0,0 +1,6 @@ +*.mod +*.mod.c +*.o +.ko +/Module.symvers +/modules.order diff --git a/tools/testing/selftests/bpf/test_kmods/Makefile b/tools/testing/selftests/bpf/test_kmods/Makefile new file mode 100644 index 000000000000..d4e50c4509c9 --- /dev/null +++ b/tools/testing/selftests/bpf/test_kmods/Makefile @@ -0,0 +1,21 @@ +TEST_KMOD_DIR := $(realpath $(dir $(abspath $(lastword $(MAKEFILE_LIST))))) +KDIR ?= $(abspath $(TEST_KMOD_DIR)/../../../../..) + +ifeq ($(V),1) +Q = +else +Q = @ +endif + +MODULES = bpf_testmod.ko bpf_test_no_cfi.ko bpf_test_modorder_x.ko \ + bpf_test_modorder_y.ko + +$(foreach m,$(MODULES),$(eval obj-m += $(m:.ko=.o))) + +CFLAGS_bpf_testmod.o = -I$(src) + +all: + $(Q)$(MAKE) -C $(KDIR) M=$(TEST_KMOD_DIR) modules + +clean: + $(Q)$(MAKE) -C $(KDIR) M=$(TEST_KMOD_DIR) clean diff --git a/tools/testing/selftests/bpf/test_kmods/bpf_test_modorder_x.c b/tools/testing/selftests/bpf/test_kmods/bpf_test_modorder_x.c new file mode 100644 index 000000000000..0cc747fa912f --- /dev/null +++ b/tools/testing/selftests/bpf/test_kmods/bpf_test_modorder_x.c @@ -0,0 +1,39 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include + +__bpf_kfunc_start_defs(); + +__bpf_kfunc int bpf_test_modorder_retx(void) +{ + return 'x'; +} + +__bpf_kfunc_end_defs(); + +BTF_KFUNCS_START(bpf_test_modorder_kfunc_x_ids) +BTF_ID_FLAGS(func, bpf_test_modorder_retx); +BTF_KFUNCS_END(bpf_test_modorder_kfunc_x_ids) + +static const struct btf_kfunc_id_set bpf_test_modorder_x_set = { + .owner = THIS_MODULE, + .set = &bpf_test_modorder_kfunc_x_ids, +}; + +static int __init bpf_test_modorder_x_init(void) +{ + return register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, + &bpf_test_modorder_x_set); +} + +static void __exit bpf_test_modorder_x_exit(void) +{ +} + +module_init(bpf_test_modorder_x_init); +module_exit(bpf_test_modorder_x_exit); + +MODULE_DESCRIPTION("BPF selftest ordertest module X"); +MODULE_LICENSE("GPL"); diff --git a/tools/testing/selftests/bpf/test_kmods/bpf_test_modorder_y.c b/tools/testing/selftests/bpf/test_kmods/bpf_test_modorder_y.c new file mode 100644 index 000000000000..c627ee085d13 --- /dev/null +++ b/tools/testing/selftests/bpf/test_kmods/bpf_test_modorder_y.c @@ -0,0 +1,39 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include + +__bpf_kfunc_start_defs(); + +__bpf_kfunc int bpf_test_modorder_rety(void) +{ + return 'y'; +} + 
+__bpf_kfunc_end_defs(); + +BTF_KFUNCS_START(bpf_test_modorder_kfunc_y_ids) +BTF_ID_FLAGS(func, bpf_test_modorder_rety); +BTF_KFUNCS_END(bpf_test_modorder_kfunc_y_ids) + +static const struct btf_kfunc_id_set bpf_test_modorder_y_set = { + .owner = THIS_MODULE, + .set = &bpf_test_modorder_kfunc_y_ids, +}; + +static int __init bpf_test_modorder_y_init(void) +{ + return register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, + &bpf_test_modorder_y_set); +} + +static void __exit bpf_test_modorder_y_exit(void) +{ +} + +module_init(bpf_test_modorder_y_init); +module_exit(bpf_test_modorder_y_exit); + +MODULE_DESCRIPTION("BPF selftest ordertest module Y"); +MODULE_LICENSE("GPL"); diff --git a/tools/testing/selftests/bpf/test_kmods/bpf_test_no_cfi.c b/tools/testing/selftests/bpf/test_kmods/bpf_test_no_cfi.c new file mode 100644 index 000000000000..948eb3962732 --- /dev/null +++ b/tools/testing/selftests/bpf/test_kmods/bpf_test_no_cfi.c @@ -0,0 +1,84 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */ +#include +#include +#include +#include + +struct bpf_test_no_cfi_ops { + void (*fn_1)(void); + void (*fn_2)(void); +}; + +static int dummy_init(struct btf *btf) +{ + return 0; +} + +static int dummy_init_member(const struct btf_type *t, + const struct btf_member *member, + void *kdata, const void *udata) +{ + return 0; +} + +static int dummy_reg(void *kdata, struct bpf_link *link) +{ + return 0; +} + +static void dummy_unreg(void *kdata, struct bpf_link *link) +{ +} + +static const struct bpf_verifier_ops dummy_verifier_ops; + +static void bpf_test_no_cfi_ops__fn_1(void) +{ +} + +static void bpf_test_no_cfi_ops__fn_2(void) +{ +} + +static struct bpf_test_no_cfi_ops __test_no_cif_ops = { + .fn_1 = bpf_test_no_cfi_ops__fn_1, + .fn_2 = bpf_test_no_cfi_ops__fn_2, +}; + +static struct bpf_struct_ops test_no_cif_ops = { + .verifier_ops = &dummy_verifier_ops, + .init = dummy_init, + .init_member = dummy_init_member, + .reg = dummy_reg, + .unreg = dummy_unreg, + .name = "bpf_test_no_cfi_ops", + .owner = THIS_MODULE, +}; + +static int bpf_test_no_cfi_init(void) +{ + int ret; + + ret = register_bpf_struct_ops(&test_no_cif_ops, + bpf_test_no_cfi_ops); + if (!ret) + return -EINVAL; + + test_no_cif_ops.cfi_stubs = &__test_no_cif_ops; + ret = register_bpf_struct_ops(&test_no_cif_ops, + bpf_test_no_cfi_ops); + return ret; +} + +static void bpf_test_no_cfi_exit(void) +{ +} + +module_init(bpf_test_no_cfi_init); +module_exit(bpf_test_no_cfi_exit); + +MODULE_AUTHOR("Kuifeng Lee"); +MODULE_DESCRIPTION("BPF no cfi_stubs test module"); +MODULE_LICENSE("Dual BSD/GPL"); + diff --git a/tools/testing/selftests/bpf/test_kmods/bpf_testmod-events.h b/tools/testing/selftests/bpf/test_kmods/bpf_testmod-events.h new file mode 100644 index 000000000000..aeef86b3da74 --- /dev/null +++ b/tools/testing/selftests/bpf/test_kmods/bpf_testmod-events.h @@ -0,0 +1,71 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2020 Facebook */ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM bpf_testmod + +#if !defined(_BPF_TESTMOD_EVENTS_H) || defined(TRACE_HEADER_MULTI_READ) +#define _BPF_TESTMOD_EVENTS_H + +#include +#include "bpf_testmod.h" + +TRACE_EVENT(bpf_testmod_test_read, + TP_PROTO(struct task_struct *task, struct bpf_testmod_test_read_ctx *ctx), + TP_ARGS(task, ctx), + TP_STRUCT__entry( + __field(pid_t, pid) + __array(char, comm, TASK_COMM_LEN) + __field(loff_t, off) + __field(size_t, len) + ), + TP_fast_assign( + __entry->pid = task->pid; + memcpy(__entry->comm, task->comm, TASK_COMM_LEN); + 
__entry->off = ctx->off; + __entry->len = ctx->len; + ), + TP_printk("pid=%d comm=%s off=%llu len=%zu", + __entry->pid, __entry->comm, __entry->off, __entry->len) +); + +/* A bare tracepoint with no event associated with it */ +DECLARE_TRACE(bpf_testmod_test_write_bare, + TP_PROTO(struct task_struct *task, struct bpf_testmod_test_write_ctx *ctx), + TP_ARGS(task, ctx) +); + +/* Used in bpf_testmod_test_read() to test __nullable suffix */ +DECLARE_TRACE(bpf_testmod_test_nullable_bare, + TP_PROTO(struct bpf_testmod_test_read_ctx *ctx__nullable), + TP_ARGS(ctx__nullable) +); + +struct sk_buff; + +DECLARE_TRACE(bpf_testmod_test_raw_tp_null, + TP_PROTO(struct sk_buff *skb), + TP_ARGS(skb) +); + + +#undef BPF_TESTMOD_DECLARE_TRACE +#ifdef DECLARE_TRACE_WRITABLE +#define BPF_TESTMOD_DECLARE_TRACE(call, proto, args, size) \ + DECLARE_TRACE_WRITABLE(call, PARAMS(proto), PARAMS(args), size) +#else +#define BPF_TESTMOD_DECLARE_TRACE(call, proto, args, size) \ + DECLARE_TRACE(call, PARAMS(proto), PARAMS(args)) +#endif + +BPF_TESTMOD_DECLARE_TRACE(bpf_testmod_test_writable_bare, + TP_PROTO(struct bpf_testmod_test_writable_ctx *ctx), + TP_ARGS(ctx), + sizeof(struct bpf_testmod_test_writable_ctx) +); + +#endif /* _BPF_TESTMOD_EVENTS_H */ + +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH . +#define TRACE_INCLUDE_FILE bpf_testmod-events +#include diff --git a/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c b/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c new file mode 100644 index 000000000000..cc9dde507aba --- /dev/null +++ b/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c @@ -0,0 +1,1487 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "bpf_testmod.h" +#include "bpf_testmod_kfunc.h" + +#define CREATE_TRACE_POINTS +#include "bpf_testmod-events.h" + +#define CONNECT_TIMEOUT_SEC 1 + +typedef int (*func_proto_typedef)(long); +typedef int (*func_proto_typedef_nested1)(func_proto_typedef); +typedef int (*func_proto_typedef_nested2)(func_proto_typedef_nested1); + +DEFINE_PER_CPU(int, bpf_testmod_ksym_percpu) = 123; +long bpf_testmod_test_struct_arg_result; +static DEFINE_MUTEX(sock_lock); +static struct socket *sock; + +struct bpf_testmod_struct_arg_1 { + int a; +}; +struct bpf_testmod_struct_arg_2 { + long a; + long b; +}; + +struct bpf_testmod_struct_arg_3 { + int a; + int b[]; +}; + +struct bpf_testmod_struct_arg_4 { + u64 a; + int b; +}; + +struct bpf_testmod_struct_arg_5 { + char a; + short b; + int c; + long d; +}; + +__bpf_hook_start(); + +noinline int +bpf_testmod_test_struct_arg_1(struct bpf_testmod_struct_arg_2 a, int b, int c) { + bpf_testmod_test_struct_arg_result = a.a + a.b + b + c; + return bpf_testmod_test_struct_arg_result; +} + +noinline int +bpf_testmod_test_struct_arg_2(int a, struct bpf_testmod_struct_arg_2 b, int c) { + bpf_testmod_test_struct_arg_result = a + b.a + b.b + c; + return bpf_testmod_test_struct_arg_result; +} + +noinline int +bpf_testmod_test_struct_arg_3(int a, int b, struct bpf_testmod_struct_arg_2 c) { + bpf_testmod_test_struct_arg_result = a + b + c.a + c.b; + return bpf_testmod_test_struct_arg_result; +} + +noinline int +bpf_testmod_test_struct_arg_4(struct bpf_testmod_struct_arg_1 a, int b, + int c, int d, struct bpf_testmod_struct_arg_2 e) { + bpf_testmod_test_struct_arg_result = a.a + b + c + d + e.a + e.b; + return 
bpf_testmod_test_struct_arg_result; +} + +noinline int +bpf_testmod_test_struct_arg_5(void) { + bpf_testmod_test_struct_arg_result = 1; + return bpf_testmod_test_struct_arg_result; +} + +noinline int +bpf_testmod_test_struct_arg_6(struct bpf_testmod_struct_arg_3 *a) { + bpf_testmod_test_struct_arg_result = a->b[0]; + return bpf_testmod_test_struct_arg_result; +} + +noinline int +bpf_testmod_test_struct_arg_7(u64 a, void *b, short c, int d, void *e, + struct bpf_testmod_struct_arg_4 f) +{ + bpf_testmod_test_struct_arg_result = a + (long)b + c + d + + (long)e + f.a + f.b; + return bpf_testmod_test_struct_arg_result; +} + +noinline int +bpf_testmod_test_struct_arg_8(u64 a, void *b, short c, int d, void *e, + struct bpf_testmod_struct_arg_4 f, int g) +{ + bpf_testmod_test_struct_arg_result = a + (long)b + c + d + + (long)e + f.a + f.b + g; + return bpf_testmod_test_struct_arg_result; +} + +noinline int +bpf_testmod_test_struct_arg_9(u64 a, void *b, short c, int d, void *e, char f, + short g, struct bpf_testmod_struct_arg_5 h, long i) +{ + bpf_testmod_test_struct_arg_result = a + (long)b + c + d + (long)e + + f + g + h.a + h.b + h.c + h.d + i; + return bpf_testmod_test_struct_arg_result; +} + +noinline int +bpf_testmod_test_arg_ptr_to_struct(struct bpf_testmod_struct_arg_1 *a) { + bpf_testmod_test_struct_arg_result = a->a; + return bpf_testmod_test_struct_arg_result; +} + +__bpf_kfunc void +bpf_testmod_test_mod_kfunc(int i) +{ + *(int *)this_cpu_ptr(&bpf_testmod_ksym_percpu) = i; +} + +__bpf_kfunc int bpf_iter_testmod_seq_new(struct bpf_iter_testmod_seq *it, s64 value, int cnt) +{ + it->cnt = cnt; + + if (cnt < 0) + return -EINVAL; + + it->value = value; + + return 0; +} + +__bpf_kfunc s64 *bpf_iter_testmod_seq_next(struct bpf_iter_testmod_seq* it) +{ + if (it->cnt <= 0) + return NULL; + + it->cnt--; + + return &it->value; +} + +__bpf_kfunc s64 bpf_iter_testmod_seq_value(int val, struct bpf_iter_testmod_seq* it__iter) +{ + if (it__iter->cnt < 0) + return 0; + + return val + it__iter->value; +} + +__bpf_kfunc void bpf_iter_testmod_seq_destroy(struct bpf_iter_testmod_seq *it) +{ + it->cnt = 0; +} + +__bpf_kfunc void bpf_kfunc_common_test(void) +{ +} + +__bpf_kfunc void bpf_kfunc_dynptr_test(struct bpf_dynptr *ptr, + struct bpf_dynptr *ptr__nullable) +{ +} + +__bpf_kfunc struct sk_buff *bpf_kfunc_nested_acquire_nonzero_offset_test(struct sk_buff_head *ptr) +{ + return NULL; +} + +__bpf_kfunc struct sk_buff *bpf_kfunc_nested_acquire_zero_offset_test(struct sock_common *ptr) +{ + return NULL; +} + +__bpf_kfunc void bpf_kfunc_nested_release_test(struct sk_buff *ptr) +{ +} + +__bpf_kfunc void bpf_kfunc_trusted_vma_test(struct vm_area_struct *ptr) +{ +} + +__bpf_kfunc void bpf_kfunc_trusted_task_test(struct task_struct *ptr) +{ +} + +__bpf_kfunc void bpf_kfunc_trusted_num_test(int *ptr) +{ +} + +__bpf_kfunc void bpf_kfunc_rcu_task_test(struct task_struct *ptr) +{ +} + +__bpf_kfunc struct bpf_testmod_ctx * +bpf_testmod_ctx_create(int *err) +{ + struct bpf_testmod_ctx *ctx; + + ctx = kzalloc(sizeof(*ctx), GFP_ATOMIC); + if (!ctx) { + *err = -ENOMEM; + return NULL; + } + refcount_set(&ctx->usage, 1); + + return ctx; +} + +static void testmod_free_cb(struct rcu_head *head) +{ + struct bpf_testmod_ctx *ctx; + + ctx = container_of(head, struct bpf_testmod_ctx, rcu); + kfree(ctx); +} + +__bpf_kfunc void bpf_testmod_ctx_release(struct bpf_testmod_ctx *ctx) +{ + if (!ctx) + return; + if (refcount_dec_and_test(&ctx->usage)) + call_rcu(&ctx->rcu, testmod_free_cb); +} + +static struct bpf_testmod_ops3 *st_ops3; + 
+static int bpf_testmod_test_3(void) +{ + return 0; +} + +static int bpf_testmod_test_4(void) +{ + return 0; +} + +static struct bpf_testmod_ops3 __bpf_testmod_ops3 = { + .test_1 = bpf_testmod_test_3, + .test_2 = bpf_testmod_test_4, +}; + +static void bpf_testmod_test_struct_ops3(void) +{ + if (st_ops3) + st_ops3->test_1(); +} + +__bpf_kfunc void bpf_testmod_ops3_call_test_1(void) +{ + st_ops3->test_1(); +} + +__bpf_kfunc void bpf_testmod_ops3_call_test_2(void) +{ + st_ops3->test_2(); +} + +struct bpf_testmod_btf_type_tag_1 { + int a; +}; + +struct bpf_testmod_btf_type_tag_2 { + struct bpf_testmod_btf_type_tag_1 __user *p; +}; + +struct bpf_testmod_btf_type_tag_3 { + struct bpf_testmod_btf_type_tag_1 __percpu *p; +}; + +noinline int +bpf_testmod_test_btf_type_tag_user_1(struct bpf_testmod_btf_type_tag_1 __user *arg) { + BTF_TYPE_EMIT(func_proto_typedef); + BTF_TYPE_EMIT(func_proto_typedef_nested1); + BTF_TYPE_EMIT(func_proto_typedef_nested2); + return arg->a; +} + +noinline int +bpf_testmod_test_btf_type_tag_user_2(struct bpf_testmod_btf_type_tag_2 *arg) { + return arg->p->a; +} + +noinline int +bpf_testmod_test_btf_type_tag_percpu_1(struct bpf_testmod_btf_type_tag_1 __percpu *arg) { + return arg->a; +} + +noinline int +bpf_testmod_test_btf_type_tag_percpu_2(struct bpf_testmod_btf_type_tag_3 *arg) { + return arg->p->a; +} + +noinline int bpf_testmod_loop_test(int n) +{ + /* Make sum volatile, so smart compilers, such as clang, will not + * optimize the code by removing the loop. + */ + volatile int sum = 0; + int i; + + /* the primary goal of this test is to test LBR. Create a lot of + * branches in the function, so we can catch it easily. + */ + for (i = 0; i < n; i++) + sum += i; + return sum; +} + +__weak noinline struct file *bpf_testmod_return_ptr(int arg) +{ + static struct file f = {}; + + switch (arg) { + case 1: return (void *)EINVAL; /* user addr */ + case 2: return (void *)0xcafe4a11; /* user addr */ + case 3: return (void *)-EINVAL; /* canonical, but invalid */ + case 4: return (void *)(1ull << 60); /* non-canonical and invalid */ + case 5: return (void *)~(1ull << 30); /* trigger extable */ + case 6: return &f; /* valid addr */ + case 7: return (void *)((long)&f | 1); /* kernel tricks */ +#ifdef CONFIG_X86_64 + case 8: return (void *)VSYSCALL_ADDR; /* vsyscall page address */ +#endif + default: return NULL; + } +} + +noinline int bpf_testmod_fentry_test1(int a) +{ + return a + 1; +} + +noinline int bpf_testmod_fentry_test2(int a, u64 b) +{ + return a + b; +} + +noinline int bpf_testmod_fentry_test3(char a, int b, u64 c) +{ + return a + b + c; +} + +noinline int bpf_testmod_fentry_test7(u64 a, void *b, short c, int d, + void *e, char f, int g) +{ + return a + (long)b + c + d + (long)e + f + g; +} + +noinline int bpf_testmod_fentry_test11(u64 a, void *b, short c, int d, + void *e, char f, int g, + unsigned int h, long i, __u64 j, + unsigned long k) +{ + return a + (long)b + c + d + (long)e + f + g + h + i + j + k; +} + +int bpf_testmod_fentry_ok; + +noinline ssize_t +bpf_testmod_test_read(struct file *file, struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buf, loff_t off, size_t len) +{ + struct bpf_testmod_test_read_ctx ctx = { + .buf = buf, + .off = off, + .len = len, + }; + struct bpf_testmod_struct_arg_1 struct_arg1 = {10}, struct_arg1_2 = {-1}; + struct bpf_testmod_struct_arg_2 struct_arg2 = {2, 3}; + struct bpf_testmod_struct_arg_3 *struct_arg3; + struct bpf_testmod_struct_arg_4 struct_arg4 = {21, 22}; + struct bpf_testmod_struct_arg_5 struct_arg5 = {23, 24, 
25, 26}; + int i = 1; + + while (bpf_testmod_return_ptr(i)) + i++; + + (void)bpf_testmod_test_struct_arg_1(struct_arg2, 1, 4); + (void)bpf_testmod_test_struct_arg_2(1, struct_arg2, 4); + (void)bpf_testmod_test_struct_arg_3(1, 4, struct_arg2); + (void)bpf_testmod_test_struct_arg_4(struct_arg1, 1, 2, 3, struct_arg2); + (void)bpf_testmod_test_struct_arg_5(); + (void)bpf_testmod_test_struct_arg_7(16, (void *)17, 18, 19, + (void *)20, struct_arg4); + (void)bpf_testmod_test_struct_arg_8(16, (void *)17, 18, 19, + (void *)20, struct_arg4, 23); + (void)bpf_testmod_test_struct_arg_9(16, (void *)17, 18, 19, (void *)20, + 21, 22, struct_arg5, 27); + + (void)bpf_testmod_test_arg_ptr_to_struct(&struct_arg1_2); + + (void)trace_bpf_testmod_test_raw_tp_null(NULL); + + bpf_testmod_test_struct_ops3(); + + struct_arg3 = kmalloc((sizeof(struct bpf_testmod_struct_arg_3) + + sizeof(int)), GFP_KERNEL); + if (struct_arg3 != NULL) { + struct_arg3->b[0] = 1; + (void)bpf_testmod_test_struct_arg_6(struct_arg3); + kfree(struct_arg3); + } + + /* This is always true. Use the check to make sure the compiler + * doesn't remove bpf_testmod_loop_test. + */ + if (bpf_testmod_loop_test(101) > 100) + trace_bpf_testmod_test_read(current, &ctx); + + trace_bpf_testmod_test_nullable_bare(NULL); + + /* Magic number to enable writable tp */ + if (len == 64) { + struct bpf_testmod_test_writable_ctx writable = { + .val = 1024, + }; + trace_bpf_testmod_test_writable_bare(&writable); + if (writable.early_ret) + return snprintf(buf, len, "%d\n", writable.val); + } + + if (bpf_testmod_fentry_test1(1) != 2 || + bpf_testmod_fentry_test2(2, 3) != 5 || + bpf_testmod_fentry_test3(4, 5, 6) != 15 || + bpf_testmod_fentry_test7(16, (void *)17, 18, 19, (void *)20, + 21, 22) != 133 || + bpf_testmod_fentry_test11(16, (void *)17, 18, 19, (void *)20, + 21, 22, 23, 24, 25, 26) != 231) + goto out; + + bpf_testmod_fentry_ok = 1; +out: + return -EIO; /* always fail */ +} +EXPORT_SYMBOL(bpf_testmod_test_read); +ALLOW_ERROR_INJECTION(bpf_testmod_test_read, ERRNO); + +noinline ssize_t +bpf_testmod_test_write(struct file *file, struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buf, loff_t off, size_t len) +{ + struct bpf_testmod_test_write_ctx ctx = { + .buf = buf, + .off = off, + .len = len, + }; + + trace_bpf_testmod_test_write_bare(current, &ctx); + + return -EIO; /* always fail */ +} +EXPORT_SYMBOL(bpf_testmod_test_write); +ALLOW_ERROR_INJECTION(bpf_testmod_test_write, ERRNO); + +noinline int bpf_fentry_shadow_test(int a) +{ + return a + 2; +} +EXPORT_SYMBOL_GPL(bpf_fentry_shadow_test); + +__bpf_hook_end(); + +static struct bin_attribute bin_attr_bpf_testmod_file __ro_after_init = { + .attr = { .name = "bpf_testmod", .mode = 0666, }, + .read = bpf_testmod_test_read, + .write = bpf_testmod_test_write, +}; + +/* bpf_testmod_uprobe sysfs attribute is so far enabled for x86_64 only, + * please see test_uretprobe_regs_change test + */ +#ifdef __x86_64__ + +static int +uprobe_ret_handler(struct uprobe_consumer *self, unsigned long func, + struct pt_regs *regs, __u64 *data) + +{ + regs->ax = 0x12345678deadbeef; + regs->cx = 0x87654321feebdaed; + regs->r11 = (u64) -1; + return true; +} + +struct testmod_uprobe { + struct path path; + struct uprobe *uprobe; + struct uprobe_consumer consumer; +}; + +static DEFINE_MUTEX(testmod_uprobe_mutex); + +static struct testmod_uprobe uprobe = { + .consumer.ret_handler = uprobe_ret_handler, +}; + +static int testmod_register_uprobe(loff_t offset) +{ + int err = -EBUSY; + + if (uprobe.uprobe) + return -EBUSY; + + 
mutex_lock(&testmod_uprobe_mutex); + + if (uprobe.uprobe) + goto out; + + err = kern_path("/proc/self/exe", LOOKUP_FOLLOW, &uprobe.path); + if (err) + goto out; + + uprobe.uprobe = uprobe_register(d_real_inode(uprobe.path.dentry), + offset, 0, &uprobe.consumer); + if (IS_ERR(uprobe.uprobe)) { + err = PTR_ERR(uprobe.uprobe); + path_put(&uprobe.path); + uprobe.uprobe = NULL; + } +out: + mutex_unlock(&testmod_uprobe_mutex); + return err; +} + +static void testmod_unregister_uprobe(void) +{ + mutex_lock(&testmod_uprobe_mutex); + + if (uprobe.uprobe) { + uprobe_unregister_nosync(uprobe.uprobe, &uprobe.consumer); + uprobe_unregister_sync(); + path_put(&uprobe.path); + uprobe.uprobe = NULL; + } + + mutex_unlock(&testmod_uprobe_mutex); +} + +static ssize_t +bpf_testmod_uprobe_write(struct file *file, struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buf, loff_t off, size_t len) +{ + unsigned long offset = 0; + int err = 0; + + if (kstrtoul(buf, 0, &offset)) + return -EINVAL; + + if (offset) + err = testmod_register_uprobe(offset); + else + testmod_unregister_uprobe(); + + return err ?: strlen(buf); +} + +static struct bin_attribute bin_attr_bpf_testmod_uprobe_file __ro_after_init = { + .attr = { .name = "bpf_testmod_uprobe", .mode = 0666, }, + .write = bpf_testmod_uprobe_write, +}; + +static int register_bpf_testmod_uprobe(void) +{ + return sysfs_create_bin_file(kernel_kobj, &bin_attr_bpf_testmod_uprobe_file); +} + +static void unregister_bpf_testmod_uprobe(void) +{ + testmod_unregister_uprobe(); + sysfs_remove_bin_file(kernel_kobj, &bin_attr_bpf_testmod_uprobe_file); +} + +#else +static int register_bpf_testmod_uprobe(void) +{ + return 0; +} + +static void unregister_bpf_testmod_uprobe(void) { } +#endif + +BTF_KFUNCS_START(bpf_testmod_common_kfunc_ids) +BTF_ID_FLAGS(func, bpf_iter_testmod_seq_new, KF_ITER_NEW) +BTF_ID_FLAGS(func, bpf_iter_testmod_seq_next, KF_ITER_NEXT | KF_RET_NULL) +BTF_ID_FLAGS(func, bpf_iter_testmod_seq_destroy, KF_ITER_DESTROY) +BTF_ID_FLAGS(func, bpf_iter_testmod_seq_value) +BTF_ID_FLAGS(func, bpf_kfunc_common_test) +BTF_ID_FLAGS(func, bpf_kfunc_dynptr_test) +BTF_ID_FLAGS(func, bpf_kfunc_nested_acquire_nonzero_offset_test, KF_ACQUIRE) +BTF_ID_FLAGS(func, bpf_kfunc_nested_acquire_zero_offset_test, KF_ACQUIRE) +BTF_ID_FLAGS(func, bpf_kfunc_nested_release_test, KF_RELEASE) +BTF_ID_FLAGS(func, bpf_kfunc_trusted_vma_test, KF_TRUSTED_ARGS) +BTF_ID_FLAGS(func, bpf_kfunc_trusted_task_test, KF_TRUSTED_ARGS) +BTF_ID_FLAGS(func, bpf_kfunc_trusted_num_test, KF_TRUSTED_ARGS) +BTF_ID_FLAGS(func, bpf_kfunc_rcu_task_test, KF_RCU) +BTF_ID_FLAGS(func, bpf_testmod_ctx_create, KF_ACQUIRE | KF_RET_NULL) +BTF_ID_FLAGS(func, bpf_testmod_ctx_release, KF_RELEASE) +BTF_ID_FLAGS(func, bpf_testmod_ops3_call_test_1) +BTF_ID_FLAGS(func, bpf_testmod_ops3_call_test_2) +BTF_KFUNCS_END(bpf_testmod_common_kfunc_ids) + +BTF_ID_LIST(bpf_testmod_dtor_ids) +BTF_ID(struct, bpf_testmod_ctx) +BTF_ID(func, bpf_testmod_ctx_release) + +static const struct btf_kfunc_id_set bpf_testmod_common_kfunc_set = { + .owner = THIS_MODULE, + .set = &bpf_testmod_common_kfunc_ids, +}; + +__bpf_kfunc u64 bpf_kfunc_call_test1(struct sock *sk, u32 a, u64 b, u32 c, u64 d) +{ + return a + b + c + d; +} + +__bpf_kfunc int bpf_kfunc_call_test2(struct sock *sk, u32 a, u32 b) +{ + return a + b; +} + +__bpf_kfunc struct sock *bpf_kfunc_call_test3(struct sock *sk) +{ + return sk; +} + +__bpf_kfunc long noinline bpf_kfunc_call_test4(signed char a, short b, int c, long d) +{ + /* Provoke the compiler to assume that the caller has 
sign-extended a, + * b and c on platforms where this is required (e.g. s390x). + */ + return (long)a + (long)b + (long)c + d; +} + +static struct prog_test_ref_kfunc prog_test_struct = { + .a = 42, + .b = 108, + .next = &prog_test_struct, + .cnt = REFCOUNT_INIT(1), +}; + +__bpf_kfunc struct prog_test_ref_kfunc * +bpf_kfunc_call_test_acquire(unsigned long *scalar_ptr) +{ + refcount_inc(&prog_test_struct.cnt); + return &prog_test_struct; +} + +__bpf_kfunc void bpf_kfunc_call_test_offset(struct prog_test_ref_kfunc *p) +{ + WARN_ON_ONCE(1); +} + +__bpf_kfunc struct prog_test_member * +bpf_kfunc_call_memb_acquire(void) +{ + WARN_ON_ONCE(1); + return NULL; +} + +__bpf_kfunc void bpf_kfunc_call_memb1_release(struct prog_test_member1 *p) +{ + WARN_ON_ONCE(1); +} + +static int *__bpf_kfunc_call_test_get_mem(struct prog_test_ref_kfunc *p, const int size) +{ + if (size > 2 * sizeof(int)) + return NULL; + + return (int *)p; +} + +__bpf_kfunc int *bpf_kfunc_call_test_get_rdwr_mem(struct prog_test_ref_kfunc *p, + const int rdwr_buf_size) +{ + return __bpf_kfunc_call_test_get_mem(p, rdwr_buf_size); +} + +__bpf_kfunc int *bpf_kfunc_call_test_get_rdonly_mem(struct prog_test_ref_kfunc *p, + const int rdonly_buf_size) +{ + return __bpf_kfunc_call_test_get_mem(p, rdonly_buf_size); +} + +/* the next 2 ones can't be really used for testing expect to ensure + * that the verifier rejects the call. + * Acquire functions must return struct pointers, so these ones are + * failing. + */ +__bpf_kfunc int *bpf_kfunc_call_test_acq_rdonly_mem(struct prog_test_ref_kfunc *p, + const int rdonly_buf_size) +{ + return __bpf_kfunc_call_test_get_mem(p, rdonly_buf_size); +} + +__bpf_kfunc void bpf_kfunc_call_int_mem_release(int *p) +{ +} + +__bpf_kfunc void bpf_kfunc_call_test_pass_ctx(struct __sk_buff *skb) +{ +} + +__bpf_kfunc void bpf_kfunc_call_test_pass1(struct prog_test_pass1 *p) +{ +} + +__bpf_kfunc void bpf_kfunc_call_test_pass2(struct prog_test_pass2 *p) +{ +} + +__bpf_kfunc void bpf_kfunc_call_test_fail1(struct prog_test_fail1 *p) +{ +} + +__bpf_kfunc void bpf_kfunc_call_test_fail2(struct prog_test_fail2 *p) +{ +} + +__bpf_kfunc void bpf_kfunc_call_test_fail3(struct prog_test_fail3 *p) +{ +} + +__bpf_kfunc void bpf_kfunc_call_test_mem_len_pass1(void *mem, int mem__sz) +{ +} + +__bpf_kfunc void bpf_kfunc_call_test_mem_len_fail1(void *mem, int len) +{ +} + +__bpf_kfunc void bpf_kfunc_call_test_mem_len_fail2(u64 *mem, int len) +{ +} + +__bpf_kfunc void bpf_kfunc_call_test_ref(struct prog_test_ref_kfunc *p) +{ + /* p != NULL, but p->cnt could be 0 */ +} + +__bpf_kfunc void bpf_kfunc_call_test_destructive(void) +{ +} + +__bpf_kfunc static u32 bpf_kfunc_call_test_static_unused_arg(u32 arg, u32 unused) +{ + return arg; +} + +__bpf_kfunc void bpf_kfunc_call_test_sleepable(void) +{ +} + +__bpf_kfunc int bpf_kfunc_init_sock(struct init_sock_args *args) +{ + int proto; + int err; + + mutex_lock(&sock_lock); + + if (sock) { + pr_err("%s called without releasing old sock", __func__); + err = -EPERM; + goto out; + } + + switch (args->af) { + case AF_INET: + case AF_INET6: + proto = args->type == SOCK_STREAM ? 
IPPROTO_TCP : IPPROTO_UDP; + break; + case AF_UNIX: + proto = PF_UNIX; + break; + default: + pr_err("invalid address family %d\n", args->af); + err = -EINVAL; + goto out; + } + + err = sock_create_kern(current->nsproxy->net_ns, args->af, args->type, + proto, &sock); + + if (!err) + /* Set timeout for call to kernel_connect() to prevent it from hanging, + * and consider the connection attempt failed if it returns + * -EINPROGRESS. + */ + sock->sk->sk_sndtimeo = CONNECT_TIMEOUT_SEC * HZ; +out: + mutex_unlock(&sock_lock); + + return err; +} + +__bpf_kfunc void bpf_kfunc_close_sock(void) +{ + mutex_lock(&sock_lock); + + if (sock) { + sock_release(sock); + sock = NULL; + } + + mutex_unlock(&sock_lock); +} + +__bpf_kfunc int bpf_kfunc_call_kernel_connect(struct addr_args *args) +{ + int err; + + if (args->addrlen > sizeof(args->addr)) + return -EINVAL; + + mutex_lock(&sock_lock); + + if (!sock) { + pr_err("%s called without initializing sock", __func__); + err = -EPERM; + goto out; + } + + err = kernel_connect(sock, (struct sockaddr *)&args->addr, + args->addrlen, 0); +out: + mutex_unlock(&sock_lock); + + return err; +} + +__bpf_kfunc int bpf_kfunc_call_kernel_bind(struct addr_args *args) +{ + int err; + + if (args->addrlen > sizeof(args->addr)) + return -EINVAL; + + mutex_lock(&sock_lock); + + if (!sock) { + pr_err("%s called without initializing sock", __func__); + err = -EPERM; + goto out; + } + + err = kernel_bind(sock, (struct sockaddr *)&args->addr, args->addrlen); +out: + mutex_unlock(&sock_lock); + + return err; +} + +__bpf_kfunc int bpf_kfunc_call_kernel_listen(void) +{ + int err; + + mutex_lock(&sock_lock); + + if (!sock) { + pr_err("%s called without initializing sock", __func__); + err = -EPERM; + goto out; + } + + err = kernel_listen(sock, 128); +out: + mutex_unlock(&sock_lock); + + return err; +} + +__bpf_kfunc int bpf_kfunc_call_kernel_sendmsg(struct sendmsg_args *args) +{ + struct msghdr msg = { + .msg_name = &args->addr.addr, + .msg_namelen = args->addr.addrlen, + }; + struct kvec iov; + int err; + + if (args->addr.addrlen > sizeof(args->addr.addr) || + args->msglen > sizeof(args->msg)) + return -EINVAL; + + iov.iov_base = args->msg; + iov.iov_len = args->msglen; + + mutex_lock(&sock_lock); + + if (!sock) { + pr_err("%s called without initializing sock", __func__); + err = -EPERM; + goto out; + } + + err = kernel_sendmsg(sock, &msg, &iov, 1, args->msglen); + args->addr.addrlen = msg.msg_namelen; +out: + mutex_unlock(&sock_lock); + + return err; +} + +__bpf_kfunc int bpf_kfunc_call_sock_sendmsg(struct sendmsg_args *args) +{ + struct msghdr msg = { + .msg_name = &args->addr.addr, + .msg_namelen = args->addr.addrlen, + }; + struct kvec iov; + int err; + + if (args->addr.addrlen > sizeof(args->addr.addr) || + args->msglen > sizeof(args->msg)) + return -EINVAL; + + iov.iov_base = args->msg; + iov.iov_len = args->msglen; + + iov_iter_kvec(&msg.msg_iter, ITER_SOURCE, &iov, 1, args->msglen); + mutex_lock(&sock_lock); + + if (!sock) { + pr_err("%s called without initializing sock", __func__); + err = -EPERM; + goto out; + } + + err = sock_sendmsg(sock, &msg); + args->addr.addrlen = msg.msg_namelen; +out: + mutex_unlock(&sock_lock); + + return err; +} + +__bpf_kfunc int bpf_kfunc_call_kernel_getsockname(struct addr_args *args) +{ + int err; + + mutex_lock(&sock_lock); + + if (!sock) { + pr_err("%s called without initializing sock", __func__); + err = -EPERM; + goto out; + } + + err = kernel_getsockname(sock, (struct sockaddr *)&args->addr); + if (err < 0) + goto out; + + args->addrlen = 
err; + err = 0; +out: + mutex_unlock(&sock_lock); + + return err; +} + +__bpf_kfunc int bpf_kfunc_call_kernel_getpeername(struct addr_args *args) +{ + int err; + + mutex_lock(&sock_lock); + + if (!sock) { + pr_err("%s called without initializing sock", __func__); + err = -EPERM; + goto out; + } + + err = kernel_getpeername(sock, (struct sockaddr *)&args->addr); + if (err < 0) + goto out; + + args->addrlen = err; + err = 0; +out: + mutex_unlock(&sock_lock); + + return err; +} + +static DEFINE_MUTEX(st_ops_mutex); +static struct bpf_testmod_st_ops *st_ops; + +__bpf_kfunc int bpf_kfunc_st_ops_test_prologue(struct st_ops_args *args) +{ + int ret = -1; + + mutex_lock(&st_ops_mutex); + if (st_ops && st_ops->test_prologue) + ret = st_ops->test_prologue(args); + mutex_unlock(&st_ops_mutex); + + return ret; +} + +__bpf_kfunc int bpf_kfunc_st_ops_test_epilogue(struct st_ops_args *args) +{ + int ret = -1; + + mutex_lock(&st_ops_mutex); + if (st_ops && st_ops->test_epilogue) + ret = st_ops->test_epilogue(args); + mutex_unlock(&st_ops_mutex); + + return ret; +} + +__bpf_kfunc int bpf_kfunc_st_ops_test_pro_epilogue(struct st_ops_args *args) +{ + int ret = -1; + + mutex_lock(&st_ops_mutex); + if (st_ops && st_ops->test_pro_epilogue) + ret = st_ops->test_pro_epilogue(args); + mutex_unlock(&st_ops_mutex); + + return ret; +} + +__bpf_kfunc int bpf_kfunc_st_ops_inc10(struct st_ops_args *args) +{ + args->a += 10; + return args->a; +} + +BTF_KFUNCS_START(bpf_testmod_check_kfunc_ids) +BTF_ID_FLAGS(func, bpf_testmod_test_mod_kfunc) +BTF_ID_FLAGS(func, bpf_kfunc_call_test1) +BTF_ID_FLAGS(func, bpf_kfunc_call_test2) +BTF_ID_FLAGS(func, bpf_kfunc_call_test3) +BTF_ID_FLAGS(func, bpf_kfunc_call_test4) +BTF_ID_FLAGS(func, bpf_kfunc_call_test_mem_len_pass1) +BTF_ID_FLAGS(func, bpf_kfunc_call_test_mem_len_fail1) +BTF_ID_FLAGS(func, bpf_kfunc_call_test_mem_len_fail2) +BTF_ID_FLAGS(func, bpf_kfunc_call_test_acquire, KF_ACQUIRE | KF_RET_NULL) +BTF_ID_FLAGS(func, bpf_kfunc_call_memb_acquire, KF_ACQUIRE | KF_RET_NULL) +BTF_ID_FLAGS(func, bpf_kfunc_call_memb1_release, KF_RELEASE) +BTF_ID_FLAGS(func, bpf_kfunc_call_test_get_rdwr_mem, KF_RET_NULL) +BTF_ID_FLAGS(func, bpf_kfunc_call_test_get_rdonly_mem, KF_RET_NULL) +BTF_ID_FLAGS(func, bpf_kfunc_call_test_acq_rdonly_mem, KF_ACQUIRE | KF_RET_NULL) +BTF_ID_FLAGS(func, bpf_kfunc_call_int_mem_release, KF_RELEASE) +BTF_ID_FLAGS(func, bpf_kfunc_call_test_pass_ctx) +BTF_ID_FLAGS(func, bpf_kfunc_call_test_pass1) +BTF_ID_FLAGS(func, bpf_kfunc_call_test_pass2) +BTF_ID_FLAGS(func, bpf_kfunc_call_test_fail1) +BTF_ID_FLAGS(func, bpf_kfunc_call_test_fail2) +BTF_ID_FLAGS(func, bpf_kfunc_call_test_fail3) +BTF_ID_FLAGS(func, bpf_kfunc_call_test_ref, KF_TRUSTED_ARGS | KF_RCU) +BTF_ID_FLAGS(func, bpf_kfunc_call_test_destructive, KF_DESTRUCTIVE) +BTF_ID_FLAGS(func, bpf_kfunc_call_test_static_unused_arg) +BTF_ID_FLAGS(func, bpf_kfunc_call_test_offset) +BTF_ID_FLAGS(func, bpf_kfunc_call_test_sleepable, KF_SLEEPABLE) +BTF_ID_FLAGS(func, bpf_kfunc_init_sock, KF_SLEEPABLE) +BTF_ID_FLAGS(func, bpf_kfunc_close_sock, KF_SLEEPABLE) +BTF_ID_FLAGS(func, bpf_kfunc_call_kernel_connect, KF_SLEEPABLE) +BTF_ID_FLAGS(func, bpf_kfunc_call_kernel_bind, KF_SLEEPABLE) +BTF_ID_FLAGS(func, bpf_kfunc_call_kernel_listen, KF_SLEEPABLE) +BTF_ID_FLAGS(func, bpf_kfunc_call_kernel_sendmsg, KF_SLEEPABLE) +BTF_ID_FLAGS(func, bpf_kfunc_call_sock_sendmsg, KF_SLEEPABLE) +BTF_ID_FLAGS(func, bpf_kfunc_call_kernel_getsockname, KF_SLEEPABLE) +BTF_ID_FLAGS(func, bpf_kfunc_call_kernel_getpeername, KF_SLEEPABLE) +BTF_ID_FLAGS(func, 
bpf_kfunc_st_ops_test_prologue, KF_TRUSTED_ARGS | KF_SLEEPABLE) +BTF_ID_FLAGS(func, bpf_kfunc_st_ops_test_epilogue, KF_TRUSTED_ARGS | KF_SLEEPABLE) +BTF_ID_FLAGS(func, bpf_kfunc_st_ops_test_pro_epilogue, KF_TRUSTED_ARGS | KF_SLEEPABLE) +BTF_ID_FLAGS(func, bpf_kfunc_st_ops_inc10, KF_TRUSTED_ARGS) +BTF_KFUNCS_END(bpf_testmod_check_kfunc_ids) + +static int bpf_testmod_ops_init(struct btf *btf) +{ + return 0; +} + +static bool bpf_testmod_ops_is_valid_access(int off, int size, + enum bpf_access_type type, + const struct bpf_prog *prog, + struct bpf_insn_access_aux *info) +{ + return bpf_tracing_btf_ctx_access(off, size, type, prog, info); +} + +static int bpf_testmod_ops_init_member(const struct btf_type *t, + const struct btf_member *member, + void *kdata, const void *udata) +{ + if (member->offset == offsetof(struct bpf_testmod_ops, data) * 8) { + /* For data fields, this function has to copy it and return + * 1 to indicate that the data has been handled by the + * struct_ops type, or the verifier will reject the map if + * the value of the data field is not zero. + */ + ((struct bpf_testmod_ops *)kdata)->data = ((struct bpf_testmod_ops *)udata)->data; + return 1; + } + return 0; +} + +static const struct btf_kfunc_id_set bpf_testmod_kfunc_set = { + .owner = THIS_MODULE, + .set = &bpf_testmod_check_kfunc_ids, +}; + +static const struct bpf_verifier_ops bpf_testmod_verifier_ops = { + .is_valid_access = bpf_testmod_ops_is_valid_access, +}; + +static const struct bpf_verifier_ops bpf_testmod_verifier_ops3 = { + .is_valid_access = bpf_testmod_ops_is_valid_access, +}; + +static int bpf_dummy_reg(void *kdata, struct bpf_link *link) +{ + struct bpf_testmod_ops *ops = kdata; + + if (ops->test_1) + ops->test_1(); + /* Some test cases (ex. struct_ops_maybe_null) may not have test_2 + * initialized, so we need to check for NULL. 
+ */ + if (ops->test_2) + ops->test_2(4, ops->data); + + return 0; +} + +static void bpf_dummy_unreg(void *kdata, struct bpf_link *link) +{ +} + +static int bpf_testmod_test_1(void) +{ + return 0; +} + +static void bpf_testmod_test_2(int a, int b) +{ +} + +static int bpf_testmod_tramp(int value) +{ + return 0; +} + +static int bpf_testmod_ops__test_maybe_null(int dummy, + struct task_struct *task__nullable) +{ + return 0; +} + +static struct bpf_testmod_ops __bpf_testmod_ops = { + .test_1 = bpf_testmod_test_1, + .test_2 = bpf_testmod_test_2, + .test_maybe_null = bpf_testmod_ops__test_maybe_null, +}; + +struct bpf_struct_ops bpf_bpf_testmod_ops = { + .verifier_ops = &bpf_testmod_verifier_ops, + .init = bpf_testmod_ops_init, + .init_member = bpf_testmod_ops_init_member, + .reg = bpf_dummy_reg, + .unreg = bpf_dummy_unreg, + .cfi_stubs = &__bpf_testmod_ops, + .name = "bpf_testmod_ops", + .owner = THIS_MODULE, +}; + +static int bpf_dummy_reg2(void *kdata, struct bpf_link *link) +{ + struct bpf_testmod_ops2 *ops = kdata; + + ops->test_1(); + return 0; +} + +static struct bpf_testmod_ops2 __bpf_testmod_ops2 = { + .test_1 = bpf_testmod_test_1, +}; + +struct bpf_struct_ops bpf_testmod_ops2 = { + .verifier_ops = &bpf_testmod_verifier_ops, + .init = bpf_testmod_ops_init, + .init_member = bpf_testmod_ops_init_member, + .reg = bpf_dummy_reg2, + .unreg = bpf_dummy_unreg, + .cfi_stubs = &__bpf_testmod_ops2, + .name = "bpf_testmod_ops2", + .owner = THIS_MODULE, +}; + +static int st_ops3_reg(void *kdata, struct bpf_link *link) +{ + int err = 0; + + mutex_lock(&st_ops_mutex); + if (st_ops3) { + pr_err("st_ops has already been registered\n"); + err = -EEXIST; + goto unlock; + } + st_ops3 = kdata; + +unlock: + mutex_unlock(&st_ops_mutex); + return err; +} + +static void st_ops3_unreg(void *kdata, struct bpf_link *link) +{ + mutex_lock(&st_ops_mutex); + st_ops3 = NULL; + mutex_unlock(&st_ops_mutex); +} + +static void test_1_recursion_detected(struct bpf_prog *prog) +{ + struct bpf_prog_stats *stats; + + stats = this_cpu_ptr(prog->stats); + printk("bpf_testmod: oh no, recursing into test_1, recursion_misses %llu", + u64_stats_read(&stats->misses)); +} + +static int st_ops3_check_member(const struct btf_type *t, + const struct btf_member *member, + const struct bpf_prog *prog) +{ + u32 moff = __btf_member_bit_offset(t, member) / 8; + + switch (moff) { + case offsetof(struct bpf_testmod_ops3, test_1): + prog->aux->priv_stack_requested = true; + prog->aux->recursion_detected = test_1_recursion_detected; + fallthrough; + default: + break; + } + return 0; +} + +struct bpf_struct_ops bpf_testmod_ops3 = { + .verifier_ops = &bpf_testmod_verifier_ops3, + .init = bpf_testmod_ops_init, + .init_member = bpf_testmod_ops_init_member, + .reg = st_ops3_reg, + .unreg = st_ops3_unreg, + .check_member = st_ops3_check_member, + .cfi_stubs = &__bpf_testmod_ops3, + .name = "bpf_testmod_ops3", + .owner = THIS_MODULE, +}; + +static int bpf_test_mod_st_ops__test_prologue(struct st_ops_args *args) +{ + return 0; +} + +static int bpf_test_mod_st_ops__test_epilogue(struct st_ops_args *args) +{ + return 0; +} + +static int bpf_test_mod_st_ops__test_pro_epilogue(struct st_ops_args *args) +{ + return 0; +} + +static int st_ops_gen_prologue(struct bpf_insn *insn_buf, bool direct_write, + const struct bpf_prog *prog) +{ + struct bpf_insn *insn = insn_buf; + + if (strcmp(prog->aux->attach_func_name, "test_prologue") && + strcmp(prog->aux->attach_func_name, "test_pro_epilogue")) + return 0; + + /* r6 = r1[0]; // r6 will be "struct st_ops *args". 
r1 is "u64 *ctx". + * r7 = r6->a; + * r7 += 1000; + * r6->a = r7; + */ + *insn++ = BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0); + *insn++ = BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_6, offsetof(struct st_ops_args, a)); + *insn++ = BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, 1000); + *insn++ = BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_7, offsetof(struct st_ops_args, a)); + *insn++ = prog->insnsi[0]; + + return insn - insn_buf; +} + +static int st_ops_gen_epilogue(struct bpf_insn *insn_buf, const struct bpf_prog *prog, + s16 ctx_stack_off) +{ + struct bpf_insn *insn = insn_buf; + + if (strcmp(prog->aux->attach_func_name, "test_epilogue") && + strcmp(prog->aux->attach_func_name, "test_pro_epilogue")) + return 0; + + /* r1 = stack[ctx_stack_off]; // r1 will be "u64 *ctx" + * r1 = r1[0]; // r1 will be "struct st_ops *args" + * r6 = r1->a; + * r6 += 10000; + * r1->a = r6; + * r0 = r6; + * r0 *= 2; + * BPF_EXIT; + */ + *insn++ = BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_FP, ctx_stack_off); + *insn++ = BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, 0); + *insn++ = BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, offsetof(struct st_ops_args, a)); + *insn++ = BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 10000); + *insn++ = BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6, offsetof(struct st_ops_args, a)); + *insn++ = BPF_MOV64_REG(BPF_REG_0, BPF_REG_6); + *insn++ = BPF_ALU64_IMM(BPF_MUL, BPF_REG_0, 2); + *insn++ = BPF_EXIT_INSN(); + + return insn - insn_buf; +} + +static int st_ops_btf_struct_access(struct bpf_verifier_log *log, + const struct bpf_reg_state *reg, + int off, int size) +{ + if (off < 0 || off + size > sizeof(struct st_ops_args)) + return -EACCES; + return 0; +} + +static const struct bpf_verifier_ops st_ops_verifier_ops = { + .is_valid_access = bpf_testmod_ops_is_valid_access, + .btf_struct_access = st_ops_btf_struct_access, + .gen_prologue = st_ops_gen_prologue, + .gen_epilogue = st_ops_gen_epilogue, + .get_func_proto = bpf_base_func_proto, +}; + +static struct bpf_testmod_st_ops st_ops_cfi_stubs = { + .test_prologue = bpf_test_mod_st_ops__test_prologue, + .test_epilogue = bpf_test_mod_st_ops__test_epilogue, + .test_pro_epilogue = bpf_test_mod_st_ops__test_pro_epilogue, +}; + +static int st_ops_reg(void *kdata, struct bpf_link *link) +{ + int err = 0; + + mutex_lock(&st_ops_mutex); + if (st_ops) { + pr_err("st_ops has already been registered\n"); + err = -EEXIST; + goto unlock; + } + st_ops = kdata; + +unlock: + mutex_unlock(&st_ops_mutex); + return err; +} + +static void st_ops_unreg(void *kdata, struct bpf_link *link) +{ + mutex_lock(&st_ops_mutex); + st_ops = NULL; + mutex_unlock(&st_ops_mutex); +} + +static int st_ops_init(struct btf *btf) +{ + return 0; +} + +static int st_ops_init_member(const struct btf_type *t, + const struct btf_member *member, + void *kdata, const void *udata) +{ + return 0; +} + +static struct bpf_struct_ops testmod_st_ops = { + .verifier_ops = &st_ops_verifier_ops, + .init = st_ops_init, + .init_member = st_ops_init_member, + .reg = st_ops_reg, + .unreg = st_ops_unreg, + .cfi_stubs = &st_ops_cfi_stubs, + .name = "bpf_testmod_st_ops", + .owner = THIS_MODULE, +}; + +extern int bpf_fentry_test1(int a); + +static int bpf_testmod_init(void) +{ + const struct btf_id_dtor_kfunc bpf_testmod_dtors[] = { + { + .btf_id = bpf_testmod_dtor_ids[0], + .kfunc_btf_id = bpf_testmod_dtor_ids[1] + }, + }; + void **tramp; + int ret; + + ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_UNSPEC, &bpf_testmod_common_kfunc_set); + ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &bpf_testmod_kfunc_set); + ret = ret 
?: register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING, &bpf_testmod_kfunc_set); + ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SYSCALL, &bpf_testmod_kfunc_set); + ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, &bpf_testmod_kfunc_set); + ret = ret ?: register_bpf_struct_ops(&bpf_bpf_testmod_ops, bpf_testmod_ops); + ret = ret ?: register_bpf_struct_ops(&bpf_testmod_ops2, bpf_testmod_ops2); + ret = ret ?: register_bpf_struct_ops(&bpf_testmod_ops3, bpf_testmod_ops3); + ret = ret ?: register_bpf_struct_ops(&testmod_st_ops, bpf_testmod_st_ops); + ret = ret ?: register_btf_id_dtor_kfuncs(bpf_testmod_dtors, + ARRAY_SIZE(bpf_testmod_dtors), + THIS_MODULE); + if (ret < 0) + return ret; + if (bpf_fentry_test1(0) < 0) + return -EINVAL; + sock = NULL; + mutex_init(&sock_lock); + ret = sysfs_create_bin_file(kernel_kobj, &bin_attr_bpf_testmod_file); + if (ret < 0) + return ret; + ret = register_bpf_testmod_uprobe(); + if (ret < 0) + return ret; + + /* Ensure nothing is between tramp_1..tramp_40 */ + BUILD_BUG_ON(offsetof(struct bpf_testmod_ops, tramp_1) + 40 * sizeof(long) != + offsetofend(struct bpf_testmod_ops, tramp_40)); + tramp = (void **)&__bpf_testmod_ops.tramp_1; + while (tramp <= (void **)&__bpf_testmod_ops.tramp_40) + *tramp++ = bpf_testmod_tramp; + + return 0; +} + +static void bpf_testmod_exit(void) +{ + /* Need to wait for all references to be dropped because + * bpf_kfunc_call_test_release() which currently resides in kernel can + * be called after bpf_testmod is unloaded. Once release function is + * moved into the module this wait can be removed. + */ + while (refcount_read(&prog_test_struct.cnt) > 1) + msleep(20); + + bpf_kfunc_close_sock(); + sysfs_remove_bin_file(kernel_kobj, &bin_attr_bpf_testmod_file); + unregister_bpf_testmod_uprobe(); +} + +module_init(bpf_testmod_init); +module_exit(bpf_testmod_exit); + +MODULE_AUTHOR("Andrii Nakryiko"); +MODULE_DESCRIPTION("BPF selftests module"); +MODULE_LICENSE("Dual BSD/GPL"); diff --git a/tools/testing/selftests/bpf/test_kmods/bpf_testmod.h b/tools/testing/selftests/bpf/test_kmods/bpf_testmod.h new file mode 100644 index 000000000000..356803d1c10e --- /dev/null +++ b/tools/testing/selftests/bpf/test_kmods/bpf_testmod.h @@ -0,0 +1,113 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2020 Facebook */ +#ifndef _BPF_TESTMOD_H +#define _BPF_TESTMOD_H + +#include + +struct task_struct; + +struct bpf_testmod_test_read_ctx { + char *buf; + loff_t off; + size_t len; +}; + +struct bpf_testmod_test_write_ctx { + char *buf; + loff_t off; + size_t len; +}; + +struct bpf_testmod_test_writable_ctx { + bool early_ret; + int val; +}; + +/* BPF iter that returns *value* *n* times in a row */ +struct bpf_iter_testmod_seq { + s64 value; + int cnt; +}; + +struct bpf_testmod_ops { + int (*test_1)(void); + void (*test_2)(int a, int b); + /* Used to test nullable arguments. */ + int (*test_maybe_null)(int dummy, struct task_struct *task); + int (*unsupported_ops)(void); + + /* The following fields are used to test shadow copies. */ + char onebyte; + struct { + int a; + int b; + } unsupported; + int data; + + /* The following pointers are used to test the maps having multiple + * pages of trampolines. 
+ */ + int (*tramp_1)(int value); + int (*tramp_2)(int value); + int (*tramp_3)(int value); + int (*tramp_4)(int value); + int (*tramp_5)(int value); + int (*tramp_6)(int value); + int (*tramp_7)(int value); + int (*tramp_8)(int value); + int (*tramp_9)(int value); + int (*tramp_10)(int value); + int (*tramp_11)(int value); + int (*tramp_12)(int value); + int (*tramp_13)(int value); + int (*tramp_14)(int value); + int (*tramp_15)(int value); + int (*tramp_16)(int value); + int (*tramp_17)(int value); + int (*tramp_18)(int value); + int (*tramp_19)(int value); + int (*tramp_20)(int value); + int (*tramp_21)(int value); + int (*tramp_22)(int value); + int (*tramp_23)(int value); + int (*tramp_24)(int value); + int (*tramp_25)(int value); + int (*tramp_26)(int value); + int (*tramp_27)(int value); + int (*tramp_28)(int value); + int (*tramp_29)(int value); + int (*tramp_30)(int value); + int (*tramp_31)(int value); + int (*tramp_32)(int value); + int (*tramp_33)(int value); + int (*tramp_34)(int value); + int (*tramp_35)(int value); + int (*tramp_36)(int value); + int (*tramp_37)(int value); + int (*tramp_38)(int value); + int (*tramp_39)(int value); + int (*tramp_40)(int value); +}; + +struct bpf_testmod_ops2 { + int (*test_1)(void); +}; + +struct bpf_testmod_ops3 { + int (*test_1)(void); + int (*test_2)(void); +}; + +struct st_ops_args { + u64 a; +}; + +struct bpf_testmod_st_ops { + int (*test_prologue)(struct st_ops_args *args); + int (*test_epilogue)(struct st_ops_args *args); + int (*test_pro_epilogue)(struct st_ops_args *args); + struct module *owner; +}; + +#endif /* _BPF_TESTMOD_H */ diff --git a/tools/testing/selftests/bpf/test_kmods/bpf_testmod_kfunc.h b/tools/testing/selftests/bpf/test_kmods/bpf_testmod_kfunc.h new file mode 100644 index 000000000000..b58817938deb --- /dev/null +++ b/tools/testing/selftests/bpf/test_kmods/bpf_testmod_kfunc.h @@ -0,0 +1,162 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _BPF_TESTMOD_KFUNC_H +#define _BPF_TESTMOD_KFUNC_H + +#ifndef __KERNEL__ +#include +#include +#else +#define __ksym +struct prog_test_member1 { + int a; +}; + +struct prog_test_member { + struct prog_test_member1 m; + int c; +}; + +struct prog_test_ref_kfunc { + int a; + int b; + struct prog_test_member memb; + struct prog_test_ref_kfunc *next; + refcount_t cnt; +}; +#endif + +struct prog_test_pass1 { + int x0; + struct { + int x1; + struct { + int x2; + struct { + int x3; + }; + }; + }; +}; + +struct prog_test_pass2 { + int len; + short arr1[4]; + struct { + char arr2[4]; + unsigned long arr3[8]; + } x; +}; + +struct prog_test_fail1 { + void *p; + int x; +}; + +struct prog_test_fail2 { + int x8; + struct prog_test_pass1 x; +}; + +struct prog_test_fail3 { + int len; + char arr1[2]; + char arr2[]; +}; + +struct init_sock_args { + int af; + int type; +}; + +struct addr_args { + char addr[sizeof(struct __kernel_sockaddr_storage)]; + int addrlen; +}; + +struct sendmsg_args { + struct addr_args addr; + char msg[10]; + int msglen; +}; + +struct bpf_testmod_ctx { + struct callback_head rcu; + refcount_t usage; +}; + +struct prog_test_ref_kfunc * +bpf_kfunc_call_test_acquire(unsigned long *scalar_ptr) __ksym; +void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p) __ksym; +void bpf_kfunc_call_test_ref(struct prog_test_ref_kfunc *p) __ksym; + +void bpf_kfunc_call_test_mem_len_pass1(void *mem, int len) __ksym; +int *bpf_kfunc_call_test_get_rdwr_mem(struct prog_test_ref_kfunc *p, const int rdwr_buf_size) __ksym; +int *bpf_kfunc_call_test_get_rdonly_mem(struct prog_test_ref_kfunc 
*p, const int rdonly_buf_size) __ksym; +int *bpf_kfunc_call_test_acq_rdonly_mem(struct prog_test_ref_kfunc *p, const int rdonly_buf_size) __ksym; +void bpf_kfunc_call_int_mem_release(int *p) __ksym; + +/* The bpf_kfunc_call_test_static_unused_arg is defined as static, + * but bpf program compilation needs to see it as global symbol. + */ +#ifndef __KERNEL__ +u32 bpf_kfunc_call_test_static_unused_arg(u32 arg, u32 unused) __ksym; +#endif + +void bpf_testmod_test_mod_kfunc(int i) __ksym; + +__u64 bpf_kfunc_call_test1(struct sock *sk, __u32 a, __u64 b, + __u32 c, __u64 d) __ksym; +int bpf_kfunc_call_test2(struct sock *sk, __u32 a, __u32 b) __ksym; +struct sock *bpf_kfunc_call_test3(struct sock *sk) __ksym; +long bpf_kfunc_call_test4(signed char a, short b, int c, long d) __ksym; + +void bpf_kfunc_call_test_pass_ctx(struct __sk_buff *skb) __ksym; +void bpf_kfunc_call_test_pass1(struct prog_test_pass1 *p) __ksym; +void bpf_kfunc_call_test_pass2(struct prog_test_pass2 *p) __ksym; +void bpf_kfunc_call_test_mem_len_fail2(__u64 *mem, int len) __ksym; + +void bpf_kfunc_call_test_destructive(void) __ksym; +void bpf_kfunc_call_test_sleepable(void) __ksym; + +void bpf_kfunc_call_test_offset(struct prog_test_ref_kfunc *p); +struct prog_test_member *bpf_kfunc_call_memb_acquire(void); +void bpf_kfunc_call_memb1_release(struct prog_test_member1 *p); +void bpf_kfunc_call_test_fail1(struct prog_test_fail1 *p); +void bpf_kfunc_call_test_fail2(struct prog_test_fail2 *p); +void bpf_kfunc_call_test_fail3(struct prog_test_fail3 *p); +void bpf_kfunc_call_test_mem_len_fail1(void *mem, int len); + +void bpf_kfunc_common_test(void) __ksym; + +int bpf_kfunc_init_sock(struct init_sock_args *args) __ksym; +void bpf_kfunc_close_sock(void) __ksym; +int bpf_kfunc_call_kernel_connect(struct addr_args *args) __ksym; +int bpf_kfunc_call_kernel_bind(struct addr_args *args) __ksym; +int bpf_kfunc_call_kernel_listen(void) __ksym; +int bpf_kfunc_call_kernel_sendmsg(struct sendmsg_args *args) __ksym; +int bpf_kfunc_call_sock_sendmsg(struct sendmsg_args *args) __ksym; +int bpf_kfunc_call_kernel_getsockname(struct addr_args *args) __ksym; +int bpf_kfunc_call_kernel_getpeername(struct addr_args *args) __ksym; + +void bpf_kfunc_dynptr_test(struct bpf_dynptr *ptr, struct bpf_dynptr *ptr__nullable) __ksym; + +struct bpf_testmod_ctx *bpf_testmod_ctx_create(int *err) __ksym; +void bpf_testmod_ctx_release(struct bpf_testmod_ctx *ctx) __ksym; + +struct sk_buff *bpf_kfunc_nested_acquire_nonzero_offset_test(struct sk_buff_head *ptr) __ksym; +struct sk_buff *bpf_kfunc_nested_acquire_zero_offset_test(struct sock_common *ptr) __ksym; +void bpf_kfunc_nested_release_test(struct sk_buff *ptr) __ksym; + +struct st_ops_args; +int bpf_kfunc_st_ops_test_prologue(struct st_ops_args *args) __ksym; +int bpf_kfunc_st_ops_test_epilogue(struct st_ops_args *args) __ksym; +int bpf_kfunc_st_ops_test_pro_epilogue(struct st_ops_args *args) __ksym; +int bpf_kfunc_st_ops_inc10(struct st_ops_args *args) __ksym; + +void bpf_kfunc_trusted_vma_test(struct vm_area_struct *ptr) __ksym; +void bpf_kfunc_trusted_task_test(struct task_struct *ptr) __ksym; +void bpf_kfunc_trusted_num_test(int *ptr) __ksym; +void bpf_kfunc_rcu_task_test(struct task_struct *ptr) __ksym; + +#endif /* _BPF_TESTMOD_KFUNC_H */ -- cgit v1.2.3 From 9d93db0d1881c9e37e1528cd796e20ff13b7692c Mon Sep 17 00:00:00 2001 From: Gautam Somani Date: Sun, 1 Dec 2024 03:41:02 +0900 Subject: x86/mm/selftests: Fix typo in lam.c Change the spelling from metadate -> metadata Signed-off-by: Gautam Somani Signed-off-by: 
Ingo Molnar Link: https://lore.kernel.org/r/20241130184102.2182-1-gautamsomani@gmail.com --- tools/testing/selftests/x86/lam.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/x86/lam.c b/tools/testing/selftests/x86/lam.c index 0ea4f6813930..4d4a76532dc9 100644 --- a/tools/testing/selftests/x86/lam.c +++ b/tools/testing/selftests/x86/lam.c @@ -237,7 +237,7 @@ static uint64_t set_metadata(uint64_t src, unsigned long lam) * both pointers should point to the same address. * * @return: - * 0: value on the pointer with metadate and value on original are same + * 0: value on the pointer with metadata and value on original are same * 1: not same. */ static int handle_lam_test(void *src, unsigned int lam) -- cgit v1.2.3 From 48697bdfb65d21bab8c686830b04bf2e47b96d52 Mon Sep 17 00:00:00 2001 From: Joe Damato Date: Wed, 4 Dec 2024 16:32:39 +0000 Subject: selftests: net: cleanup busy_poller.c Fix various integer type conversions by using strtoull and a temporary variable which is bounds checked before being casted into the appropriate cfg_* variable for use by the test program. While here: - free the strdup'd cfg string for overall hygenie. - initialize napi_id = 0 in setup_queue to avoid warnings on some compilers. Signed-off-by: Joe Damato Acked-by: Stanislav Fomichev Link: https://patch.msgid.link/20241204163239.294123-1-jdamato@fastly.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/busy_poller.c | 88 ++++++++++++++++++------------- 1 file changed, 50 insertions(+), 38 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/busy_poller.c b/tools/testing/selftests/net/busy_poller.c index 99b0e8c17fca..04c7ff577bb8 100644 --- a/tools/testing/selftests/net/busy_poller.c +++ b/tools/testing/selftests/net/busy_poller.c @@ -54,16 +54,16 @@ struct epoll_params { #define EPIOCGPARAMS _IOR(EPOLL_IOC_TYPE, 0x02, struct epoll_params) #endif -static uint32_t cfg_port = 8000; +static uint16_t cfg_port = 8000; static struct in_addr cfg_bind_addr = { .s_addr = INADDR_ANY }; static char *cfg_outfile; static int cfg_max_events = 8; -static int cfg_ifindex; +static uint32_t cfg_ifindex; /* busy poll params */ static uint32_t cfg_busy_poll_usecs; -static uint32_t cfg_busy_poll_budget; -static uint32_t cfg_prefer_busy_poll; +static uint16_t cfg_busy_poll_budget; +static uint8_t cfg_prefer_busy_poll; /* IRQ params */ static uint32_t cfg_defer_hard_irqs; @@ -79,6 +79,7 @@ static void usage(const char *filepath) static void parse_opts(int argc, char **argv) { + unsigned long long tmp; int ret; int c; @@ -86,31 +87,40 @@ static void parse_opts(int argc, char **argv) usage(argv[0]); while ((c = getopt(argc, argv, "p:m:b:u:P:g:o:d:r:s:i:")) != -1) { + /* most options take integer values, except o and b, so reduce + * code duplication a bit for the common case by calling + * strtoull here and leave bounds checking and casting per + * option below. 
+ */ + if (c != 'o' && c != 'b') + tmp = strtoull(optarg, NULL, 0); + switch (c) { case 'u': - cfg_busy_poll_usecs = strtoul(optarg, NULL, 0); - if (cfg_busy_poll_usecs == ULONG_MAX || - cfg_busy_poll_usecs > UINT32_MAX) + if (tmp == ULLONG_MAX || tmp > UINT32_MAX) error(1, ERANGE, "busy_poll_usecs too large"); + + cfg_busy_poll_usecs = (uint32_t)tmp; break; case 'P': - cfg_prefer_busy_poll = strtoul(optarg, NULL, 0); - if (cfg_prefer_busy_poll == ULONG_MAX || - cfg_prefer_busy_poll > 1) + if (tmp == ULLONG_MAX || tmp > 1) error(1, ERANGE, "prefer busy poll should be 0 or 1"); + + cfg_prefer_busy_poll = (uint8_t)tmp; break; case 'g': - cfg_busy_poll_budget = strtoul(optarg, NULL, 0); - if (cfg_busy_poll_budget == ULONG_MAX || - cfg_busy_poll_budget > UINT16_MAX) + if (tmp == ULLONG_MAX || tmp > UINT16_MAX) error(1, ERANGE, "busy poll budget must be [0, UINT16_MAX]"); + + cfg_busy_poll_budget = (uint16_t)tmp; break; case 'p': - cfg_port = strtoul(optarg, NULL, 0); - if (cfg_port > UINT16_MAX) + if (tmp == ULLONG_MAX || tmp > UINT16_MAX) error(1, ERANGE, "port must be <= 65535"); + + cfg_port = (uint16_t)tmp; break; case 'b': ret = inet_aton(optarg, &cfg_bind_addr); @@ -124,41 +134,39 @@ static void parse_opts(int argc, char **argv) error(1, 0, "outfile invalid"); break; case 'm': - cfg_max_events = strtol(optarg, NULL, 0); - - if (cfg_max_events == LONG_MIN || - cfg_max_events == LONG_MAX || - cfg_max_events <= 0) + if (tmp == ULLONG_MAX || tmp > INT_MAX) error(1, ERANGE, - "max events must be > 0 and < LONG_MAX"); + "max events must be > 0 and <= INT_MAX"); + + cfg_max_events = (int)tmp; break; case 'd': - cfg_defer_hard_irqs = strtoul(optarg, NULL, 0); - - if (cfg_defer_hard_irqs == ULONG_MAX || - cfg_defer_hard_irqs > INT32_MAX) + if (tmp == ULLONG_MAX || tmp > INT32_MAX) error(1, ERANGE, "defer_hard_irqs must be <= INT32_MAX"); + + cfg_defer_hard_irqs = (uint32_t)tmp; break; case 'r': - cfg_gro_flush_timeout = strtoull(optarg, NULL, 0); - - if (cfg_gro_flush_timeout == ULLONG_MAX) + if (tmp == ULLONG_MAX || tmp > UINT64_MAX) error(1, ERANGE, - "gro_flush_timeout must be < ULLONG_MAX"); + "gro_flush_timeout must be < UINT64_MAX"); + + cfg_gro_flush_timeout = (uint64_t)tmp; break; case 's': - cfg_irq_suspend_timeout = strtoull(optarg, NULL, 0); - - if (cfg_irq_suspend_timeout == ULLONG_MAX) + if (tmp == ULLONG_MAX || tmp > UINT64_MAX) error(1, ERANGE, "irq_suspend_timeout must be < ULLONG_MAX"); + + cfg_irq_suspend_timeout = (uint64_t)tmp; break; case 'i': - cfg_ifindex = strtoul(optarg, NULL, 0); - if (cfg_ifindex == ULONG_MAX) + if (tmp == ULLONG_MAX || tmp > INT_MAX) error(1, ERANGE, - "ifindex must be < ULONG_MAX"); + "ifindex must be <= INT_MAX"); + + cfg_ifindex = (int)tmp; break; } } @@ -215,7 +223,7 @@ static void setup_queue(void) struct netdev_napi_set_req *set_req = NULL; struct ynl_sock *ys; struct ynl_error yerr; - uint32_t napi_id; + uint32_t napi_id = 0; ys = ynl_sock_create(&ynl_netdev_family, &yerr); if (!ys) @@ -277,8 +285,8 @@ static void run_poller(void) * here */ epoll_params.busy_poll_usecs = cfg_busy_poll_usecs; - epoll_params.busy_poll_budget = (uint16_t)cfg_busy_poll_budget; - epoll_params.prefer_busy_poll = (uint8_t)cfg_prefer_busy_poll; + epoll_params.busy_poll_budget = cfg_busy_poll_budget; + epoll_params.prefer_busy_poll = cfg_prefer_busy_poll; epoll_params.__pad = 0; val = 1; @@ -342,5 +350,9 @@ int main(int argc, char *argv[]) parse_opts(argc, argv); setup_queue(); run_poller(); + + if (cfg_outfile) + free(cfg_outfile); + return 0; } -- cgit v1.2.3 From 
00ab246750821b226f14ebc94ad21431dc82820b Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 6 Dec 2024 11:30:56 +0100 Subject: tools: ynl-gen-c: annotate valid choices for --mode This makes argparse validate the input and helps users understand which modes are possible. Signed-off-by: Johannes Berg Link: https://patch.msgid.link/20241206113100.e2ab5cf6937c.Ie149a0ca5df713860964b44fe9d9ae547f2e1553@changeid Signed-off-by: Jakub Kicinski --- tools/net/ynl/ynl-gen-c.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/net/ynl/ynl-gen-c.py b/tools/net/ynl/ynl-gen-c.py index 8098bcbb6f40..7f6e5157770d 100755 --- a/tools/net/ynl/ynl-gen-c.py +++ b/tools/net/ynl/ynl-gen-c.py @@ -2706,7 +2706,8 @@ def find_kernel_root(full_path): def main(): parser = argparse.ArgumentParser(description='Netlink simple parsing generator') - parser.add_argument('--mode', dest='mode', type=str, required=True) + parser.add_argument('--mode', dest='mode', type=str, required=True, + choices=('user', 'kernel', 'uapi')) parser.add_argument('--spec', dest='spec', type=str, required=True) parser.add_argument('--header', dest='header', action='store_true', default=None) parser.add_argument('--source', dest='header', action='store_false') -- cgit v1.2.3 From 81d89e6e88d5d592c1792940753d69d9753b3a8a Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 6 Dec 2024 11:30:57 +0100 Subject: tools: ynl-gen-c: don't require -o argument Without -o the tool currently crashes, but it's not marked as required. The only thing we can't do without it is to generate the correct #include for user source files, but we can put a placeholder instead. Signed-off-by: Johannes Berg Link: https://patch.msgid.link/20241206113100.89d35bf124d6.I9228fb704e6d5c9d8e046ef15025a47a48439c1e@changeid Signed-off-by: Jakub Kicinski --- tools/net/ynl/ynl-gen-c.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/net/ynl/ynl-gen-c.py b/tools/net/ynl/ynl-gen-c.py index 7f6e5157770d..ec2288948795 100755 --- a/tools/net/ynl/ynl-gen-c.py +++ b/tools/net/ynl/ynl-gen-c.py @@ -2761,7 +2761,10 @@ def main(): cw.p('#define ' + hdr_prot) cw.nl() - hdr_file=os.path.basename(args.out_file[:-2]) + ".h" + if args.out_file: + hdr_file = os.path.basename(args.out_file[:-2]) + ".h" + else: + hdr_file = "generated_header_file.h" if args.mode == 'kernel': cw.p('#include ') -- cgit v1.2.3 From 477fb0671feb7b51fed9a803aeafa64d66cf6101 Mon Sep 17 00:00:00 2001 From: George Guo Date: Mon, 25 Nov 2024 19:28:12 +0800 Subject: selftests/livepatch: Replace hardcoded module name with variable in test-callbacks.sh Replaced the hardcoded module name test_klp_callbacks_demo in the pre_patch_callback log message with the variable $MOD_LIVEPATCH. 
Signed-off-by: George Guo Reviewed-by: Petr Mladek Link: https://lore.kernel.org/r/20241125112812.281018-2-dongtai.guo@linux.dev Signed-off-by: Petr Mladek --- tools/testing/selftests/livepatch/test-callbacks.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/livepatch/test-callbacks.sh b/tools/testing/selftests/livepatch/test-callbacks.sh index 37bbc3fb2780..2a03deb26a12 100755 --- a/tools/testing/selftests/livepatch/test-callbacks.sh +++ b/tools/testing/selftests/livepatch/test-callbacks.sh @@ -259,7 +259,7 @@ $MOD_TARGET: ${MOD_TARGET}_init % insmod test_modules/$MOD_LIVEPATCH.ko pre_patch_ret=-19 livepatch: enabling patch '$MOD_LIVEPATCH' livepatch: '$MOD_LIVEPATCH': initializing patching transition -test_klp_callbacks_demo: pre_patch_callback: vmlinux +$MOD_LIVEPATCH: pre_patch_callback: vmlinux livepatch: pre-patch callback failed for object 'vmlinux' livepatch: failed to enable patch '$MOD_LIVEPATCH' livepatch: '$MOD_LIVEPATCH': canceling patching transition, going to unpatch -- cgit v1.2.3 From ed2ec63d3faa4a3fda0dbf7cca93a9d0d9793100 Mon Sep 17 00:00:00 2001 From: Wardenjohn Date: Thu, 24 Oct 2024 16:35:30 +0800 Subject: selftests: livepatch: add test cases of stack_order sysfs interface Add selftest test cases to sysfs attribute 'stack_order'. Suggested-by: Petr Mladek Signed-off-by: Wardenjohn Reviewed-by: Petr Mladek Tested-by: Petr Mladek Link: https://lore.kernel.org/r/20241024083530.58775-1-zhangwarden@gmail.com [pmladek@suse.com: Substitute $SYSFS_KLP_DIR] Signed-off-by: Petr Mladek --- tools/testing/selftests/livepatch/test-sysfs.sh | 71 +++++++++++++++++++++++++ 1 file changed, 71 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/livepatch/test-sysfs.sh b/tools/testing/selftests/livepatch/test-sysfs.sh index 2c91428d2997..58fe1d96997c 100755 --- a/tools/testing/selftests/livepatch/test-sysfs.sh +++ b/tools/testing/selftests/livepatch/test-sysfs.sh @@ -5,6 +5,8 @@ . 
$(dirname $0)/functions.sh MOD_LIVEPATCH=test_klp_livepatch +MOD_LIVEPATCH2=test_klp_callbacks_demo +MOD_LIVEPATCH3=test_klp_syscall setup_config @@ -19,6 +21,8 @@ check_sysfs_rights "$MOD_LIVEPATCH" "enabled" "-rw-r--r--" check_sysfs_value "$MOD_LIVEPATCH" "enabled" "1" check_sysfs_rights "$MOD_LIVEPATCH" "force" "--w-------" check_sysfs_rights "$MOD_LIVEPATCH" "replace" "-r--r--r--" +check_sysfs_rights "$MOD_LIVEPATCH" "stack_order" "-r--r--r--" +check_sysfs_value "$MOD_LIVEPATCH" "stack_order" "1" check_sysfs_rights "$MOD_LIVEPATCH" "transition" "-r--r--r--" check_sysfs_value "$MOD_LIVEPATCH" "transition" "0" check_sysfs_rights "$MOD_LIVEPATCH" "vmlinux/patched" "-r--r--r--" @@ -131,4 +135,71 @@ livepatch: '$MOD_LIVEPATCH': completing unpatching transition livepatch: '$MOD_LIVEPATCH': unpatching complete % rmmod $MOD_LIVEPATCH" +start_test "sysfs test stack_order value" + +load_lp $MOD_LIVEPATCH + +check_sysfs_value "$MOD_LIVEPATCH" "stack_order" "1" + +load_lp $MOD_LIVEPATCH2 + +check_sysfs_value "$MOD_LIVEPATCH2" "stack_order" "2" + +load_lp $MOD_LIVEPATCH3 + +check_sysfs_value "$MOD_LIVEPATCH3" "stack_order" "3" + +disable_lp $MOD_LIVEPATCH2 +unload_lp $MOD_LIVEPATCH2 + +check_sysfs_value "$MOD_LIVEPATCH" "stack_order" "1" +check_sysfs_value "$MOD_LIVEPATCH3" "stack_order" "2" + +disable_lp $MOD_LIVEPATCH3 +unload_lp $MOD_LIVEPATCH3 + +disable_lp $MOD_LIVEPATCH +unload_lp $MOD_LIVEPATCH + +check_result "% insmod test_modules/$MOD_LIVEPATCH.ko +livepatch: enabling patch '$MOD_LIVEPATCH' +livepatch: '$MOD_LIVEPATCH': initializing patching transition +livepatch: '$MOD_LIVEPATCH': starting patching transition +livepatch: '$MOD_LIVEPATCH': completing patching transition +livepatch: '$MOD_LIVEPATCH': patching complete +% insmod test_modules/$MOD_LIVEPATCH2.ko +livepatch: enabling patch '$MOD_LIVEPATCH2' +livepatch: '$MOD_LIVEPATCH2': initializing patching transition +$MOD_LIVEPATCH2: pre_patch_callback: vmlinux +livepatch: '$MOD_LIVEPATCH2': starting patching transition +livepatch: '$MOD_LIVEPATCH2': completing patching transition +$MOD_LIVEPATCH2: post_patch_callback: vmlinux +livepatch: '$MOD_LIVEPATCH2': patching complete +% insmod test_modules/$MOD_LIVEPATCH3.ko +livepatch: enabling patch '$MOD_LIVEPATCH3' +livepatch: '$MOD_LIVEPATCH3': initializing patching transition +livepatch: '$MOD_LIVEPATCH3': starting patching transition +livepatch: '$MOD_LIVEPATCH3': completing patching transition +livepatch: '$MOD_LIVEPATCH3': patching complete +% echo 0 > $SYSFS_KLP_DIR/$MOD_LIVEPATCH2/enabled +livepatch: '$MOD_LIVEPATCH2': initializing unpatching transition +$MOD_LIVEPATCH2: pre_unpatch_callback: vmlinux +livepatch: '$MOD_LIVEPATCH2': starting unpatching transition +livepatch: '$MOD_LIVEPATCH2': completing unpatching transition +$MOD_LIVEPATCH2: post_unpatch_callback: vmlinux +livepatch: '$MOD_LIVEPATCH2': unpatching complete +% rmmod $MOD_LIVEPATCH2 +% echo 0 > $SYSFS_KLP_DIR/$MOD_LIVEPATCH3/enabled +livepatch: '$MOD_LIVEPATCH3': initializing unpatching transition +livepatch: '$MOD_LIVEPATCH3': starting unpatching transition +livepatch: '$MOD_LIVEPATCH3': completing unpatching transition +livepatch: '$MOD_LIVEPATCH3': unpatching complete +% rmmod $MOD_LIVEPATCH3 +% echo 0 > $SYSFS_KLP_DIR/$MOD_LIVEPATCH/enabled +livepatch: '$MOD_LIVEPATCH': initializing unpatching transition +livepatch: '$MOD_LIVEPATCH': starting unpatching transition +livepatch: '$MOD_LIVEPATCH': completing unpatching transition +livepatch: '$MOD_LIVEPATCH': unpatching complete +% rmmod $MOD_LIVEPATCH" + exit 0 -- cgit 
v1.2.3 From 82c1f13de315f38ecdb63d8b0e63ad7d70994d55 Mon Sep 17 00:00:00 2001 From: Mykyta Yatsenko Date: Mon, 9 Dec 2024 13:04:55 +0000 Subject: selftests/bpf: Add more stats into veristat Extend veristat to collect and print more stats, namely: - program size in instructions - jited program size in bytes - program type - attach type - stack depth Signed-off-by: Mykyta Yatsenko Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20241209130455.94592-1-mykyta.yatsenko5@gmail.com --- tools/testing/selftests/bpf/veristat.c | 64 ++++++++++++++++++++++++++++++---- 1 file changed, 58 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/veristat.c b/tools/testing/selftests/bpf/veristat.c index e12ef953fba8..162fe27d06f8 100644 --- a/tools/testing/selftests/bpf/veristat.c +++ b/tools/testing/selftests/bpf/veristat.c @@ -34,6 +34,11 @@ enum stat_id { PEAK_STATES, MAX_STATES_PER_INSN, MARK_READ_MAX_LEN, + SIZE, + JITED_SIZE, + STACK, + PROG_TYPE, + ATTACH_TYPE, FILE_NAME, PROG_NAME, @@ -640,19 +645,21 @@ cleanup: } static const struct stat_specs default_output_spec = { - .spec_cnt = 7, + .spec_cnt = 8, .ids = { FILE_NAME, PROG_NAME, VERDICT, DURATION, - TOTAL_INSNS, TOTAL_STATES, PEAK_STATES, + TOTAL_INSNS, TOTAL_STATES, SIZE, JITED_SIZE }, }; static const struct stat_specs default_csv_output_spec = { - .spec_cnt = 9, + .spec_cnt = 14, .ids = { FILE_NAME, PROG_NAME, VERDICT, DURATION, TOTAL_INSNS, TOTAL_STATES, PEAK_STATES, MAX_STATES_PER_INSN, MARK_READ_MAX_LEN, + SIZE, JITED_SIZE, PROG_TYPE, ATTACH_TYPE, + STACK, }, }; @@ -688,6 +695,11 @@ static struct stat_def { [PEAK_STATES] = { "Peak states", {"peak_states"}, }, [MAX_STATES_PER_INSN] = { "Max states per insn", {"max_states_per_insn"}, }, [MARK_READ_MAX_LEN] = { "Max mark read length", {"max_mark_read_len", "mark_read"}, }, + [SIZE] = { "Program size", {"prog_size"}, }, + [JITED_SIZE] = { "Jited size", {"prog_size_jited"}, }, + [STACK] = {"Stack depth", {"stack_depth", "stack"}, }, + [PROG_TYPE] = { "Program type", {"prog_type"}, }, + [ATTACH_TYPE] = { "Attach type", {"attach_type", }, }, }; static bool parse_stat_id_var(const char *name, size_t len, int *id, @@ -835,7 +847,8 @@ static char verif_log_buf[64 * 1024]; static int parse_verif_log(char * const buf, size_t buf_sz, struct verif_stats *s) { const char *cur; - int pos, lines; + int pos, lines, sub_stack, cnt = 0; + char *state = NULL, *token, stack[512]; buf[buf_sz - 1] = '\0'; @@ -853,15 +866,22 @@ static int parse_verif_log(char * const buf, size_t buf_sz, struct verif_stats * if (1 == sscanf(cur, "verification time %ld usec\n", &s->stats[DURATION])) continue; - if (6 == sscanf(cur, "processed %ld insns (limit %*d) max_states_per_insn %ld total_states %ld peak_states %ld mark_read %ld", + if (5 == sscanf(cur, "processed %ld insns (limit %*d) max_states_per_insn %ld total_states %ld peak_states %ld mark_read %ld", &s->stats[TOTAL_INSNS], &s->stats[MAX_STATES_PER_INSN], &s->stats[TOTAL_STATES], &s->stats[PEAK_STATES], &s->stats[MARK_READ_MAX_LEN])) continue; - } + if (1 == sscanf(cur, "stack depth %511s", stack)) + continue; + } + while ((token = strtok_r(cnt++ ? 
NULL : stack, "+", &state))) { + if (sscanf(token, "%d", &sub_stack) == 0) + break; + s->stats[STACK] += sub_stack; + } return 0; } @@ -1146,8 +1166,11 @@ static int process_prog(const char *filename, struct bpf_object *obj, struct bpf char *buf; int buf_sz, log_level; struct verif_stats *stats; + struct bpf_prog_info info; + __u32 info_len = sizeof(info); int err = 0; void *tmp; + int fd; if (!should_process_file_prog(base_filename, bpf_program__name(prog))) { env.progs_skipped++; @@ -1196,6 +1219,15 @@ static int process_prog(const char *filename, struct bpf_object *obj, struct bpf stats->file_name = strdup(base_filename); stats->prog_name = strdup(bpf_program__name(prog)); stats->stats[VERDICT] = err == 0; /* 1 - success, 0 - failure */ + stats->stats[SIZE] = bpf_program__insn_cnt(prog); + stats->stats[PROG_TYPE] = bpf_program__type(prog); + stats->stats[ATTACH_TYPE] = bpf_program__expected_attach_type(prog); + + memset(&info, 0, info_len); + fd = bpf_program__fd(prog); + if (fd > 0 && bpf_prog_get_info_by_fd(fd, &info, &info_len) == 0) + stats->stats[JITED_SIZE] = info.jited_prog_len; + parse_verif_log(buf, buf_sz, stats); if (env.verbose) { @@ -1309,6 +1341,11 @@ static int cmp_stat(const struct verif_stats *s1, const struct verif_stats *s2, case PROG_NAME: cmp = strcmp(s1->prog_name, s2->prog_name); break; + case ATTACH_TYPE: + case PROG_TYPE: + case SIZE: + case JITED_SIZE: + case STACK: case VERDICT: case DURATION: case TOTAL_INSNS: @@ -1523,12 +1560,27 @@ static void prepare_value(const struct verif_stats *s, enum stat_id id, else *str = s->stats[VERDICT] ? "success" : "failure"; break; + case ATTACH_TYPE: + if (!s) + *str = "N/A"; + else + *str = libbpf_bpf_attach_type_str(s->stats[ATTACH_TYPE]) ?: "N/A"; + break; + case PROG_TYPE: + if (!s) + *str = "N/A"; + else + *str = libbpf_bpf_prog_type_str(s->stats[PROG_TYPE]) ?: "N/A"; + break; case DURATION: case TOTAL_INSNS: case TOTAL_STATES: case PEAK_STATES: case MAX_STATES_PER_INSN: case MARK_READ_MAX_LEN: + case STACK: + case SIZE: + case JITED_SIZE: *val = s ? s->stats[id] : 0; break; default: -- cgit v1.2.3 From cb1b78f1c726c938bd47497c1ab16b01ce967f37 Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Tue, 10 Sep 2024 00:44:32 +0000 Subject: tools: hv: Fix a complier warning in the fcopy uio daemon hv_fcopy_uio_daemon.c:436:53: warning: '%s' directive output may be truncated writing up to 14 bytes into a region of size 10 [-Wformat-truncation=] 436 | snprintf(uio_dev_path, sizeof(uio_dev_path), "/dev/%s", uio_name); Also added 'static' for the array 'desc[]'. 
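To make the arithmetic behind the warning concrete, here is a small standalone sketch (the buffer and function names are placeholders, not the daemon's code): with the old 15-byte buffers, the 5-byte "/dev/" prefix leaves 10 destination bytes for a name that may be up to 14 bytes plus the terminating NUL, which is exactly what -Wformat-truncation reports. Sizing the destination with PATH_MAX, as the patch does, makes truncation impossible; the return-value check shown here is an optional extra, not taken from the patch.

#include <limits.h>	/* PATH_MAX, NAME_MAX */
#include <stdio.h>

static char uio_name[NAME_MAX] = "uio0";	/* placeholder device name */
static char uio_dev_path[PATH_MAX];

static int build_uio_dev_path(void)
{
	int n;

	/* "/dev/" plus any NAME_MAX-sized name fits in PATH_MAX bytes,
	 * so gcc can no longer prove possible truncation and stays quiet.
	 */
	n = snprintf(uio_dev_path, sizeof(uio_dev_path), "/dev/%s", uio_name);
	if (n < 0 || (size_t)n >= sizeof(uio_dev_path))
		return -1;	/* error or truncated */
	return 0;
}
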
Fixes: 82b0945ce2c2 ("tools: hv: Add new fcopy application based on uio driver") Cc: stable@vger.kernel.org # 6.10+ Signed-off-by: Dexuan Cui Reviewed-by: Saurabh Sengar Link: https://lore.kernel.org/r/20240910004433.50254-1-decui@microsoft.com Signed-off-by: Wei Liu Message-ID: <20240910004433.50254-1-decui@microsoft.com> --- tools/hv/hv_fcopy_uio_daemon.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/hv/hv_fcopy_uio_daemon.c b/tools/hv/hv_fcopy_uio_daemon.c index 7a00f3066a98..12743d7f164f 100644 --- a/tools/hv/hv_fcopy_uio_daemon.c +++ b/tools/hv/hv_fcopy_uio_daemon.c @@ -35,8 +35,6 @@ #define WIN8_SRV_MINOR 1 #define WIN8_SRV_VERSION (WIN8_SRV_MAJOR << 16 | WIN8_SRV_MINOR) -#define MAX_FOLDER_NAME 15 -#define MAX_PATH_LEN 15 #define FCOPY_UIO "/sys/bus/vmbus/devices/eb765408-105f-49b6-b4aa-c123b64d17d4/uio" #define FCOPY_VER_COUNT 1 @@ -51,7 +49,7 @@ static const int fw_versions[] = { #define HV_RING_SIZE 0x4000 /* 16KB ring buffer size */ -unsigned char desc[HV_RING_SIZE]; +static unsigned char desc[HV_RING_SIZE]; static int target_fd; static char target_fname[PATH_MAX]; @@ -409,8 +407,8 @@ int main(int argc, char *argv[]) struct vmbus_br txbr, rxbr; void *ring; uint32_t len = HV_RING_SIZE; - char uio_name[MAX_FOLDER_NAME] = {0}; - char uio_dev_path[MAX_PATH_LEN] = {0}; + char uio_name[NAME_MAX] = {0}; + char uio_dev_path[PATH_MAX] = {0}; static struct option long_options[] = { {"help", no_argument, 0, 'h' }, -- cgit v1.2.3 From 91ae69c7ed9e262f24240c425ad1eef2cf6639b7 Mon Sep 17 00:00:00 2001 From: Olaf Hering Date: Wed, 16 Oct 2024 16:35:10 +0200 Subject: tools: hv: change permissions of NetworkManager configuration file Align permissions of the resulting .nmconnection file, instead of the input file from hv_kvp_daemon. To avoid the tiny time frame where the output file is world-readable, use umask instead of chmod. Fixes: 42999c904612 ("hv/hv_kvp_daemon:Support for keyfile based connection profile") Signed-off-by: Olaf Hering Reviewed-by: Shradha Gupta Link: https://lore.kernel.org/r/20241016143521.3735-1-olaf@aepfle.de Signed-off-by: Wei Liu Message-ID: <20241016143521.3735-1-olaf@aepfle.de> --- tools/hv/hv_set_ifconfig.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/hv/hv_set_ifconfig.sh b/tools/hv/hv_set_ifconfig.sh index 440a91b35823..2f8baed2b8f7 100755 --- a/tools/hv/hv_set_ifconfig.sh +++ b/tools/hv/hv_set_ifconfig.sh @@ -81,7 +81,7 @@ echo "ONBOOT=yes" >> $1 cp $1 /etc/sysconfig/network-scripts/ -chmod 600 $2 +umask 0177 interface=$(echo $2 | awk -F - '{ print $2 }') filename="${2##*/}" -- cgit v1.2.3 From a9640fcdd400463442846677e62b8208b81cb031 Mon Sep 17 00:00:00 2001 From: Olaf Hering Date: Tue, 5 Nov 2024 09:14:04 +0100 Subject: tools/hv: terminate fcopy daemon if read from uio fails Terminate endless loop in reading fails, to avoid flooding syslog. This happens if the state of "Guest services" integration service is changed from "enabled" to "disabled" at runtime in the VM settings. In this case pread returns EIO. Also handle an interrupted system call, and continue in this case. 
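The resulting read loop follows a common pattern, sketched standalone below (the descriptor and function names are placeholders rather than the daemon's actual code): transient conditions such as EINTR or EAGAIN are retried, while any other failure, e.g. the EIO case described above, ends the loop so syslog is not flooded.

#include <errno.h>
#include <string.h>
#include <syslog.h>
#include <unistd.h>

/* Returns 0 once a value has been read, -1 on a persistent error (e.g. EIO). */
static int read_event(int fd)
{
	int tmp;

	for (;;) {
		if (pread(fd, &tmp, sizeof(tmp), 0) >= 0)
			return 0;
		if (errno == EINTR || errno == EAGAIN)
			continue;	/* transient: try again */
		syslog(LOG_ERR, "pread failed: %s", strerror(errno));
		return -1;	/* persistent: let the caller terminate */
	}
}
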
Signed-off-by: Olaf Hering Reviewed-by: Saurabh Sengar Link: https://lore.kernel.org/r/20241105081437.15689-1-olaf@aepfle.de Signed-off-by: Wei Liu Message-ID: <20241105081437.15689-1-olaf@aepfle.de> --- tools/hv/hv_fcopy_uio_daemon.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/hv/hv_fcopy_uio_daemon.c b/tools/hv/hv_fcopy_uio_daemon.c index 12743d7f164f..0198321d14a2 100644 --- a/tools/hv/hv_fcopy_uio_daemon.c +++ b/tools/hv/hv_fcopy_uio_daemon.c @@ -466,8 +466,10 @@ int main(int argc, char *argv[]) */ ret = pread(fcopy_fd, &tmp, sizeof(int), 0); if (ret < 0) { + if (errno == EINTR || errno == EAGAIN) + continue; syslog(LOG_ERR, "pread failed: %s", strerror(errno)); - continue; + goto close; } len = HV_RING_SIZE; -- cgit v1.2.3 From 07dfa6e821e1c58cbd0f195173dddbd593721f9b Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Tue, 12 Nov 2024 16:04:01 +0100 Subject: hv/hv_kvp_daemon: Pass NIC name to hv_get_dns_info as well The reference implementation of hv_get_dns_info which is in the tree uses /etc/resolv.conf to get DNS servers and this does not require to know which NIC is queried. Distro specific implementations, however, may want to provide per-NIC, fine grained information. E.g. NetworkManager keeps track of DNS servers per connection. Similar to hv_get_dhcp_info, pass NIC name as a parameter to hv_get_dns_info script. Signed-off-by: Vitaly Kuznetsov Link: https://lore.kernel.org/r/20241112150401.217094-1-vkuznets@redhat.com Signed-off-by: Wei Liu Message-ID: <20241112150401.217094-1-vkuznets@redhat.com> --- tools/hv/hv_kvp_daemon.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/hv/hv_kvp_daemon.c b/tools/hv/hv_kvp_daemon.c index ae57bf69ad4a..296a7a62c54d 100644 --- a/tools/hv/hv_kvp_daemon.c +++ b/tools/hv/hv_kvp_daemon.c @@ -725,7 +725,7 @@ static void kvp_get_ipconfig_info(char *if_name, * . */ - sprintf(cmd, KVP_SCRIPTS_PATH "%s", "hv_get_dns_info"); + sprintf(cmd, KVP_SCRIPTS_PATH "%s %s", "hv_get_dns_info", if_name); /* * Execute the command to gather DNS info. -- cgit v1.2.3 From a4d024fe2e77063069c5f423f2f9be766450f0f9 Mon Sep 17 00:00:00 2001 From: Olaf Hering Date: Mon, 2 Dec 2024 13:04:10 +0100 Subject: tools/hv: reduce resouce usage in hv_get_dns_info helper Remove the usage of cat. Replace the shell process with awk with 'exec'. Also use a generic shell because no bash specific features will be used. Signed-off-by: Olaf Hering Acked-by: Wei Liu Link: https://lore.kernel.org/r/20241202120432.21115-1-olaf@aepfle.de Signed-off-by: Wei Liu Message-ID: <20241202120432.21115-1-olaf@aepfle.de> --- tools/hv/hv_get_dns_info.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/hv/hv_get_dns_info.sh b/tools/hv/hv_get_dns_info.sh index 058c17b46ffc..268521234d4b 100755 --- a/tools/hv/hv_get_dns_info.sh +++ b/tools/hv/hv_get_dns_info.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/bin/sh # This example script parses /etc/resolv.conf to retrive DNS information. # In the interest of keeping the KVP daemon code free of distro specific @@ -10,4 +10,4 @@ # this script can be based on the Network Manager APIs for retrieving DNS # entries. 
-cat /etc/resolv.conf 2>/dev/null | awk '/^nameserver/ { print $2 }' +exec awk '/^nameserver/ { print $2 }' /etc/resolv.conf 2>/dev/null -- cgit v1.2.3 From becc7fe329c09a7744fa908fca83418fa94a45a0 Mon Sep 17 00:00:00 2001 From: Olaf Hering Date: Mon, 2 Dec 2024 13:40:52 +0100 Subject: tools/hv: add a .gitignore file Remove generated files from 'git status' output after 'make -C tools/hv'. Signed-off-by: Olaf Hering Link: https://lore.kernel.org/r/20241202124107.28650-1-olaf@aepfle.de Signed-off-by: Wei Liu Message-ID: <20241202124107.28650-1-olaf@aepfle.de> --- tools/hv/.gitignore | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 tools/hv/.gitignore (limited to 'tools') diff --git a/tools/hv/.gitignore b/tools/hv/.gitignore new file mode 100644 index 000000000000..0c5bc15d602f --- /dev/null +++ b/tools/hv/.gitignore @@ -0,0 +1,3 @@ +hv_fcopy_uio_daemon +hv_kvp_daemon +hv_vss_daemon -- cgit v1.2.3 From 175c71c2aceef173ae6d3dceb41edfc2ac0d5937 Mon Sep 17 00:00:00 2001 From: Olaf Hering Date: Sun, 8 Dec 2024 23:47:17 +0000 Subject: tools/hv: reduce resource usage in hv_kvp_daemon hv_kvp_daemon uses popen(3) and system(3) as convinience helper to launch external helpers. These helpers are invoked via a temporary shell process. There is no need to keep this temporary process around while the helper runs. Replace this temporary shell with the actual helper process via 'exec'. Signed-off-by: Olaf Hering Link: https://lore.kernel.org/linux-hyperv/20241202123520.27812-1-olaf@aepfle.de/ Signed-off-by: Wei Liu --- tools/hv/hv_kvp_daemon.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/hv/hv_kvp_daemon.c b/tools/hv/hv_kvp_daemon.c index 296a7a62c54d..04ba035d67e9 100644 --- a/tools/hv/hv_kvp_daemon.c +++ b/tools/hv/hv_kvp_daemon.c @@ -725,7 +725,7 @@ static void kvp_get_ipconfig_info(char *if_name, * . */ - sprintf(cmd, KVP_SCRIPTS_PATH "%s %s", "hv_get_dns_info", if_name); + sprintf(cmd, "exec %s %s", KVP_SCRIPTS_PATH "hv_get_dns_info", if_name); /* * Execute the command to gather DNS info. @@ -742,7 +742,7 @@ static void kvp_get_ipconfig_info(char *if_name, * Enabled: DHCP enabled. */ - sprintf(cmd, KVP_SCRIPTS_PATH "%s %s", "hv_get_dhcp_info", if_name); + sprintf(cmd, "exec %s %s", KVP_SCRIPTS_PATH "hv_get_dhcp_info", if_name); file = popen(cmd, "r"); if (file == NULL) @@ -1606,8 +1606,9 @@ static int kvp_set_ip_info(char *if_name, struct hv_kvp_ipaddr_value *new_val) * invoke the external script to do its magic. */ - str_len = snprintf(cmd, sizeof(cmd), KVP_SCRIPTS_PATH "%s %s %s", - "hv_set_ifconfig", if_filename, nm_filename); + str_len = snprintf(cmd, sizeof(cmd), "exec %s %s %s", + KVP_SCRIPTS_PATH "hv_set_ifconfig", + if_filename, nm_filename); /* * This is a little overcautious, but it's necessary to suppress some * false warnings from gcc 8.0.1. -- cgit v1.2.3 From 4d33dc1bc31df80356c49e40dbd3ddff19500bcb Mon Sep 17 00:00:00 2001 From: Saket Kumar Bhaskar Date: Mon, 9 Dec 2024 12:27:20 +0530 Subject: selftests/bpf: Fix fill_link_info selftest on powerpc With CONFIG_KPROBES_ON_FTRACE enabled on powerpc, ftrace_location_range returns ftrace location for bpf_fentry_test1 at offset of 4 bytes from function entry. This is because branch to _mcount function is at offset of 4 bytes in function profile sequence. To fix this, add entry_offset of 4 bytes while verifying the address for kprobe entry address of bpf_fentry_test1 in verify_perf_link_info in selftest, when CONFIG_KPROBES_ON_FTRACE is enabled. 
Disassemble of bpf_fentry_test1: c000000000e4b080 : c000000000e4b080: a6 02 08 7c mflr r0 c000000000e4b084: b9 e2 22 4b bl c00000000007933c <_mcount> c000000000e4b088: 01 00 63 38 addi r3,r3,1 c000000000e4b08c: b4 07 63 7c extsw r3,r3 c000000000e4b090: 20 00 80 4e blr When CONFIG_PPC_FTRACE_OUT_OF_LINE [1] is enabled, these function profile sequence is moved out of line with an unconditional branch at offset 0. So, the test works without altering the offset for 'CONFIG_KPROBES_ON_FTRACE && CONFIG_PPC_FTRACE_OUT_OF_LINE' case. Disassemble of bpf_fentry_test1: c000000000f95190 : c000000000f95190: 00 00 00 60 nop c000000000f95194: 01 00 63 38 addi r3,r3,1 c000000000f95198: b4 07 63 7c extsw r3,r3 c000000000f9519c: 20 00 80 4e blr [1] https://lore.kernel.org/all/20241030070850.1361304-13-hbathini@linux.ibm.com/ Fixes: 23cf7aa539dc ("selftests/bpf: Add selftest for fill_link_info") Signed-off-by: Saket Kumar Bhaskar Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20241209065720.234344-1-skb99@linux.ibm.com --- tools/testing/selftests/bpf/prog_tests/fill_link_info.c | 4 ++++ tools/testing/selftests/bpf/progs/test_fill_link_info.c | 13 ++++++++++--- 2 files changed, 14 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/fill_link_info.c b/tools/testing/selftests/bpf/prog_tests/fill_link_info.c index d50cbd8040d4..e59af2aa6601 100644 --- a/tools/testing/selftests/bpf/prog_tests/fill_link_info.c +++ b/tools/testing/selftests/bpf/prog_tests/fill_link_info.c @@ -171,6 +171,10 @@ static void test_kprobe_fill_link_info(struct test_fill_link_info *skel, /* See also arch_adjust_kprobe_addr(). */ if (skel->kconfig->CONFIG_X86_KERNEL_IBT) entry_offset = 4; + if (skel->kconfig->CONFIG_PPC64 && + skel->kconfig->CONFIG_KPROBES_ON_FTRACE && + !skel->kconfig->CONFIG_PPC_FTRACE_OUT_OF_LINE) + entry_offset = 4; err = verify_perf_link_info(link_fd, type, kprobe_addr, 0, entry_offset); ASSERT_OK(err, "verify_perf_link_info"); } else { diff --git a/tools/testing/selftests/bpf/progs/test_fill_link_info.c b/tools/testing/selftests/bpf/progs/test_fill_link_info.c index 6afa834756e9..fac33a14f200 100644 --- a/tools/testing/selftests/bpf/progs/test_fill_link_info.c +++ b/tools/testing/selftests/bpf/progs/test_fill_link_info.c @@ -6,13 +6,20 @@ #include extern bool CONFIG_X86_KERNEL_IBT __kconfig __weak; +extern bool CONFIG_PPC_FTRACE_OUT_OF_LINE __kconfig __weak; +extern bool CONFIG_KPROBES_ON_FTRACE __kconfig __weak; +extern bool CONFIG_PPC64 __kconfig __weak; -/* This function is here to have CONFIG_X86_KERNEL_IBT - * used and added to object BTF. +/* This function is here to have CONFIG_X86_KERNEL_IBT, + * CONFIG_PPC_FTRACE_OUT_OF_LINE, CONFIG_KPROBES_ON_FTRACE, + * CONFIG_PPC6 used and added to object BTF. */ int unused(void) { - return CONFIG_X86_KERNEL_IBT ? 0 : 1; + return CONFIG_X86_KERNEL_IBT || + CONFIG_PPC_FTRACE_OUT_OF_LINE || + CONFIG_KPROBES_ON_FTRACE || + CONFIG_PPC64 ? 0 : 1; } SEC("kprobe") -- cgit v1.2.3 From 8653eb21d68c6882ce5716b04379431817310b85 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Thu, 5 Dec 2024 16:40:58 +0100 Subject: selftests: net: lib: Rename ip_link_master() to ip_link_set_master() Let's have a verb in that function name to make it clearer what's going on. 
Signed-off-by: Petr Machata Reviewed-by: Ido Schimmel Reviewed-by: Nikolay Aleksandrov Link: https://patch.msgid.link/fbf7c53a429b340b9cff5831280ea8c305a224f9.1733412063.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/fdb_notify.sh | 6 +++--- tools/testing/selftests/net/lib.sh | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/fdb_notify.sh b/tools/testing/selftests/net/fdb_notify.sh index c03151e7791c..c159230c9b62 100755 --- a/tools/testing/selftests/net/fdb_notify.sh +++ b/tools/testing/selftests/net/fdb_notify.sh @@ -49,7 +49,7 @@ test_dup_vxlan_self() { ip_link_add br up type bridge vlan_filtering 1 ip_link_add vx up type vxlan id 2000 dstport 4789 - ip_link_master vx br + ip_link_set_master vx br do_test_dup add "vxlan" dev vx self dst 192.0.2.1 do_test_dup del "vxlan" dev vx self dst 192.0.2.1 @@ -59,7 +59,7 @@ test_dup_vxlan_master() { ip_link_add br up type bridge vlan_filtering 1 ip_link_add vx up type vxlan id 2000 dstport 4789 - ip_link_master vx br + ip_link_set_master vx br do_test_dup add "vxlan master" dev vx master do_test_dup del "vxlan master" dev vx master @@ -79,7 +79,7 @@ test_dup_macvlan_master() ip_link_add br up type bridge vlan_filtering 1 ip_link_add dd up type dummy ip_link_add mv up link dd type macvlan mode passthru - ip_link_master mv br + ip_link_set_master mv br do_test_dup add "macvlan master" dev mv self do_test_dup del "macvlan master" dev mv self diff --git a/tools/testing/selftests/net/lib.sh b/tools/testing/selftests/net/lib.sh index 8994fec1c38f..5ea6537acd2b 100644 --- a/tools/testing/selftests/net/lib.sh +++ b/tools/testing/selftests/net/lib.sh @@ -451,7 +451,7 @@ ip_link_add() defer ip link del dev "$name" } -ip_link_master() +ip_link_set_master() { local member=$1; shift local master=$1; shift -- cgit v1.2.3 From d76ccb2ec368c8a44f64839140cd253c19f6a79a Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Thu, 5 Dec 2024 16:40:59 +0100 Subject: selftests: net: lib: Add several autodefer helpers Add ip_link_set_addr(), ip_link_set_up(), ip_addr_add() and ip_route_add() to the suite of helpers that automatically schedule a corresponding cleanup. When setting a new MAC, one needs to remember the old address first. Move mac_get() from forwarding/ to that end. 
Signed-off-by: Petr Machata Reviewed-by: Ido Schimmel Reviewed-by: Nikolay Aleksandrov Link: https://patch.msgid.link/add6bcbe30828fd01363266df20c338cf13aaf25.1733412063.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/forwarding/lib.sh | 7 ----- tools/testing/selftests/net/lib.sh | 39 +++++++++++++++++++++++++++ 2 files changed, 39 insertions(+), 7 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh index 7337f398f9cc..1fd40bada694 100644 --- a/tools/testing/selftests/net/forwarding/lib.sh +++ b/tools/testing/selftests/net/forwarding/lib.sh @@ -932,13 +932,6 @@ packets_rate() echo $(((t1 - t0) / interval)) } -mac_get() -{ - local if_name=$1 - - ip -j link show dev $if_name | jq -r '.[]["address"]' -} - ether_addr_to_u64() { local addr="$1" diff --git a/tools/testing/selftests/net/lib.sh b/tools/testing/selftests/net/lib.sh index 5ea6537acd2b..2cd5c743b2d9 100644 --- a/tools/testing/selftests/net/lib.sh +++ b/tools/testing/selftests/net/lib.sh @@ -435,6 +435,13 @@ xfail_on_veth() fi } +mac_get() +{ + local if_name=$1 + + ip -j link show dev $if_name | jq -r '.[]["address"]' +} + kill_process() { local pid=$1; shift @@ -459,3 +466,35 @@ ip_link_set_master() ip link set dev "$member" master "$master" defer ip link set dev "$member" nomaster } + +ip_link_set_addr() +{ + local name=$1; shift + local addr=$1; shift + + local old_addr=$(mac_get "$name") + ip link set dev "$name" address "$addr" + defer ip link set dev "$name" address "$old_addr" +} + +ip_link_set_up() +{ + local name=$1; shift + + ip link set dev "$name" up + defer ip link set dev "$name" down +} + +ip_addr_add() +{ + local name=$1; shift + + ip addr add dev "$name" "$@" + defer ip addr del dev "$name" "$@" +} + +ip_route_add() +{ + ip route add "$@" + defer ip route del "$@" +} -- cgit v1.2.3 From d84b5dccf3ebdeeabef910d1c19b931c84f67884 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Thu, 5 Dec 2024 16:41:00 +0100 Subject: selftests: forwarding: Add a selftest for the new reserved_bits UAPI Run VXLAN packets through a gateway. Flip individual bits of the packet and/or reserved bits of the gateway, and check that the gateway treats the packets as expected. 
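For reference, the 8-byte VXLAN header that the test builds and then mangles can be sketched in C as below (purely illustrative; it mirrors the vxlan_header_bytes() helper in the script further down): byte 0 holds the flags, of which only the I bit (0x08) is defined, bytes 4-6 hold the 24-bit VNI, and every other bit is reserved and covered by the reserved_bits option being tested.

#include <stdint.h>
#include <string.h>
#include <arpa/inet.h>

/* Build a well-formed VXLAN header: I flag set, VNI filled in, all
 * reserved bits left at zero. Flipping any other bit produces one of
 * the "mangled" packets the test sends at the gateway.
 */
static void build_vxlan_header(uint8_t hdr[8], uint32_t vni)
{
	uint32_t flags_word = htonl(0x08000000);		/* RRRRIRRR + 24 reserved bits */
	uint32_t vni_word = htonl((vni & 0xffffffu) << 8);	/* 24-bit VNI + 8 reserved bits */

	memcpy(hdr, &flags_word, sizeof(flags_word));
	memcpy(hdr + 4, &vni_word, sizeof(vni_word));
}
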
Signed-off-by: Petr Machata Reviewed-by: Ido Schimmel Reviewed-by: Nikolay Aleksandrov Link: https://patch.msgid.link/388bef3c30ebc887d4e64cd86a362e2df2f2d2e1.1733412063.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/forwarding/Makefile | 1 + .../selftests/net/forwarding/vxlan_reserved.sh | 352 +++++++++++++++++++++ 2 files changed, 353 insertions(+) create mode 100755 tools/testing/selftests/net/forwarding/vxlan_reserved.sh (limited to 'tools') diff --git a/tools/testing/selftests/net/forwarding/Makefile b/tools/testing/selftests/net/forwarding/Makefile index 7d885cff8d79..00bde7b6f39e 100644 --- a/tools/testing/selftests/net/forwarding/Makefile +++ b/tools/testing/selftests/net/forwarding/Makefile @@ -105,6 +105,7 @@ TEST_PROGS = bridge_fdb_learning_limit.sh \ vxlan_bridge_1q_port_8472_ipv6.sh \ vxlan_bridge_1q_port_8472.sh \ vxlan_bridge_1q.sh \ + vxlan_reserved.sh \ vxlan_symmetric_ipv6.sh \ vxlan_symmetric.sh diff --git a/tools/testing/selftests/net/forwarding/vxlan_reserved.sh b/tools/testing/selftests/net/forwarding/vxlan_reserved.sh new file mode 100755 index 000000000000..46c31794b91b --- /dev/null +++ b/tools/testing/selftests/net/forwarding/vxlan_reserved.sh @@ -0,0 +1,352 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# +--------------------+ +# | H1 (vrf) | +# | + $h1 | +# | | 192.0.2.1/28 | +# +----|---------------+ +# | +# +----|--------------------------------+ +# | SW | | +# | +--|------------------------------+ | +# | | + $swp1 BR1 (802.1d) | | +# | | | | +# | | + vx1 (vxlan) | | +# | | local 192.0.2.17 | | +# | | id 1000 dstport $VXPORT | | +# | +---------------------------------+ | +# | | +# | 192.0.2.32/28 via 192.0.2.18 | +# | | +# | + $rp1 | +# | | 192.0.2.17/28 | +# +--|----------------------------------+ +# | +# +--|----------------------------------+ +# | | | +# | + $rp2 | +# | 192.0.2.18/28 | +# | | +# | VRP2 (vrf) | +# +-------------------------------------+ + +: ${VXPORT:=4789} +: ${ALL_TESTS:=" + default_test + plain_test + reserved_0_test + reserved_10_test + reserved_31_test + reserved_56_test + reserved_63_test + "} + +NUM_NETIFS=4 +source lib.sh + +h1_create() +{ + simple_if_init $h1 192.0.2.1/28 + defer simple_if_fini $h1 192.0.2.1/28 + + tc qdisc add dev $h1 clsact + defer tc qdisc del dev $h1 clsact + + tc filter add dev $h1 ingress pref 77 \ + prot ip flower skip_hw ip_proto icmp action drop + defer tc filter del dev $h1 ingress pref 77 +} + +switch_create() +{ + ip_link_add br1 type bridge vlan_filtering 0 mcast_snooping 0 + # Make sure the bridge uses the MAC address of the local port and not + # that of the VxLAN's device. + ip_link_set_addr br1 $(mac_get $swp1) + ip_link_set_up br1 + + ip_link_set_up $rp1 + ip_addr_add $rp1 192.0.2.17/28 + ip_route_add 192.0.2.32/28 nexthop via 192.0.2.18 + + ip_link_set_master $swp1 br1 + ip_link_set_up $swp1 +} + +vrp2_create() +{ + simple_if_init $rp2 192.0.2.18/28 + defer simple_if_fini $rp2 192.0.2.18/28 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + rp1=${NETIFS[p3]} + rp2=${NETIFS[p4]} + + vrf_prepare + defer vrf_cleanup + + forwarding_enable + defer forwarding_restore + + h1_create + switch_create + + vrp2_create +} + +vxlan_header_bytes() +{ + local vni=$1; shift + local -a extra_bits=("$@") + local -a bits + local i + + for ((i=0; i < 64; i++)); do + bits[i]=0 + done + + # Bit 4 is the I flag and is always on. 
+ bits[4]=1 + + for i in ${extra_bits[@]}; do + bits[i]=1 + done + + # Bits 32..55 carry the VNI + local mask=0x800000 + for ((i=0; i < 24; i++)); do + bits[$((i + 32))]=$(((vni & mask) != 0)) + ((mask >>= 1)) + done + + local bytes + for ((i=0; i < 8; i++)); do + local byte=0 + local j + for ((j=0; j < 8; j++)); do + local bit=${bits[8 * i + j]} + ((byte += bit << (7 - j))) + done + bytes+=$(printf %02x $byte): + done + + echo ${bytes%:} +} + +neg_bytes() +{ + local bytes=$1; shift + + local -A neg=([0]=f [1]=e [2]=d [3]=c [4]=b [5]=a [6]=9 [7]=8 + [8]=7 [9]=6 [a]=5 [b]=4 [c]=3 [d]=2 [e]=1 [f]=0 [:]=:) + local out + local i + + for ((i=0; i < ${#bytes}; i++)); do + local c=${bytes:$i:1} + out+=${neg[$c]} + done + echo $out +} + +vxlan_ping_do() +{ + local count=$1; shift + local dev=$1; shift + local next_hop_mac=$1; shift + local dest_ip=$1; shift + local dest_mac=$1; shift + local vni=$1; shift + local reserved_bits=$1; shift + + local vxlan_header=$(vxlan_header_bytes $vni $reserved_bits) + + $MZ $dev -c $count -d 100msec -q \ + -b $next_hop_mac -B $dest_ip \ + -t udp sp=23456,dp=$VXPORT,p=$(: + )"$vxlan_header:"$( : VXLAN + )"$dest_mac:"$( : ETH daddr + )"00:11:22:33:44:55:"$( : ETH saddr + )"08:00:"$( : ETH type + )"45:"$( : IP version + IHL + )"00:"$( : IP TOS + )"00:54:"$( : IP total length + )"99:83:"$( : IP identification + )"40:00:"$( : IP flags + frag off + )"40:"$( : IP TTL + )"01:"$( : IP proto + )"00:00:"$( : IP header csum + )"$(ipv4_to_bytes 192.0.2.3):"$( : IP saddr + )"$(ipv4_to_bytes 192.0.2.1):"$( : IP daddr + )"08:"$( : ICMP type + )"00:"$( : ICMP code + )"8b:f2:"$( : ICMP csum + )"1f:6a:"$( : ICMP request identifier + )"00:01:"$( : ICMP request seq. number + )"4f:ff:c5:5b:00:00:00:00:"$( : ICMP payload + )"6d:74:0b:00:00:00:00:00:"$( : + )"10:11:12:13:14:15:16:17:"$( : + )"18:19:1a:1b:1c:1d:1e:1f:"$( : + )"20:21:22:23:24:25:26:27:"$( : + )"28:29:2a:2b:2c:2d:2e:2f:"$( : + )"30:31:32:33:34:35:36:37" +} + +vxlan_device_add() +{ + ip_link_add vx1 up type vxlan id 1000 \ + local 192.0.2.17 dstport "$VXPORT" \ + nolearning noudpcsum tos inherit ttl 100 "$@" + ip_link_set_master vx1 br1 +} + +vxlan_all_reserved_bits() +{ + local i + + for ((i=0; i < 64; i++)); do + if ((i == 4 || i >= 32 && i < 56)); then + continue + fi + echo $i + done +} + +vxlan_ping_vanilla() +{ + vxlan_ping_do 10 $rp2 $(mac_get $rp1) 192.0.2.17 $(mac_get $h1) 1000 +} + +vxlan_ping_reserved() +{ + for bit in $(vxlan_all_reserved_bits); do + vxlan_ping_do 1 $rp2 $(mac_get $rp1) \ + 192.0.2.17 $(mac_get $h1) 1000 "$bit" + ((n++)) + done +} + +vxlan_ping_test() +{ + local what=$1; shift + local get_stat=$1; shift + local expect=$1; shift + + RET=0 + + local t0=$($get_stat) + + "$@" + check_err $? "Failure when running $@" + + local t1=$($get_stat) + local delta=$((t1 - t0)) + + ((expect == delta)) + check_err $? "Expected to capture $expect packets, got $delta." + + log_test "$what" +} + +__default_test_do() +{ + local n_allowed_bits=$1; shift + local what=$1; shift + + vxlan_ping_test "$what: clean packets" \ + "tc_rule_stats_get $h1 77 ingress" \ + 10 vxlan_ping_vanilla + + local t0=$(link_stats_get vx1 rx errors) + vxlan_ping_test "$what: mangled packets" \ + "tc_rule_stats_get $h1 77 ingress" \ + $n_allowed_bits vxlan_ping_reserved + local t1=$(link_stats_get vx1 rx errors) + + RET=0 + local expect=$((39 - n_allowed_bits)) + local delta=$((t1 - t0)) + ((expect == delta)) + check_err $? "Expected $expect error packets, got $delta." 
+ log_test "$what: drops reported" +} + +default_test_do() +{ + vxlan_device_add + __default_test_do 0 "Default" +} + +default_test() +{ + in_defer_scope \ + default_test_do +} + +plain_test_do() +{ + vxlan_device_add reserved_bits 0xf7ffffff000000ff + __default_test_do 0 "reserved_bits 0xf7ffffff000000ff" +} + +plain_test() +{ + in_defer_scope \ + plain_test_do +} + +reserved_test() +{ + local bit=$1; shift + + local allowed_bytes=$(vxlan_header_bytes 0xffffff $bit) + local reserved_bytes=$(neg_bytes $allowed_bytes) + local reserved_bits=${reserved_bytes//:/} + + vxlan_device_add reserved_bits 0x$reserved_bits + __default_test_do 1 "reserved_bits 0x$reserved_bits" +} + +reserved_0_test() +{ + in_defer_scope \ + reserved_test 0 +} + +reserved_10_test() +{ + in_defer_scope \ + reserved_test 10 +} + +reserved_31_test() +{ + in_defer_scope \ + reserved_test 31 +} + +reserved_56_test() +{ + in_defer_scope \ + reserved_test 56 +} + +reserved_63_test() +{ + in_defer_scope \ + reserved_test 63 +} + +trap cleanup EXIT + +setup_prepare +setup_wait +tests_run + +exit $EXIT_STATUS -- cgit v1.2.3 From 694389cd2bdfc6bc646bbb0fd2a5684c5e8d5fbf Mon Sep 17 00:00:00 2001 From: Li Zhijian Date: Fri, 22 Nov 2024 15:47:57 +0800 Subject: selftests/cpufreq: gitignore output files and clean them in make clean After `make run_tests`, the git status complains: Untracked files: (use "git add ..." to include in what will be committed) cpufreq/cpufreq_selftest.dmesg_cpufreq.txt cpufreq/cpufreq_selftest.dmesg_full.txt cpufreq/cpufreq_selftest.txt Link: https://lore.kernel.org/all/20241122074757.1583002-1-lizhijian@fujitsu.com/ Cc: "Rafael J. Wysocki" Cc: Viresh Kumar Cc: Shuah Khan Signed-off-by: Li Zhijian Acked-by: Viresh Kumar Signed-off-by: Shuah Khan --- tools/testing/selftests/cpufreq/.gitignore | 2 ++ tools/testing/selftests/cpufreq/Makefile | 1 + 2 files changed, 3 insertions(+) create mode 100644 tools/testing/selftests/cpufreq/.gitignore (limited to 'tools') diff --git a/tools/testing/selftests/cpufreq/.gitignore b/tools/testing/selftests/cpufreq/.gitignore new file mode 100644 index 000000000000..67604e91e068 --- /dev/null +++ b/tools/testing/selftests/cpufreq/.gitignore @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only +cpufreq_selftest.* diff --git a/tools/testing/selftests/cpufreq/Makefile b/tools/testing/selftests/cpufreq/Makefile index c86ca8342222..9b2ccb10b0cf 100644 --- a/tools/testing/selftests/cpufreq/Makefile +++ b/tools/testing/selftests/cpufreq/Makefile @@ -3,6 +3,7 @@ all: TEST_PROGS := main.sh TEST_FILES := cpu.sh cpufreq.sh governor.sh module.sh special-tests.sh +EXTRA_CLEAN := cpufreq_selftest.dmesg_cpufreq.txt cpufreq_selftest.dmesg_full.txt cpufreq_selftest.txt include ../lib.mk -- cgit v1.2.3 From 3075476a7af666de3ec10b4f35d8e62db8fd5b6d Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Fri, 29 Nov 2024 09:20:05 +0800 Subject: pm: cpupower: Makefile: Fix cross compilation After commit f79473ed9220 ("pm: cpupower: Makefile: Allow overriding cross-compiling env params") we would fail to cross compile cpupower in buildroot which uses the recipe at [1] where only the CROSS variable is being set. The issue here is the use of the lazy evaluation for all variables: CC, LD, AR, STRIP, RANLIB, rather than just CROSS. 
[1]: https://git.buildroot.net/buildroot/tree/package/linux-tools/linux-tool-cpupower.mk.in Fixes: f79473ed9220 ("pm: cpupower: Makefile: Allow overriding cross-compiling env params") Reported-by: Florian Fainelli Closes: https://lore.kernel.org/all/2bbabd2c-24ef-493c-a199-594e5dada3da@broadcom.com/ Signed-off-by: Peng Fan Tested-by: Florian Fainelli Signed-off-by: Shuah Khan --- tools/power/cpupower/Makefile | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'tools') diff --git a/tools/power/cpupower/Makefile b/tools/power/cpupower/Makefile index 175004ce44b2..51a95239fe06 100644 --- a/tools/power/cpupower/Makefile +++ b/tools/power/cpupower/Makefile @@ -87,11 +87,19 @@ INSTALL_SCRIPT = ${INSTALL} -m 644 # to something more interesting, like "arm-linux-". If you want # to compile vs uClibc, that can be done here as well. CROSS ?= #/usr/i386-linux-uclibc/usr/bin/i386-uclibc- +ifneq ($(CROSS), ) +CC = $(CROSS)gcc +LD = $(CROSS)gcc +AR = $(CROSS)ar +STRIP = $(CROSS)strip +RANLIB = $(CROSS)ranlib +else CC ?= $(CROSS)gcc LD ?= $(CROSS)gcc AR ?= $(CROSS)ar STRIP ?= $(CROSS)strip RANLIB ?= $(CROSS)ranlib +endif HOSTCC = gcc MKDIR = mkdir -- cgit v1.2.3 From 46fd8c707b552c0a846917192f66e623bb03f976 Mon Sep 17 00:00:00 2001 From: wangfushuai Date: Wed, 4 Dec 2024 15:02:47 +0800 Subject: cpupower: revise is_valid flag handling for idle_monitor The is_valid flag should reflect the validity state of both the XXX_start and XXX_stop functions. But the use of '=' in XXX_stop overwrites the validity state set by XXX_start. This commit changes '=' to '|=' in XXX_stop to preserve and combine the validity state of XXX_start and XXX_stop. Signed-off-by: wangfushuai Signed-off-by: Shuah Khan --- tools/power/cpupower/utils/idle_monitor/hsw_ext_idle.c | 4 ++-- tools/power/cpupower/utils/idle_monitor/mperf_monitor.c | 2 +- tools/power/cpupower/utils/idle_monitor/nhm_idle.c | 2 +- tools/power/cpupower/utils/idle_monitor/snb_idle.c | 4 ++-- 4 files changed, 6 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/power/cpupower/utils/idle_monitor/hsw_ext_idle.c b/tools/power/cpupower/utils/idle_monitor/hsw_ext_idle.c index 55e55b6b42f9..f5a2a326b1b7 100644 --- a/tools/power/cpupower/utils/idle_monitor/hsw_ext_idle.c +++ b/tools/power/cpupower/utils/idle_monitor/hsw_ext_idle.c @@ -117,7 +117,7 @@ static int hsw_ext_start(void) for (num = 0; num < HSW_EXT_CSTATE_COUNT; num++) { for (cpu = 0; cpu < cpu_count; cpu++) { - hsw_ext_get_count(num, &val, cpu); + is_valid[cpu] = !hsw_ext_get_count(num, &val, cpu); previous_count[num][cpu] = val; } } @@ -134,7 +134,7 @@ static int hsw_ext_stop(void) for (num = 0; num < HSW_EXT_CSTATE_COUNT; num++) { for (cpu = 0; cpu < cpu_count; cpu++) { - is_valid[cpu] = !hsw_ext_get_count(num, &val, cpu); + is_valid[cpu] |= !hsw_ext_get_count(num, &val, cpu); current_count[num][cpu] = val; } } diff --git a/tools/power/cpupower/utils/idle_monitor/mperf_monitor.c b/tools/power/cpupower/utils/idle_monitor/mperf_monitor.c index ae6af354a81d..0a03573ebcc2 100644 --- a/tools/power/cpupower/utils/idle_monitor/mperf_monitor.c +++ b/tools/power/cpupower/utils/idle_monitor/mperf_monitor.c @@ -148,7 +148,7 @@ static int mperf_measure_stats(unsigned int cpu) ret = get_aperf_mperf(cpu, &aval, &mval); aperf_current_count[cpu] = aval; mperf_current_count[cpu] = mval; - is_valid[cpu] = !ret; + is_valid[cpu] |= !ret; return 0; } diff --git a/tools/power/cpupower/utils/idle_monitor/nhm_idle.c b/tools/power/cpupower/utils/idle_monitor/nhm_idle.c index 16eaf006f61f..6b1733782ffa 100644 
--- a/tools/power/cpupower/utils/idle_monitor/nhm_idle.c +++ b/tools/power/cpupower/utils/idle_monitor/nhm_idle.c @@ -151,7 +151,7 @@ static int nhm_stop(void) for (num = 0; num < NHM_CSTATE_COUNT; num++) { for (cpu = 0; cpu < cpu_count; cpu++) { - is_valid[cpu] = !nhm_get_count(num, &val, cpu); + is_valid[cpu] |= !nhm_get_count(num, &val, cpu); current_count[num][cpu] = val; } } diff --git a/tools/power/cpupower/utils/idle_monitor/snb_idle.c b/tools/power/cpupower/utils/idle_monitor/snb_idle.c index 811d63ab17a7..5969b88a85b4 100644 --- a/tools/power/cpupower/utils/idle_monitor/snb_idle.c +++ b/tools/power/cpupower/utils/idle_monitor/snb_idle.c @@ -115,7 +115,7 @@ static int snb_start(void) for (num = 0; num < SNB_CSTATE_COUNT; num++) { for (cpu = 0; cpu < cpu_count; cpu++) { - snb_get_count(num, &val, cpu); + is_valid[cpu] = !snb_get_count(num, &val, cpu); previous_count[num][cpu] = val; } } @@ -132,7 +132,7 @@ static int snb_stop(void) for (num = 0; num < SNB_CSTATE_COUNT; num++) { for (cpu = 0; cpu < cpu_count; cpu++) { - is_valid[cpu] = !snb_get_count(num, &val, cpu); + is_valid[cpu] |= !snb_get_count(num, &val, cpu); current_count[num][cpu] = val; } } -- cgit v1.2.3 From 3a7a53c8d4813ef510a731f529b8c58208ab8896 Mon Sep 17 00:00:00 2001 From: Zhu Jun Date: Wed, 4 Dec 2024 00:01:49 -0800 Subject: selftests/powerpc: Fix typo in test-vphn.c The word 'accross' is wrong, so fix it. Signed-off-by: Zhu Jun Signed-off-by: Madhavan Srinivasan Link: https://patch.msgid.link/20241204080149.11759-1-zhujun2@cmss.chinamobile.com --- tools/testing/selftests/powerpc/vphn/test-vphn.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/powerpc/vphn/test-vphn.c b/tools/testing/selftests/powerpc/vphn/test-vphn.c index 81d3069ffb84..f348f54914a9 100644 --- a/tools/testing/selftests/powerpc/vphn/test-vphn.c +++ b/tools/testing/selftests/powerpc/vphn/test-vphn.c @@ -275,7 +275,7 @@ static struct test { } }, { - /* Parse a 32-bit value split accross two consecutives 64-bit + /* Parse a 32-bit value split across two consecutives 64-bit * input values. */ "vphn: 16-bit value followed by 2 x 32-bit values", -- cgit v1.2.3 From 50a78409a2157c0340572beecab86e2b263fe2a2 Mon Sep 17 00:00:00 2001 From: Charlie Jenkins Date: Fri, 6 Dec 2024 10:05:06 -0800 Subject: selftests/hid: Add host-tools to .gitignore When compiling these selftests the host-tools directory is generated. Add it to the .gitignore so git doesn't see these files as trackable. Signed-off-by: Charlie Jenkins Signed-off-by: Jiri Kosina --- tools/testing/selftests/hid/.gitignore | 1 + 1 file changed, 1 insertion(+) (limited to 'tools') diff --git a/tools/testing/selftests/hid/.gitignore b/tools/testing/selftests/hid/.gitignore index 746c62361f77..933f483815b2 100644 --- a/tools/testing/selftests/hid/.gitignore +++ b/tools/testing/selftests/hid/.gitignore @@ -1,5 +1,6 @@ bpftool *.skel.h +/host-tools /tools hid_bpf hidraw -- cgit v1.2.3 From 11d5245f608f8ac01c97b93f31497cef7b96e457 Mon Sep 17 00:00:00 2001 From: Michal Luczaj Date: Mon, 2 Dec 2024 12:29:24 +0100 Subject: selftests/bpf: Extend test for sockmap update with same Verify that the sockmap link was not severed, and socket's entry is indeed removed from the map when the corresponding descriptor gets closed. 
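The expectation being exercised (our reading of the change below, not spelled
out in the patch): once the last descriptor of a socket stored in the map is
closed, sockmap drops the entry automatically, so a later
bpf_map_delete_elem() on that slot is expected to fail (typically -ENOENT)
rather than succeed:

    bpf_map_update_elem(map, &zero, &tcp, BPF_ANY);  /* entry added          */
    close(tcp);                                      /* entry auto-removed   */
    bpf_map_delete_elem(map, &zero);                 /* now expected to fail */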
Signed-off-by: Michal Luczaj Signed-off-by: Daniel Borkmann Reviewed-by: John Fastabend Link: https://lore.kernel.org/bpf/20241202-sockmap-replace-v1-2-1e88579e7bd5@rbox.co --- tools/testing/selftests/bpf/prog_tests/sockmap_basic.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c index fdff0652d7ef..248754296d97 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c @@ -934,8 +934,10 @@ static void test_sockmap_same_sock(void) err = socketpair(AF_UNIX, SOCK_STREAM, 0, stream); ASSERT_OK(err, "socketpair(af_unix, sock_stream)"); - if (err) + if (err) { + close(tcp); goto out; + } for (i = 0; i < 2; i++) { err = bpf_map_update_elem(map, &zero, &stream[0], BPF_ANY); @@ -954,14 +956,14 @@ static void test_sockmap_same_sock(void) ASSERT_OK(err, "bpf_map_update_elem(tcp)"); } + close(tcp); err = bpf_map_delete_elem(map, &zero); - ASSERT_OK(err, "bpf_map_delete_elem(entry)"); + ASSERT_ERR(err, "bpf_map_delete_elem(entry)"); close(stream[0]); close(stream[1]); out: close(dgram); - close(tcp); close(udp); test_sockmap_pass_prog__destroy(skel); } -- cgit v1.2.3 From b70b073979ebf7a5271e7ce655ea1c25b4673f04 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Wed, 4 Dec 2024 20:37:44 +0100 Subject: tools/resolve_btfids: Add --fatal_warnings option MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently warnings emitted by resolve_btfids are buried in the build log and are slipping into mainline frequently. Add an option to elevate warnings to hard errors so the CI bots can catch any new warnings. Signed-off-by: Thomas Weißschuh Signed-off-by: Daniel Borkmann Acked-by: Jiri Olsa Link: https://lore.kernel.org/bpf/20241204-resolve_btfids-v3-1-e6a279a74cfd@weissschuh.net --- tools/bpf/resolve_btfids/main.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/bpf/resolve_btfids/main.c b/tools/bpf/resolve_btfids/main.c index bd9f960bce3d..d47191c6e55e 100644 --- a/tools/bpf/resolve_btfids/main.c +++ b/tools/bpf/resolve_btfids/main.c @@ -141,6 +141,7 @@ struct object { }; static int verbose; +static int warnings; static int eprintf(int level, int var, const char *fmt, ...) 
{ @@ -604,6 +605,7 @@ static int symbols_resolve(struct object *obj) if (id->id) { pr_info("WARN: multiple IDs found for '%s': %d, %d - using %d\n", str, id->id, type_id, id->id); + warnings++; } else { id->id = type_id; (*nr)--; @@ -625,8 +627,10 @@ static int id_patch(struct object *obj, struct btf_id *id) int i; /* For set, set8, id->id may be 0 */ - if (!id->id && !id->is_set && !id->is_set8) + if (!id->id && !id->is_set && !id->is_set8) { pr_err("WARN: resolve_btfids: unresolved symbol %s\n", id->name); + warnings++; + } for (i = 0; i < id->addr_cnt; i++) { unsigned long addr = id->addr[i]; @@ -782,6 +786,7 @@ int main(int argc, const char **argv) .funcs = RB_ROOT, .sets = RB_ROOT, }; + bool fatal_warnings = false; struct option btfid_options[] = { OPT_INCR('v', "verbose", &verbose, "be more verbose (show errors, etc)"), @@ -789,6 +794,8 @@ int main(int argc, const char **argv) "BTF data"), OPT_STRING('b', "btf_base", &obj.base_btf_path, "file", "path of file providing base BTF"), + OPT_BOOLEAN(0, "fatal_warnings", &fatal_warnings, + "turn warnings into errors"), OPT_END() }; int err = -1; @@ -823,7 +830,8 @@ int main(int argc, const char **argv) if (symbols_patch(&obj)) goto out; - err = 0; + if (!(fatal_warnings && warnings)) + err = 0; out: if (obj.efile.elf) { elf_end(obj.efile.elf); -- cgit v1.2.3 From 3f23ee5590d9605dbde9a5e1d4b97637a4803329 Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Mon, 9 Dec 2024 20:10:56 -0800 Subject: selftests/bpf: test for changing packet data from global functions Check if verifier is aware of packet pointers invalidation done in global functions. Based on a test shared by Nick Zavaritsky in [0]. [0] https://lore.kernel.org/bpf/0498CA22-5779-4767-9C0C-A9515CEA711F@gmail.com/ Suggested-by: Nick Zavaritsky Signed-off-by: Eduard Zingerman Link: https://lore.kernel.org/r/20241210041100.1898468-5-eddyz87@gmail.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/progs/verifier_sock.c | 28 +++++++++++++++++++++++ 1 file changed, 28 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/verifier_sock.c b/tools/testing/selftests/bpf/progs/verifier_sock.c index d3e70e38e442..51826379a1aa 100644 --- a/tools/testing/selftests/bpf/progs/verifier_sock.c +++ b/tools/testing/selftests/bpf/progs/verifier_sock.c @@ -1037,4 +1037,32 @@ __naked void sock_create_read_src_port(void) : __clobber_all); } +__noinline +long skb_pull_data2(struct __sk_buff *sk, __u32 len) +{ + return bpf_skb_pull_data(sk, len); +} + +__noinline +long skb_pull_data1(struct __sk_buff *sk, __u32 len) +{ + return skb_pull_data2(sk, len); +} + +/* global function calls bpf_skb_pull_data(), which invalidates packet + * pointers established before global function call. 
+ */ +SEC("tc") +__failure __msg("invalid mem access") +int invalidate_pkt_pointers_from_global_func(struct __sk_buff *sk) +{ + int *p = (void *)(long)sk->data; + + if ((void *)(p + 1) > (void *)(long)sk->data_end) + return TCX_DROP; + skb_pull_data1(sk, 0); + *p = 42; /* this is unsafe */ + return TCX_PASS; +} + char _license[] SEC("license") = "GPL"; -- cgit v1.2.3 From 89ff40890d8f12a7d7e93fb602cc27562f3834f0 Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Mon, 9 Dec 2024 20:10:58 -0800 Subject: selftests/bpf: freplace tests for tracking of changes_packet_data Try different combinations of global functions replacement: - replace function that changes packet data with one that doesn't; - replace function that changes packet data with one that does; - replace function that doesn't change packet data with one that does; - replace function that doesn't change packet data with one that doesn't; Signed-off-by: Eduard Zingerman Link: https://lore.kernel.org/r/20241210041100.1898468-7-eddyz87@gmail.com Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/prog_tests/changes_pkt_data.c | 76 ++++++++++++++++++++++ .../testing/selftests/bpf/progs/changes_pkt_data.c | 26 ++++++++ .../bpf/progs/changes_pkt_data_freplace.c | 18 +++++ 3 files changed, 120 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/changes_pkt_data.c create mode 100644 tools/testing/selftests/bpf/progs/changes_pkt_data.c create mode 100644 tools/testing/selftests/bpf/progs/changes_pkt_data_freplace.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/changes_pkt_data.c b/tools/testing/selftests/bpf/prog_tests/changes_pkt_data.c new file mode 100644 index 000000000000..c0c7202f6c5c --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/changes_pkt_data.c @@ -0,0 +1,76 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "bpf/libbpf.h" +#include "changes_pkt_data_freplace.skel.h" +#include "changes_pkt_data.skel.h" +#include + +static void print_verifier_log(const char *log) +{ + if (env.verbosity >= VERBOSE_VERY) + fprintf(stdout, "VERIFIER LOG:\n=============\n%s=============\n", log); +} + +static void test_aux(const char *main_prog_name, const char *freplace_prog_name, bool expect_load) +{ + struct changes_pkt_data_freplace *freplace = NULL; + struct bpf_program *freplace_prog = NULL; + LIBBPF_OPTS(bpf_object_open_opts, opts); + struct changes_pkt_data *main = NULL; + char log[16*1024]; + int err; + + opts.kernel_log_buf = log; + opts.kernel_log_size = sizeof(log); + if (env.verbosity >= VERBOSE_SUPER) + opts.kernel_log_level = 1 | 2 | 4; + main = changes_pkt_data__open_opts(&opts); + if (!ASSERT_OK_PTR(main, "changes_pkt_data__open")) + goto out; + err = changes_pkt_data__load(main); + print_verifier_log(log); + if (!ASSERT_OK(err, "changes_pkt_data__load")) + goto out; + freplace = changes_pkt_data_freplace__open_opts(&opts); + if (!ASSERT_OK_PTR(freplace, "changes_pkt_data_freplace__open")) + goto out; + freplace_prog = bpf_object__find_program_by_name(freplace->obj, freplace_prog_name); + if (!ASSERT_OK_PTR(freplace_prog, "freplace_prog")) + goto out; + bpf_program__set_autoload(freplace_prog, true); + bpf_program__set_autoattach(freplace_prog, true); + bpf_program__set_attach_target(freplace_prog, + bpf_program__fd(main->progs.dummy), + main_prog_name); + err = changes_pkt_data_freplace__load(freplace); + print_verifier_log(log); + if (expect_load) { + ASSERT_OK(err, "changes_pkt_data_freplace__load"); + } else { + ASSERT_ERR(err, "changes_pkt_data_freplace__load"); + 
ASSERT_HAS_SUBSTR(log, "Extension program changes packet data", "error log"); + } + +out: + changes_pkt_data_freplace__destroy(freplace); + changes_pkt_data__destroy(main); +} + +/* There are two global subprograms in both changes_pkt_data.skel.h: + * - one changes packet data; + * - another does not. + * It is ok to freplace subprograms that change packet data with those + * that either do or do not. It is only ok to freplace subprograms + * that do not change packet data with those that do not as well. + * The below tests check outcomes for each combination of such freplace. + */ +void test_changes_pkt_data_freplace(void) +{ + if (test__start_subtest("changes_with_changes")) + test_aux("changes_pkt_data", "changes_pkt_data", true); + if (test__start_subtest("changes_with_doesnt_change")) + test_aux("changes_pkt_data", "does_not_change_pkt_data", true); + if (test__start_subtest("doesnt_change_with_changes")) + test_aux("does_not_change_pkt_data", "changes_pkt_data", false); + if (test__start_subtest("doesnt_change_with_doesnt_change")) + test_aux("does_not_change_pkt_data", "does_not_change_pkt_data", true); +} diff --git a/tools/testing/selftests/bpf/progs/changes_pkt_data.c b/tools/testing/selftests/bpf/progs/changes_pkt_data.c new file mode 100644 index 000000000000..f87da8e9d6b3 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/changes_pkt_data.c @@ -0,0 +1,26 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include + +__noinline +long changes_pkt_data(struct __sk_buff *sk, __u32 len) +{ + return bpf_skb_pull_data(sk, len); +} + +__noinline __weak +long does_not_change_pkt_data(struct __sk_buff *sk, __u32 len) +{ + return 0; +} + +SEC("tc") +int dummy(struct __sk_buff *sk) +{ + changes_pkt_data(sk, 0); + does_not_change_pkt_data(sk, 0); + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/changes_pkt_data_freplace.c b/tools/testing/selftests/bpf/progs/changes_pkt_data_freplace.c new file mode 100644 index 000000000000..0e525beb8603 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/changes_pkt_data_freplace.c @@ -0,0 +1,18 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include + +SEC("?freplace") +long changes_pkt_data(struct __sk_buff *sk, __u32 len) +{ + return bpf_skb_pull_data(sk, len); +} + +SEC("?freplace") +long does_not_change_pkt_data(struct __sk_buff *sk, __u32 len) +{ + return 0; +} + +char _license[] SEC("license") = "GPL"; -- cgit v1.2.3 From 1a4607ffba35bf2a630aab299e34dd3f6e658d70 Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Mon, 9 Dec 2024 20:10:59 -0800 Subject: bpf: consider that tail calls invalidate packet pointers Tail-called programs could execute any of the helpers that invalidate packet pointers. Hence, conservatively assume that each tail call invalidates packet pointers. Making the change in bpf_helper_changes_pkt_data() automatically makes use of check_cfg() logic that computes 'changes_pkt_data' effect for global sub-programs, such that the following program could be rejected: int tail_call(struct __sk_buff *sk) { bpf_tail_call_static(sk, &jmp_table, 0); return 0; } SEC("tc") int not_safe(struct __sk_buff *sk) { int *p = (void *)(long)sk->data; ... make p valid ... tail_call(sk); *p = 42; /* this is unsafe */ ... } The tc_bpf2bpf.c:subprog_tc() needs change: mark it as a function that can invalidate packet pointers. Otherwise, it can't be freplaced with tailcall_freplace.c:entry_freplace() that does a tail call. 
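For completeness, a program shaped like the rejected example above can be made
verifiable again by re-deriving and re-checking the packet pointers after the
call that may change packet data (an illustrative sketch, not part of this
patch):

    tail_call(sk);
    p = (void *)(long)sk->data;
    if ((void *)(p + 1) > (void *)(long)sk->data_end)
        return TCX_DROP;
    *p = 42; /* safe again: pointers re-validated after the call */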
Signed-off-by: Eduard Zingerman Link: https://lore.kernel.org/r/20241210041100.1898468-8-eddyz87@gmail.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/progs/tc_bpf2bpf.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/tc_bpf2bpf.c b/tools/testing/selftests/bpf/progs/tc_bpf2bpf.c index d1a57f7d09bd..fe6249d99b31 100644 --- a/tools/testing/selftests/bpf/progs/tc_bpf2bpf.c +++ b/tools/testing/selftests/bpf/progs/tc_bpf2bpf.c @@ -11,6 +11,8 @@ int subprog_tc(struct __sk_buff *skb) __sink(skb); __sink(ret); + /* let verifier know that 'subprog_tc' can change pointers to skb->data */ + bpf_skb_change_proto(skb, 0, 0); return ret; } -- cgit v1.2.3 From d9706b56e13b7916461ca6b4b731e169ed44ed09 Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Mon, 9 Dec 2024 20:11:00 -0800 Subject: selftests/bpf: validate that tail call invalidates packet pointers Add a test case with a tail call done from a global sub-program. Such tails calls should be considered as invalidating packet pointers. Signed-off-by: Eduard Zingerman Link: https://lore.kernel.org/r/20241210041100.1898468-9-eddyz87@gmail.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/progs/verifier_sock.c | 28 +++++++++++++++++++++++ 1 file changed, 28 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/verifier_sock.c b/tools/testing/selftests/bpf/progs/verifier_sock.c index 51826379a1aa..0d5e56dffabb 100644 --- a/tools/testing/selftests/bpf/progs/verifier_sock.c +++ b/tools/testing/selftests/bpf/progs/verifier_sock.c @@ -50,6 +50,13 @@ struct { __uint(map_flags, BPF_F_NO_PREALLOC); } sk_storage_map SEC(".maps"); +struct { + __uint(type, BPF_MAP_TYPE_PROG_ARRAY); + __uint(max_entries, 1); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(__u32)); +} jmp_table SEC(".maps"); + SEC("cgroup/skb") __description("skb->sk: no NULL check") __failure __msg("invalid mem access 'sock_common_or_null'") @@ -1065,4 +1072,25 @@ int invalidate_pkt_pointers_from_global_func(struct __sk_buff *sk) return TCX_PASS; } +__noinline +int tail_call(struct __sk_buff *sk) +{ + bpf_tail_call_static(sk, &jmp_table, 0); + return 0; +} + +/* Tail calls invalidate packet pointers. */ +SEC("tc") +__failure __msg("invalid mem access") +int invalidate_pkt_pointers_by_tail_call(struct __sk_buff *sk) +{ + int *p = (void *)(long)sk->data; + + if ((void *)(p + 1) > (void *)(long)sk->data_end) + return TCX_DROP; + tail_call(sk); + *p = 42; /* this is unsafe */ + return TCX_PASS; +} + char _license[] SEC("license") = "GPL"; -- cgit v1.2.3 From b8f614207b0d5e4abd6df8d5cb3cc11f009d1d93 Mon Sep 17 00:00:00 2001 From: David Vernet Date: Mon, 9 Dec 2024 09:29:24 -0600 Subject: scx: Fix maximal BPF selftest prog maximal.bpf.c is still dispatching to and consuming from SCX_DSQ_GLOBAL. Let's have it use its own DSQ to avoid any runtime errors. 
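(Our assumption as to the underlying cause, not stated in the patch: recent
kernels only permit consuming user-created DSQs via
scx_bpf_dsq_move_to_local(), so consuming directly from the built-in
SCX_DSQ_GLOBAL triggers an ops error at runtime. Creating a private DSQ with
scx_bpf_create_dsq() in ops.init() and using it for both insertion and
consumption avoids that.)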
Signed-off-by: David Vernet Tested-by: Andrea Righi Signed-off-by: Tejun Heo --- tools/testing/selftests/sched_ext/maximal.bpf.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/sched_ext/maximal.bpf.c b/tools/testing/selftests/sched_ext/maximal.bpf.c index 4c005fa71810..430f5e13bf55 100644 --- a/tools/testing/selftests/sched_ext/maximal.bpf.c +++ b/tools/testing/selftests/sched_ext/maximal.bpf.c @@ -12,6 +12,8 @@ char _license[] SEC("license") = "GPL"; +#define DSQ_ID 0 + s32 BPF_STRUCT_OPS(maximal_select_cpu, struct task_struct *p, s32 prev_cpu, u64 wake_flags) { @@ -20,7 +22,7 @@ s32 BPF_STRUCT_OPS(maximal_select_cpu, struct task_struct *p, s32 prev_cpu, void BPF_STRUCT_OPS(maximal_enqueue, struct task_struct *p, u64 enq_flags) { - scx_bpf_dsq_insert(p, SCX_DSQ_GLOBAL, SCX_SLICE_DFL, enq_flags); + scx_bpf_dsq_insert(p, DSQ_ID, SCX_SLICE_DFL, enq_flags); } void BPF_STRUCT_OPS(maximal_dequeue, struct task_struct *p, u64 deq_flags) @@ -28,7 +30,7 @@ void BPF_STRUCT_OPS(maximal_dequeue, struct task_struct *p, u64 deq_flags) void BPF_STRUCT_OPS(maximal_dispatch, s32 cpu, struct task_struct *prev) { - scx_bpf_dsq_move_to_local(SCX_DSQ_GLOBAL); + scx_bpf_dsq_move_to_local(DSQ_ID); } void BPF_STRUCT_OPS(maximal_runnable, struct task_struct *p, u64 enq_flags) @@ -123,7 +125,7 @@ void BPF_STRUCT_OPS(maximal_cgroup_set_weight, struct cgroup *cgrp, u32 weight) s32 BPF_STRUCT_OPS_SLEEPABLE(maximal_init) { - return 0; + return scx_bpf_create_dsq(DSQ_ID, -1); } void BPF_STRUCT_OPS(maximal_exit, struct scx_exit_info *info) -- cgit v1.2.3 From 9b496a8bbed9cc292b0dfd796f38ec58b6d0375f Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Thu, 5 Dec 2024 14:51:01 -0500 Subject: cgroup/cpuset: Prevent leakage of isolated CPUs into sched domains Isolated CPUs are not allowed to be used in a non-isolated partition. The only exception is the top cpuset which is allowed to contain boot time isolated CPUs. Commit ccac8e8de99c ("cgroup/cpuset: Fix remote root partition creation problem") introduces a simplified scheme of including only partition roots in sched domain generation. However, it does not properly account for this exception case. This can result in leakage of isolated CPUs into a sched domain. Fix it by making sure that isolated CPUs are excluded from the top cpuset before generating sched domains. Also update the way the boot time isolated CPUs are handled in test_cpuset_prs.sh to make sure that those isolated CPUs are really isolated instead of just skipping them in the tests. Fixes: ccac8e8de99c ("cgroup/cpuset: Fix remote root partition creation problem") Signed-off-by: Waiman Long Signed-off-by: Tejun Heo --- tools/testing/selftests/cgroup/test_cpuset_prs.sh | 33 +++++++++++++---------- 1 file changed, 19 insertions(+), 14 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/cgroup/test_cpuset_prs.sh b/tools/testing/selftests/cgroup/test_cpuset_prs.sh index 03c1bdaed2c3..400a696a0d21 100755 --- a/tools/testing/selftests/cgroup/test_cpuset_prs.sh +++ b/tools/testing/selftests/cgroup/test_cpuset_prs.sh @@ -86,15 +86,15 @@ echo "" > test/cpuset.cpus # # If isolated CPUs have been reserved at boot time (as shown in -# cpuset.cpus.isolated), these isolated CPUs should be outside of CPUs 0-7 +# cpuset.cpus.isolated), these isolated CPUs should be outside of CPUs 0-8 # that will be used by this script for testing purpose. If not, some of -# the tests may fail incorrectly. 
These isolated CPUs will also be removed -# before being compared with the expected results. +# the tests may fail incorrectly. These pre-isolated CPUs should stay in +# an isolated state throughout the testing process for now. # BOOT_ISOLCPUS=$(cat $CGROUP2/cpuset.cpus.isolated) if [[ -n "$BOOT_ISOLCPUS" ]] then - [[ $(echo $BOOT_ISOLCPUS | sed -e "s/[,-].*//") -le 7 ]] && + [[ $(echo $BOOT_ISOLCPUS | sed -e "s/[,-].*//") -le 8 ]] && skip_test "Pre-isolated CPUs ($BOOT_ISOLCPUS) overlap CPUs to be tested" echo "Pre-isolated CPUs: $BOOT_ISOLCPUS" fi @@ -683,15 +683,19 @@ check_isolcpus() EXPECT_VAL2=$EXPECT_VAL fi + # + # Appending pre-isolated CPUs + # Even though CPU #8 isn't used for testing, it can't be pre-isolated + # to make appending those CPUs easier. + # + [[ -n "$BOOT_ISOLCPUS" ]] && { + EXPECT_VAL=${EXPECT_VAL:+${EXPECT_VAL},}${BOOT_ISOLCPUS} + EXPECT_VAL2=${EXPECT_VAL2:+${EXPECT_VAL2},}${BOOT_ISOLCPUS} + } + # # Check cpuset.cpus.isolated cpumask # - if [[ -z "$BOOT_ISOLCPUS" ]] - then - ISOLCPUS=$(cat $ISCPUS) - else - ISOLCPUS=$(cat $ISCPUS | sed -e "s/,*$BOOT_ISOLCPUS//") - fi [[ "$EXPECT_VAL2" != "$ISOLCPUS" ]] && { # Take a 50ms pause and try again pause 0.05 @@ -731,8 +735,6 @@ check_isolcpus() fi done [[ "$ISOLCPUS" = *- ]] && ISOLCPUS=${ISOLCPUS}$LASTISOLCPU - [[ -n "BOOT_ISOLCPUS" ]] && - ISOLCPUS=$(echo $ISOLCPUS | sed -e "s/,*$BOOT_ISOLCPUS//") [[ "$EXPECT_VAL" = "$ISOLCPUS" ]] } @@ -836,8 +838,11 @@ run_state_test() # if available [[ -n "$ICPUS" ]] && { check_isolcpus $ICPUS - [[ $? -ne 0 ]] && test_fail $I "isolated CPU" \ - "Expect $ICPUS, get $ISOLCPUS instead" + [[ $? -ne 0 ]] && { + [[ -n "$BOOT_ISOLCPUS" ]] && ICPUS=${ICPUS},${BOOT_ISOLCPUS} + test_fail $I "isolated CPU" \ + "Expect $ICPUS, get $ISOLCPUS instead" + } } reset_cgroup_states # -- cgit v1.2.3 From a3b16198d3df38aa2fc6de167b919ecb3fae74a6 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Wed, 11 Dec 2024 01:35:40 +0200 Subject: selftests: forwarding: add a pvid_change test to bridge_vlan_unaware Historically, DSA drivers have seen problems with the model in which bridge VLANs work, particularly with them being offloaded to switchdev asynchronously relative to when they become active (vlan_filtering=1). This switchdev API peculiarity was papered over by commit 2ea7a679ca2a ("net: dsa: Don't add vlans when vlan filtering is disabled"), which introduced other problems, fixed by commit 54a0ed0df496 ("net: dsa: provide an option for drivers to always receive bridge VLANs") through an opt-in ds->configure_vlan_while_not_filtering bool (which later became an opt-out). The point is that some DSA drivers still skip VLAN configuration while VLAN-unaware, and there is a desire to get rid of that behavior. It's hard to deduce from the wording "at least one corner case" what Andrew saw, but my best guess is that there is a discrepancy of meaning between bridge pvid and hardware port pvid which caused breakage. On one side, the Linux bridge with vlan_filtering=0 is completely VLAN-unaware, and will accept and process a packet the same way irrespective of the VLAN groups on the ports or the bridge itself (there may not even be a pvid, and this makes no difference). On the other hand, DSA switches still do VLAN processing internally, even with vlan_filtering disabled, but they are expected to classify all packets to the port pvid. That pvid shouldn't be confused with the bridge pvid, and there lies the problem. 
When a switch port is under a VLAN-unaware bridge, the hardware pvid must be explicitly managed by the driver to classify all received packets to it, regardless of bridge VLAN groups. When under a VLAN-aware bridge, the hardware pvid must be synchronized to the bridge port pvid. To do this correctly, the pattern is unfortunately a bit complicated, and involves hooking the pvid change logic into quite a few places (the ones that change the input variables which determine the value to use as hardware pvid for a port). See mv88e6xxx_port_commit_pvid(), sja1105_commit_pvid(), ocelot_port_set_pvid() etc. The point is that not all drivers used to do that, especially in older kernels. If a driver is to blindly program a bridge pvid VLAN received from switchdev while it's VLAN-unaware, this might in turn change the hardware pvid used by a VLAN-unaware bridge port, which might result in packet loss depending which other ports have that pvid too (in that same note, it might also go unnoticed). To capture that condition, it is sufficient to take a VLAN-unaware bridge and change the [VLAN-aware] bridge pvid on a single port, to a VID that isn't present on any other port. This shouldn't have absolutely any effect on packet classification or forwarding. However, broken drivers will take the bait, and change their PVID to 3, causing packet loss. Signed-off-by: Vladimir Oltean Tested-by: Ido Schimmel Link: https://patch.msgid.link/20241210233541.1401837-1-vladimir.oltean@nxp.com Signed-off-by: Paolo Abeni --- .../net/forwarding/bridge_vlan_unaware.sh | 25 +++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/forwarding/bridge_vlan_unaware.sh b/tools/testing/selftests/net/forwarding/bridge_vlan_unaware.sh index 1c8a26046589..2b5700b61ffa 100755 --- a/tools/testing/selftests/net/forwarding/bridge_vlan_unaware.sh +++ b/tools/testing/selftests/net/forwarding/bridge_vlan_unaware.sh @@ -1,7 +1,7 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 -ALL_TESTS="ping_ipv4 ping_ipv6 learning flooding" +ALL_TESTS="ping_ipv4 ping_ipv6 learning flooding pvid_change" NUM_NETIFS=4 source lib.sh @@ -77,12 +77,16 @@ cleanup() ping_ipv4() { - ping_test $h1 192.0.2.2 + local msg=$1 + + ping_test $h1 192.0.2.2 "$msg" } ping_ipv6() { - ping6_test $h1 2001:db8:1::2 + local msg=$1 + + ping6_test $h1 2001:db8:1::2 "$msg" } learning() @@ -95,6 +99,21 @@ flooding() flood_test $swp2 $h1 $h2 } +pvid_change() +{ + # Test that the changing of the VLAN-aware PVID does not affect + # VLAN-unaware forwarding + bridge vlan add vid 3 dev $swp1 pvid untagged + + ping_ipv4 " with bridge port $swp1 PVID changed" + ping_ipv6 " with bridge port $swp1 PVID changed" + + bridge vlan del vid 3 dev $swp1 + + ping_ipv4 " with bridge port $swp1 PVID deleted" + ping_ipv6 " with bridge port $swp1 PVID deleted" +} + trap cleanup EXIT setup_prepare -- cgit v1.2.3 From b9fee10a52c0999f6f1c7e1c0ea83869f3cd10ae Mon Sep 17 00:00:00 2001 From: Simone Magnani Date: Mon, 9 Dec 2024 15:54:39 +0100 Subject: bpftool: Probe for ISA v4 instruction set extension This patch introduces a new probe to check whether the kernel supports instruction set extensions v4. The v4 extension comprises several new instructions: BPF_{SDIV,SMOD} (signed div and mod), BPF_{LD,LDX,ST,STX,MOV} (sign-extended load/store/move), 32-bit BPF_JA (unconditional jump), target-independent BPF_ALU64 BSWAP (byte-swapping 16/32/64). 
These have been introduced in the following commits respectively: * ec0e2da95f72 ("bpf: Support new signed div/mod instructions.") * 1f9a1ea821ff ("bpf: Support new sign-extension load insns") * 8100928c8814 ("bpf: Support new sign-extension mov insns") * 4cd58e9af8b9 ("bpf: Support new 32bit offset jmp instruction") * 0845c3db7bf5 ("bpf: Support new unconditional bswap instruction") Support in bpftool for previous ISA extensions was added in commit 0fd800b2456c ("bpftool: Probe for instruction set extensions"). These probes are useful for userspace BPF projects that want to use newer instruction set extensions on newer kernels, to reduce the programs' sizes or their complexity. LLVM provides the mcpu=v4 option since LLVM commit 8f28e8069c4b ("[BPF] support for BPF_ST instruction in codegen") [0]. Signed-off-by: Simone Magnani Signed-off-by: Daniel Borkmann Reviewed-by: Quentin Monnet Link: https://github.com/llvm/llvm-project/commit/8f28e8069c4ba1110daee8bddc4d5049b6d4646e [0] Link: https://lore.kernel.org/bpf/20241209145439.336362-1-simone.magnani@isovalent.com --- tools/bpf/bpftool/feature.c | 23 +++++++++++++++++++++++ tools/include/linux/filter.h | 10 ++++++++++ 2 files changed, 33 insertions(+) (limited to 'tools') diff --git a/tools/bpf/bpftool/feature.c b/tools/bpf/bpftool/feature.c index 4dbc4fcdf473..24fecdf8e430 100644 --- a/tools/bpf/bpftool/feature.c +++ b/tools/bpf/bpftool/feature.c @@ -885,6 +885,28 @@ probe_v3_isa_extension(const char *define_prefix, __u32 ifindex) "V3_ISA_EXTENSION"); } +/* + * Probe for the v4 instruction set extension introduced in commit 1f9a1ea821ff + * ("bpf: Support new sign-extension load insns"). + */ +static void +probe_v4_isa_extension(const char *define_prefix, __u32 ifindex) +{ + struct bpf_insn insns[5] = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_JMP32_IMM(BPF_JEQ, BPF_REG_0, 1, 1), + BPF_JMP32_A(1), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN() + }; + + probe_misc_feature(insns, ARRAY_SIZE(insns), + define_prefix, ifindex, + "have_v4_isa_extension", + "ISA extension v4", + "V4_ISA_EXTENSION"); +} + static void section_system_config(enum probe_component target, const char *define_prefix) { @@ -1029,6 +1051,7 @@ static void section_misc(const char *define_prefix, __u32 ifindex) probe_bounded_loops(define_prefix, ifindex); probe_v2_isa_extension(define_prefix, ifindex); probe_v3_isa_extension(define_prefix, ifindex); + probe_v4_isa_extension(define_prefix, ifindex); print_end_section(); } diff --git a/tools/include/linux/filter.h b/tools/include/linux/filter.h index 65aa8ce142e5..bcc6df79301a 100644 --- a/tools/include/linux/filter.h +++ b/tools/include/linux/filter.h @@ -273,6 +273,16 @@ .off = OFF, \ .imm = 0 }) +/* Unconditional jumps, gotol pc + imm32 */ + +#define BPF_JMP32_A(IMM) \ + ((struct bpf_insn) { \ + .code = BPF_JMP32 | BPF_JA, \ + .dst_reg = 0, \ + .src_reg = 0, \ + .off = 0, \ + .imm = IMM }) + /* Function call */ #define BPF_EMIT_CALL(FUNC) \ -- cgit v1.2.3 From 04789af756a4a43e72986185f66f148e65b32fed Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Wed, 11 Dec 2024 23:07:11 -0800 Subject: selftests/bpf: extend changes_pkt_data with cases w/o subprograms Extend changes_pkt_data tests with test cases freplacing the main program that does not have subprograms. Try four combinations when both main program and replacement do and do not change packet data. 
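For reference, the outcomes encoded in the expect_load logic of the test below
boil down to the following (our summary, derived from the test code rather
than stated explicitly in the patch):

    to-be-replaced program      replacement                 freplace load
    changes packet data         changes packet data         accepted
    changes packet data         does not change             accepted
    does not change             changes packet data         rejected
    does not change             does not change             accepted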
Signed-off-by: Eduard Zingerman Link: https://lore.kernel.org/r/20241212070711.427443-2-eddyz87@gmail.com Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/prog_tests/changes_pkt_data.c | 55 +++++++++++++++++----- .../testing/selftests/bpf/progs/changes_pkt_data.c | 27 ++++++++--- .../bpf/progs/changes_pkt_data_freplace.c | 6 +-- 3 files changed, 66 insertions(+), 22 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/changes_pkt_data.c b/tools/testing/selftests/bpf/prog_tests/changes_pkt_data.c index c0c7202f6c5c..7526de379081 100644 --- a/tools/testing/selftests/bpf/prog_tests/changes_pkt_data.c +++ b/tools/testing/selftests/bpf/prog_tests/changes_pkt_data.c @@ -10,10 +10,14 @@ static void print_verifier_log(const char *log) fprintf(stdout, "VERIFIER LOG:\n=============\n%s=============\n", log); } -static void test_aux(const char *main_prog_name, const char *freplace_prog_name, bool expect_load) +static void test_aux(const char *main_prog_name, + const char *to_be_replaced, + const char *replacement, + bool expect_load) { struct changes_pkt_data_freplace *freplace = NULL; struct bpf_program *freplace_prog = NULL; + struct bpf_program *main_prog = NULL; LIBBPF_OPTS(bpf_object_open_opts, opts); struct changes_pkt_data *main = NULL; char log[16*1024]; @@ -26,6 +30,10 @@ static void test_aux(const char *main_prog_name, const char *freplace_prog_name, main = changes_pkt_data__open_opts(&opts); if (!ASSERT_OK_PTR(main, "changes_pkt_data__open")) goto out; + main_prog = bpf_object__find_program_by_name(main->obj, main_prog_name); + if (!ASSERT_OK_PTR(main_prog, "main_prog")) + goto out; + bpf_program__set_autoload(main_prog, true); err = changes_pkt_data__load(main); print_verifier_log(log); if (!ASSERT_OK(err, "changes_pkt_data__load")) @@ -33,14 +41,14 @@ static void test_aux(const char *main_prog_name, const char *freplace_prog_name, freplace = changes_pkt_data_freplace__open_opts(&opts); if (!ASSERT_OK_PTR(freplace, "changes_pkt_data_freplace__open")) goto out; - freplace_prog = bpf_object__find_program_by_name(freplace->obj, freplace_prog_name); + freplace_prog = bpf_object__find_program_by_name(freplace->obj, replacement); if (!ASSERT_OK_PTR(freplace_prog, "freplace_prog")) goto out; bpf_program__set_autoload(freplace_prog, true); bpf_program__set_autoattach(freplace_prog, true); bpf_program__set_attach_target(freplace_prog, - bpf_program__fd(main->progs.dummy), - main_prog_name); + bpf_program__fd(main_prog), + to_be_replaced); err = changes_pkt_data_freplace__load(freplace); print_verifier_log(log); if (expect_load) { @@ -62,15 +70,38 @@ out: * that either do or do not. It is only ok to freplace subprograms * that do not change packet data with those that do not as well. * The below tests check outcomes for each combination of such freplace. + * Also test a case when main subprogram itself is replaced and is a single + * subprogram in a program. 
*/ void test_changes_pkt_data_freplace(void) { - if (test__start_subtest("changes_with_changes")) - test_aux("changes_pkt_data", "changes_pkt_data", true); - if (test__start_subtest("changes_with_doesnt_change")) - test_aux("changes_pkt_data", "does_not_change_pkt_data", true); - if (test__start_subtest("doesnt_change_with_changes")) - test_aux("does_not_change_pkt_data", "changes_pkt_data", false); - if (test__start_subtest("doesnt_change_with_doesnt_change")) - test_aux("does_not_change_pkt_data", "does_not_change_pkt_data", true); + struct { + const char *main; + const char *to_be_replaced; + bool changes; + } mains[] = { + { "main_with_subprogs", "changes_pkt_data", true }, + { "main_with_subprogs", "does_not_change_pkt_data", false }, + { "main_changes", "main_changes", true }, + { "main_does_not_change", "main_does_not_change", false }, + }; + struct { + const char *func; + bool changes; + } replacements[] = { + { "changes_pkt_data", true }, + { "does_not_change_pkt_data", false } + }; + char buf[64]; + + for (int i = 0; i < ARRAY_SIZE(mains); ++i) { + for (int j = 0; j < ARRAY_SIZE(replacements); ++j) { + snprintf(buf, sizeof(buf), "%s_with_%s", + mains[i].to_be_replaced, replacements[j].func); + if (!test__start_subtest(buf)) + continue; + test_aux(mains[i].main, mains[i].to_be_replaced, replacements[j].func, + mains[i].changes || !replacements[j].changes); + } + } } diff --git a/tools/testing/selftests/bpf/progs/changes_pkt_data.c b/tools/testing/selftests/bpf/progs/changes_pkt_data.c index f87da8e9d6b3..43cada48b28a 100644 --- a/tools/testing/selftests/bpf/progs/changes_pkt_data.c +++ b/tools/testing/selftests/bpf/progs/changes_pkt_data.c @@ -4,22 +4,35 @@ #include __noinline -long changes_pkt_data(struct __sk_buff *sk, __u32 len) +long changes_pkt_data(struct __sk_buff *sk) { - return bpf_skb_pull_data(sk, len); + return bpf_skb_pull_data(sk, 0); } __noinline __weak -long does_not_change_pkt_data(struct __sk_buff *sk, __u32 len) +long does_not_change_pkt_data(struct __sk_buff *sk) { return 0; } -SEC("tc") -int dummy(struct __sk_buff *sk) +SEC("?tc") +int main_with_subprogs(struct __sk_buff *sk) +{ + changes_pkt_data(sk); + does_not_change_pkt_data(sk); + return 0; +} + +SEC("?tc") +int main_changes(struct __sk_buff *sk) +{ + bpf_skb_pull_data(sk, 0); + return 0; +} + +SEC("?tc") +int main_does_not_change(struct __sk_buff *sk) { - changes_pkt_data(sk, 0); - does_not_change_pkt_data(sk, 0); return 0; } diff --git a/tools/testing/selftests/bpf/progs/changes_pkt_data_freplace.c b/tools/testing/selftests/bpf/progs/changes_pkt_data_freplace.c index 0e525beb8603..f9a622705f1b 100644 --- a/tools/testing/selftests/bpf/progs/changes_pkt_data_freplace.c +++ b/tools/testing/selftests/bpf/progs/changes_pkt_data_freplace.c @@ -4,13 +4,13 @@ #include SEC("?freplace") -long changes_pkt_data(struct __sk_buff *sk, __u32 len) +long changes_pkt_data(struct __sk_buff *sk) { - return bpf_skb_pull_data(sk, len); + return bpf_skb_pull_data(sk, 0); } SEC("?freplace") -long does_not_change_pkt_data(struct __sk_buff *sk, __u32 len) +long does_not_change_pkt_data(struct __sk_buff *sk) { return 0; } -- cgit v1.2.3 From 659b9ba7cb2d7adb64618b87ddfaa528a143766e Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Thu, 12 Dec 2024 01:20:49 -0800 Subject: bpf: Check size for BTF-based ctx access of pointer members Robert Morris reported the following program type which passes the verifier in [0]: SEC("struct_ops/bpf_cubic_init") void BPF_PROG(bpf_cubic_init, struct sock *sk) { asm volatile("r2 = 
*(u16*)(r1 + 0)"); // verifier should demand u64 asm volatile("*(u32 *)(r2 +1504) = 0"); // 1280 in some configs } The second line may or may not work, but the first instruction shouldn't pass, as it's a narrow load into the context structure of the struct ops callback. The code falls back to btf_ctx_access to ensure correctness and obtaining the types of pointers. Ensure that the size of the access is correctly checked to be 8 bytes, otherwise the verifier thinks the narrow load obtained a trusted BTF pointer and will permit loads/stores as it sees fit. Perform the check on size after we've verified that the load is for a pointer field, as for scalar values narrow loads are fine. Access to structs passed as arguments to a BPF program are also treated as scalars, therefore no adjustment is needed in their case. Existing verifier selftests are broken by this change, but because they were incorrect. Verifier tests for d_path were performing narrow load into context to obtain path pointer, had this program actually run it would cause a crash. The same holds for verifier_btf_ctx_access tests. [0]: https://lore.kernel.org/bpf/51338.1732985814@localhost Fixes: 9e15db66136a ("bpf: Implement accurate raw_tp context access via BTF") Reported-by: Robert Morris Signed-off-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20241212092050.3204165-2-memxor@gmail.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/progs/verifier_btf_ctx_access.c | 4 ++-- tools/testing/selftests/bpf/progs/verifier_d_path.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/verifier_btf_ctx_access.c b/tools/testing/selftests/bpf/progs/verifier_btf_ctx_access.c index a570e48b917a..bfc3bf18fed4 100644 --- a/tools/testing/selftests/bpf/progs/verifier_btf_ctx_access.c +++ b/tools/testing/selftests/bpf/progs/verifier_btf_ctx_access.c @@ -11,7 +11,7 @@ __success __retval(0) __naked void btf_ctx_access_accept(void) { asm volatile (" \ - r2 = *(u32*)(r1 + 8); /* load 2nd argument value (int pointer) */\ + r2 = *(u64 *)(r1 + 8); /* load 2nd argument value (int pointer) */\ r0 = 0; \ exit; \ " ::: __clobber_all); @@ -23,7 +23,7 @@ __success __retval(0) __naked void ctx_access_u32_pointer_accept(void) { asm volatile (" \ - r2 = *(u32*)(r1 + 0); /* load 1nd argument value (u32 pointer) */\ + r2 = *(u64 *)(r1 + 0); /* load 1nd argument value (u32 pointer) */\ r0 = 0; \ exit; \ " ::: __clobber_all); diff --git a/tools/testing/selftests/bpf/progs/verifier_d_path.c b/tools/testing/selftests/bpf/progs/verifier_d_path.c index ec79cbcfde91..87e51a215558 100644 --- a/tools/testing/selftests/bpf/progs/verifier_d_path.c +++ b/tools/testing/selftests/bpf/progs/verifier_d_path.c @@ -11,7 +11,7 @@ __success __retval(0) __naked void d_path_accept(void) { asm volatile (" \ - r1 = *(u32*)(r1 + 0); \ + r1 = *(u64 *)(r1 + 0); \ r2 = r10; \ r2 += -8; \ r6 = 0; \ @@ -31,7 +31,7 @@ __failure __msg("helper call is not allowed in probe") __naked void d_path_reject(void) { asm volatile (" \ - r1 = *(u32*)(r1 + 0); \ + r1 = *(u64 *)(r1 + 0); \ r2 = r10; \ r2 += -8; \ r6 = 0; \ -- cgit v1.2.3 From 8025731c28beb4700dc801a1ca4504d1f78bac27 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Thu, 12 Dec 2024 01:20:50 -0800 Subject: selftests/bpf: Add test for narrow ctx load for pointer args Ensure that performing narrow ctx loads other than size == 8 are rejected when the argument is a pointer type. 
Signed-off-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20241212092050.3204165-3-memxor@gmail.com Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/progs/verifier_btf_ctx_access.c | 36 ++++++++++++++++++++++ 1 file changed, 36 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/verifier_btf_ctx_access.c b/tools/testing/selftests/bpf/progs/verifier_btf_ctx_access.c index bfc3bf18fed4..28b939572cda 100644 --- a/tools/testing/selftests/bpf/progs/verifier_btf_ctx_access.c +++ b/tools/testing/selftests/bpf/progs/verifier_btf_ctx_access.c @@ -29,4 +29,40 @@ __naked void ctx_access_u32_pointer_accept(void) " ::: __clobber_all); } +SEC("fentry/bpf_fentry_test9") +__description("btf_ctx_access u32 pointer reject u32") +__failure __msg("size 4 must be 8") +__naked void ctx_access_u32_pointer_reject_32(void) +{ + asm volatile (" \ + r2 = *(u32 *)(r1 + 0); /* load 1st argument with narrow load */\ + r0 = 0; \ + exit; \ +" ::: __clobber_all); +} + +SEC("fentry/bpf_fentry_test9") +__description("btf_ctx_access u32 pointer reject u16") +__failure __msg("size 2 must be 8") +__naked void ctx_access_u32_pointer_reject_16(void) +{ + asm volatile (" \ + r2 = *(u16 *)(r1 + 0); /* load 1st argument with narrow load */\ + r0 = 0; \ + exit; \ +" ::: __clobber_all); +} + +SEC("fentry/bpf_fentry_test9") +__description("btf_ctx_access u32 pointer reject u8") +__failure __msg("size 1 must be 8") +__naked void ctx_access_u32_pointer_reject_8(void) +{ + asm volatile (" \ + r2 = *(u8 *)(r1 + 0); /* load 1st argument with narrow load */\ + r0 = 0; \ + exit; \ +" ::: __clobber_all); +} + char _license[] SEC("license") = "GPL"; -- cgit v1.2.3 From b641712925bfe89ff7217cc2d0b7a8e042df556b Mon Sep 17 00:00:00 2001 From: Alastair Robertson Date: Wed, 11 Dec 2024 08:40:29 -0800 Subject: libbpf: Pull file-opening logic up to top-level functions Move the filename arguments and file-descriptor handling from init_output_elf() and linker_load_obj_file() and instead handle them at the top-level in bpf_linker__new() and bpf_linker__add_file(). This will allow the inner functions to be shared with a new, non-filename-based, API in the next commit. Signed-off-by: Alastair Robertson Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20241211164030.573042-2-ajor@meta.com --- tools/lib/bpf/linker.c | 84 +++++++++++++++++++++++++------------------------- 1 file changed, 42 insertions(+), 42 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/linker.c b/tools/lib/bpf/linker.c index e56ba6e67451..c49e94506d9c 100644 --- a/tools/lib/bpf/linker.c +++ b/tools/lib/bpf/linker.c @@ -157,10 +157,9 @@ struct bpf_linker { #define pr_warn_elf(fmt, ...) 
\ libbpf_print(LIBBPF_WARN, "libbpf: " fmt ": %s\n", ##__VA_ARGS__, elf_errmsg(-1)) -static int init_output_elf(struct bpf_linker *linker, const char *file); +static int init_output_elf(struct bpf_linker *linker); -static int linker_load_obj_file(struct bpf_linker *linker, const char *filename, - const struct bpf_linker_file_opts *opts, +static int linker_load_obj_file(struct bpf_linker *linker, struct src_obj *obj); static int linker_sanity_check_elf(struct src_obj *obj); static int linker_sanity_check_elf_symtab(struct src_obj *obj, struct src_sec *sec); @@ -233,9 +232,20 @@ struct bpf_linker *bpf_linker__new(const char *filename, struct bpf_linker_opts if (!linker) return errno = ENOMEM, NULL; - linker->fd = -1; + linker->filename = strdup(filename); + if (!linker->filename) { + err = -ENOMEM; + goto err_out; + } + + linker->fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_CLOEXEC, 0644); + if (linker->fd < 0) { + err = -errno; + pr_warn("failed to create '%s': %d\n", filename, err); + goto err_out; + } - err = init_output_elf(linker, filename); + err = init_output_elf(linker); if (err) goto err_out; @@ -294,23 +304,12 @@ static Elf64_Sym *add_new_sym(struct bpf_linker *linker, size_t *sym_idx) return sym; } -static int init_output_elf(struct bpf_linker *linker, const char *file) +static int init_output_elf(struct bpf_linker *linker) { int err, str_off; Elf64_Sym *init_sym; struct dst_sec *sec; - linker->filename = strdup(file); - if (!linker->filename) - return -ENOMEM; - - linker->fd = open(file, O_WRONLY | O_CREAT | O_TRUNC | O_CLOEXEC, 0644); - if (linker->fd < 0) { - err = -errno; - pr_warn("failed to create '%s': %s\n", file, errstr(err)); - return err; - } - linker->elf = elf_begin(linker->fd, ELF_C_WRITE, NULL); if (!linker->elf) { pr_warn_elf("failed to create ELF object"); @@ -440,7 +439,7 @@ int bpf_linker__add_file(struct bpf_linker *linker, const char *filename, const struct bpf_linker_file_opts *opts) { struct src_obj obj = {}; - int err = 0; + int err = 0, fd; if (!OPTS_VALID(opts, bpf_linker_file_opts)) return libbpf_err(-EINVAL); @@ -448,7 +447,17 @@ int bpf_linker__add_file(struct bpf_linker *linker, const char *filename, if (!linker->elf) return libbpf_err(-EINVAL); - err = err ?: linker_load_obj_file(linker, filename, opts, &obj); + fd = open(filename, O_RDONLY | O_CLOEXEC); + if (fd < 0) { + err = -errno; + pr_warn("failed to open file '%s': %s\n", filename, errstr(err)); + return libbpf_err(err); + } + + obj.filename = filename; + obj.fd = fd; + + err = err ?: linker_load_obj_file(linker, &obj); err = err ?: linker_append_sec_data(linker, &obj); err = err ?: linker_append_elf_syms(linker, &obj); err = err ?: linker_append_elf_relos(linker, &obj); @@ -534,8 +543,7 @@ static struct src_sec *add_src_sec(struct src_obj *obj, const char *sec_name) return sec; } -static int linker_load_obj_file(struct bpf_linker *linker, const char *filename, - const struct bpf_linker_file_opts *opts, +static int linker_load_obj_file(struct bpf_linker *linker, struct src_obj *obj) { int err = 0; @@ -554,26 +562,18 @@ static int linker_load_obj_file(struct bpf_linker *linker, const char *filename, #error "Unknown __BYTE_ORDER__" #endif - pr_debug("linker: adding object file '%s'...\n", filename); - - obj->filename = filename; + pr_debug("linker: adding object file '%s'...\n", obj->filename); - obj->fd = open(filename, O_RDONLY | O_CLOEXEC); - if (obj->fd < 0) { - err = -errno; - pr_warn("failed to open file '%s': %s\n", filename, errstr(err)); - return err; - } obj->elf = 
elf_begin(obj->fd, ELF_C_READ_MMAP, NULL); if (!obj->elf) { - pr_warn_elf("failed to parse ELF file '%s'", filename); + pr_warn_elf("failed to parse ELF file '%s'", obj->filename); return -EINVAL; } /* Sanity check ELF file high-level properties */ ehdr = elf64_getehdr(obj->elf); if (!ehdr) { - pr_warn_elf("failed to get ELF header for %s", filename); + pr_warn_elf("failed to get ELF header for %s", obj->filename); return -EINVAL; } @@ -581,7 +581,7 @@ static int linker_load_obj_file(struct bpf_linker *linker, const char *filename, obj_byteorder = ehdr->e_ident[EI_DATA]; if (obj_byteorder != ELFDATA2LSB && obj_byteorder != ELFDATA2MSB) { err = -EOPNOTSUPP; - pr_warn("unknown byte order of ELF file %s\n", filename); + pr_warn("unknown byte order of ELF file %s\n", obj->filename); return err; } if (link_byteorder == ELFDATANONE) { @@ -591,7 +591,7 @@ static int linker_load_obj_file(struct bpf_linker *linker, const char *filename, obj_byteorder == ELFDATA2MSB ? "big" : "little"); } else if (link_byteorder != obj_byteorder) { err = -EOPNOTSUPP; - pr_warn("byte order mismatch with ELF file %s\n", filename); + pr_warn("byte order mismatch with ELF file %s\n", obj->filename); return err; } @@ -599,12 +599,12 @@ static int linker_load_obj_file(struct bpf_linker *linker, const char *filename, || ehdr->e_machine != EM_BPF || ehdr->e_ident[EI_CLASS] != ELFCLASS64) { err = -EOPNOTSUPP; - pr_warn_elf("unsupported kind of ELF file %s", filename); + pr_warn_elf("unsupported kind of ELF file %s", obj->filename); return err; } if (elf_getshdrstrndx(obj->elf, &obj->shstrs_sec_idx)) { - pr_warn_elf("failed to get SHSTRTAB section index for %s", filename); + pr_warn_elf("failed to get SHSTRTAB section index for %s", obj->filename); return -EINVAL; } @@ -616,21 +616,21 @@ static int linker_load_obj_file(struct bpf_linker *linker, const char *filename, shdr = elf64_getshdr(scn); if (!shdr) { pr_warn_elf("failed to get section #%zu header for %s", - sec_idx, filename); + sec_idx, obj->filename); return -EINVAL; } sec_name = elf_strptr(obj->elf, obj->shstrs_sec_idx, shdr->sh_name); if (!sec_name) { pr_warn_elf("failed to get section #%zu name for %s", - sec_idx, filename); + sec_idx, obj->filename); return -EINVAL; } data = elf_getdata(scn, 0); if (!data) { pr_warn_elf("failed to get section #%zu (%s) data from %s", - sec_idx, sec_name, filename); + sec_idx, sec_name, obj->filename); return -EINVAL; } @@ -666,7 +666,7 @@ static int linker_load_obj_file(struct bpf_linker *linker, const char *filename, err = libbpf_get_error(obj->btf); if (err) { pr_warn("failed to parse .BTF from %s: %s\n", - filename, errstr(err)); + obj->filename, errstr(err)); return err; } sec->skipped = true; @@ -677,7 +677,7 @@ static int linker_load_obj_file(struct bpf_linker *linker, const char *filename, err = libbpf_get_error(obj->btf_ext); if (err) { pr_warn("failed to parse .BTF.ext from '%s': %s\n", - filename, errstr(err)); + obj->filename, errstr(err)); return err; } sec->skipped = true; @@ -694,7 +694,7 @@ static int linker_load_obj_file(struct bpf_linker *linker, const char *filename, break; default: pr_warn("unrecognized section #%zu (%s) in %s\n", - sec_idx, sec_name, filename); + sec_idx, sec_name, obj->filename); err = -EINVAL; return err; } -- cgit v1.2.3 From 6d5e5e5d7ce134a0b334c3bfe44a9326d8c5f32b Mon Sep 17 00:00:00 2001 From: Alastair Robertson Date: Wed, 11 Dec 2024 08:40:30 -0800 Subject: libbpf: Extend linker API to support in-memory ELF files The new_fd and add_fd functions correspond to the original new and add_file 
functions, but accept an FD instead of a file name. This gives API consumers the option of using anonymous files/memfds to avoid writing ELFs to disk. This new API will be useful for performing linking as part of bpftrace's JIT compilation. The add_buf function is a convenience wrapper that does the work of creating a memfd for the caller. Signed-off-by: Alastair Robertson Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20241211164030.573042-3-ajor@meta.com --- tools/lib/bpf/libbpf.h | 5 ++ tools/lib/bpf/libbpf.map | 4 ++ tools/lib/bpf/linker.c | 162 +++++++++++++++++++++++++++++++++++++++++------ 3 files changed, 150 insertions(+), 21 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index b2ce3a72b11d..d45807103565 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -1796,9 +1796,14 @@ struct bpf_linker_file_opts { struct bpf_linker; LIBBPF_API struct bpf_linker *bpf_linker__new(const char *filename, struct bpf_linker_opts *opts); +LIBBPF_API struct bpf_linker *bpf_linker__new_fd(int fd, struct bpf_linker_opts *opts); LIBBPF_API int bpf_linker__add_file(struct bpf_linker *linker, const char *filename, const struct bpf_linker_file_opts *opts); +LIBBPF_API int bpf_linker__add_fd(struct bpf_linker *linker, int fd, + const struct bpf_linker_file_opts *opts); +LIBBPF_API int bpf_linker__add_buf(struct bpf_linker *linker, void *buf, size_t buf_sz, + const struct bpf_linker_file_opts *opts); LIBBPF_API int bpf_linker__finalize(struct bpf_linker *linker); LIBBPF_API void bpf_linker__free(struct bpf_linker *linker); diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index 54b6f312cfa8..a8b2936a1646 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -432,4 +432,8 @@ LIBBPF_1.5.0 { } LIBBPF_1.4.0; LIBBPF_1.6.0 { + global: + bpf_linker__add_buf; + bpf_linker__add_fd; + bpf_linker__new_fd; } LIBBPF_1.5.0; diff --git a/tools/lib/bpf/linker.c b/tools/lib/bpf/linker.c index c49e94506d9c..b52f71c59616 100644 --- a/tools/lib/bpf/linker.c +++ b/tools/lib/bpf/linker.c @@ -4,6 +4,10 @@ * * Copyright (c) 2021 Facebook */ +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif + #include #include #include @@ -16,6 +20,7 @@ #include #include #include +#include #include "libbpf.h" #include "btf.h" #include "libbpf_internal.h" @@ -152,6 +157,8 @@ struct bpf_linker { /* global (including extern) ELF symbols */ int glob_sym_cnt; struct glob_sym *glob_syms; + + bool fd_is_owned; }; #define pr_warn_elf(fmt, ...) 
\ @@ -159,6 +166,9 @@ struct bpf_linker { static int init_output_elf(struct bpf_linker *linker); +static int bpf_linker_add_file(struct bpf_linker *linker, int fd, + const char *filename); + static int linker_load_obj_file(struct bpf_linker *linker, struct src_obj *obj); static int linker_sanity_check_elf(struct src_obj *obj); @@ -190,7 +200,7 @@ void bpf_linker__free(struct bpf_linker *linker) if (linker->elf) elf_end(linker->elf); - if (linker->fd >= 0) + if (linker->fd >= 0 && linker->fd_is_owned) close(linker->fd); strset__free(linker->strtab_strs); @@ -244,6 +254,49 @@ struct bpf_linker *bpf_linker__new(const char *filename, struct bpf_linker_opts pr_warn("failed to create '%s': %d\n", filename, err); goto err_out; } + linker->fd_is_owned = true; + + err = init_output_elf(linker); + if (err) + goto err_out; + + return linker; + +err_out: + bpf_linker__free(linker); + return errno = -err, NULL; +} + +struct bpf_linker *bpf_linker__new_fd(int fd, struct bpf_linker_opts *opts) +{ + struct bpf_linker *linker; + char filename[32]; + int err; + + if (fd < 0) + return errno = EINVAL, NULL; + + if (!OPTS_VALID(opts, bpf_linker_opts)) + return errno = EINVAL, NULL; + + if (elf_version(EV_CURRENT) == EV_NONE) { + pr_warn_elf("libelf initialization failed"); + return errno = EINVAL, NULL; + } + + linker = calloc(1, sizeof(*linker)); + if (!linker) + return errno = ENOMEM, NULL; + + snprintf(filename, sizeof(filename), "fd:%d", fd); + linker->filename = strdup(filename); + if (!linker->filename) { + err = -ENOMEM; + goto err_out; + } + + linker->fd = fd; + linker->fd_is_owned = false; err = init_output_elf(linker); if (err) @@ -435,24 +488,11 @@ static int init_output_elf(struct bpf_linker *linker) return 0; } -int bpf_linker__add_file(struct bpf_linker *linker, const char *filename, - const struct bpf_linker_file_opts *opts) +static int bpf_linker_add_file(struct bpf_linker *linker, int fd, + const char *filename) { struct src_obj obj = {}; - int err = 0, fd; - - if (!OPTS_VALID(opts, bpf_linker_file_opts)) - return libbpf_err(-EINVAL); - - if (!linker->elf) - return libbpf_err(-EINVAL); - - fd = open(filename, O_RDONLY | O_CLOEXEC); - if (fd < 0) { - err = -errno; - pr_warn("failed to open file '%s': %s\n", filename, errstr(err)); - return libbpf_err(err); - } + int err = 0; obj.filename = filename; obj.fd = fd; @@ -472,12 +512,91 @@ int bpf_linker__add_file(struct bpf_linker *linker, const char *filename, free(obj.sym_map); if (obj.elf) elf_end(obj.elf); - if (obj.fd >= 0) - close(obj.fd); + return err; +} + +int bpf_linker__add_file(struct bpf_linker *linker, const char *filename, + const struct bpf_linker_file_opts *opts) +{ + int fd, err; + + if (!OPTS_VALID(opts, bpf_linker_file_opts)) + return libbpf_err(-EINVAL); + + if (!linker->elf) + return libbpf_err(-EINVAL); + + fd = open(filename, O_RDONLY | O_CLOEXEC); + if (fd < 0) { + err = -errno; + pr_warn("failed to open file '%s': %s\n", filename, errstr(err)); + return libbpf_err(err); + } + + err = bpf_linker_add_file(linker, fd, filename); + close(fd); return libbpf_err(err); } +int bpf_linker__add_fd(struct bpf_linker *linker, int fd, + const struct bpf_linker_file_opts *opts) +{ + char filename[32]; + int err; + + if (!OPTS_VALID(opts, bpf_linker_file_opts)) + return libbpf_err(-EINVAL); + + if (!linker->elf) + return libbpf_err(-EINVAL); + + if (fd < 0) + return libbpf_err(-EINVAL); + + snprintf(filename, sizeof(filename), "fd:%d", fd); + err = bpf_linker_add_file(linker, fd, filename); + return libbpf_err(err); +} + +int 
bpf_linker__add_buf(struct bpf_linker *linker, void *buf, size_t buf_sz, + const struct bpf_linker_file_opts *opts) +{ + char filename[32]; + int fd, written, ret; + + if (!OPTS_VALID(opts, bpf_linker_file_opts)) + return libbpf_err(-EINVAL); + + if (!linker->elf) + return libbpf_err(-EINVAL); + + snprintf(filename, sizeof(filename), "mem:%p+%zu", buf, buf_sz); + + fd = memfd_create(filename, 0); + if (fd < 0) { + ret = -errno; + pr_warn("failed to create memfd '%s': %s\n", filename, errstr(ret)); + return libbpf_err(ret); + } + + written = 0; + while (written < buf_sz) { + ret = write(fd, buf, buf_sz); + if (ret < 0) { + ret = -errno; + pr_warn("failed to write '%s': %s\n", filename, errstr(ret)); + goto err_out; + } + written += ret; + } + + ret = bpf_linker_add_file(linker, fd, filename); +err_out: + close(fd); + return libbpf_err(ret); +} + static bool is_dwarf_sec_name(const char *name) { /* approximation, but the actual list is too long */ @@ -2687,9 +2806,10 @@ int bpf_linker__finalize(struct bpf_linker *linker) } elf_end(linker->elf); - close(linker->fd); - linker->elf = NULL; + + if (linker->fd_is_owned) + close(linker->fd); linker->fd = -1; return 0; -- cgit v1.2.3 From 8da7bf2cee2735dbd2478cf07672ff0d243ce6ed Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 12 Dec 2024 16:16:57 -1000 Subject: tools/sched_ext: Receive updates from SCX repo Receive tools/sched_ext updates form https://github.com/sched-ext/scx to sync userspace bits: - scx_bpf_dump_header() added which can be used to print out basic scheduler info on dump. - BPF possible/online CPU iterators added. - CO-RE enums added. The enums are autogenerated from vmlinux.h. Include the generated artifacts in tools/sched_ext to keep the Makefile simpler. - Other misc changes. Signed-off-by: Tejun Heo --- tools/sched_ext/include/scx/common.bpf.h | 83 ++++++++++++++++++- tools/sched_ext/include/scx/common.h | 6 ++ tools/sched_ext/include/scx/compat.h | 1 + tools/sched_ext/include/scx/enums.autogen.bpf.h | 105 ++++++++++++++++++++++++ tools/sched_ext/include/scx/enums.autogen.h | 41 +++++++++ tools/sched_ext/include/scx/enums.bpf.h | 12 +++ tools/sched_ext/include/scx/enums.h | 27 ++++++ tools/sched_ext/include/scx/user_exit_info.h | 9 +- tools/sched_ext/scx_central.bpf.c | 2 +- tools/sched_ext/scx_central.c | 1 + tools/sched_ext/scx_flatcg.bpf.c | 2 +- tools/sched_ext/scx_flatcg.c | 1 + tools/sched_ext/scx_qmap.bpf.c | 2 +- tools/sched_ext/scx_qmap.c | 2 + 14 files changed, 286 insertions(+), 8 deletions(-) create mode 100644 tools/sched_ext/include/scx/enums.autogen.bpf.h create mode 100644 tools/sched_ext/include/scx/enums.autogen.h create mode 100644 tools/sched_ext/include/scx/enums.bpf.h create mode 100644 tools/sched_ext/include/scx/enums.h (limited to 'tools') diff --git a/tools/sched_ext/include/scx/common.bpf.h b/tools/sched_ext/include/scx/common.bpf.h index 625f5b046776..858ba1f438f6 100644 --- a/tools/sched_ext/include/scx/common.bpf.h +++ b/tools/sched_ext/include/scx/common.bpf.h @@ -9,7 +9,7 @@ #ifdef LSP #define __bpf__ -#include "../vmlinux/vmlinux.h" +#include "../vmlinux.h" #else #include "vmlinux.h" #endif @@ -24,6 +24,10 @@ #define PF_EXITING 0x00000004 #define CLOCK_MONOTONIC 1 +extern int LINUX_KERNEL_VERSION __kconfig; +extern const char CONFIG_CC_VERSION_TEXT[64] __kconfig __weak; +extern const char CONFIG_LOCALVERSION[64] __kconfig __weak; + /* * Earlier versions of clang/pahole lost upper 32bits in 64bit enums which can * lead to really confusing misbehaviors. Let's trigger a build failure. 
@@ -98,7 +102,7 @@ void ___scx_bpf_bstr_format_checker(const char *fmt, ...) {} _Pragma("GCC diagnostic push") \ _Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \ ___bpf_fill(___param, args); \ - _Pragma("GCC diagnostic pop") \ + _Pragma("GCC diagnostic pop") /* * scx_bpf_exit() wraps the scx_bpf_exit_bstr() kfunc with variadic arguments @@ -136,6 +140,20 @@ void ___scx_bpf_bstr_format_checker(const char *fmt, ...) {} ___scx_bpf_bstr_format_checker(fmt, ##args); \ }) +/* + * scx_bpf_dump_header() is a wrapper around scx_bpf_dump that adds a header + * of system information for debugging. + */ +#define scx_bpf_dump_header() \ +({ \ + scx_bpf_dump("kernel: %d.%d.%d %s\ncc: %s\n", \ + LINUX_KERNEL_VERSION >> 16, \ + LINUX_KERNEL_VERSION >> 8 & 0xFF, \ + LINUX_KERNEL_VERSION & 0xFF, \ + CONFIG_LOCALVERSION, \ + CONFIG_CC_VERSION_TEXT); \ +}) + #define BPF_STRUCT_OPS(name, args...) \ SEC("struct_ops/"#name) \ BPF_PROG(name, ##args) @@ -317,6 +335,66 @@ u32 bpf_cpumask_any_and_distribute(const struct cpumask *src1, const struct cpumask *src2) __ksym; u32 bpf_cpumask_weight(const struct cpumask *cpumask) __ksym; +int bpf_iter_bits_new(struct bpf_iter_bits *it, const u64 *unsafe_ptr__ign, u32 nr_words) __ksym; +int *bpf_iter_bits_next(struct bpf_iter_bits *it) __ksym; +void bpf_iter_bits_destroy(struct bpf_iter_bits *it) __ksym; + +#define def_iter_struct(name) \ +struct bpf_iter_##name { \ + struct bpf_iter_bits it; \ + const struct cpumask *bitmap; \ +}; + +#define def_iter_new(name) \ +static inline int bpf_iter_##name##_new( \ + struct bpf_iter_##name *it, const u64 *unsafe_ptr__ign, u32 nr_words) \ +{ \ + it->bitmap = scx_bpf_get_##name##_cpumask(); \ + return bpf_iter_bits_new(&it->it, (const u64 *)it->bitmap, \ + sizeof(struct cpumask) / 8); \ +} + +#define def_iter_next(name) \ +static inline int *bpf_iter_##name##_next(struct bpf_iter_##name *it) { \ + return bpf_iter_bits_next(&it->it); \ +} + +#define def_iter_destroy(name) \ +static inline void bpf_iter_##name##_destroy(struct bpf_iter_##name *it) { \ + scx_bpf_put_cpumask(it->bitmap); \ + bpf_iter_bits_destroy(&it->it); \ +} +#define def_for_each_cpu(cpu, name) for_each_##name##_cpu(cpu) + +/// Provides iterator for possible and online cpus. +/// +/// # Example +/// +/// ``` +/// static inline void example_use() { +/// int *cpu; +/// +/// for_each_possible_cpu(cpu){ +/// bpf_printk("CPU %d is possible", *cpu); +/// } +/// +/// for_each_online_cpu(cpu){ +/// bpf_printk("CPU %d is online", *cpu); +/// } +/// } +/// ``` +def_iter_struct(possible); +def_iter_new(possible); +def_iter_next(possible); +def_iter_destroy(possible); +#define for_each_possible_cpu(cpu) bpf_for_each(possible, cpu, NULL, 0) + +def_iter_struct(online); +def_iter_new(online); +def_iter_next(online); +def_iter_destroy(online); +#define for_each_online_cpu(cpu) bpf_for_each(online, cpu, NULL, 0) + /* * Access a cpumask in read-only mode (typically to check bits). 
*/ @@ -423,5 +501,6 @@ static inline u32 log2_u64(u64 v) } #include "compat.bpf.h" +#include "enums.bpf.h" #endif /* __SCX_COMMON_BPF_H */ diff --git a/tools/sched_ext/include/scx/common.h b/tools/sched_ext/include/scx/common.h index 5b0f90152152..dc18b99e55cd 100644 --- a/tools/sched_ext/include/scx/common.h +++ b/tools/sched_ext/include/scx/common.h @@ -71,5 +71,11 @@ typedef int64_t s64; #include "user_exit_info.h" #include "compat.h" +#include "enums.h" + +/* not available when building kernel tools/sched_ext */ +#if __has_include() +#include +#endif #endif /* __SCHED_EXT_COMMON_H */ diff --git a/tools/sched_ext/include/scx/compat.h b/tools/sched_ext/include/scx/compat.h index cc56ff9aa252..b50280e2ba2b 100644 --- a/tools/sched_ext/include/scx/compat.h +++ b/tools/sched_ext/include/scx/compat.h @@ -149,6 +149,7 @@ static inline long scx_hotplug_seq(void) __skel = __scx_name##__open(); \ SCX_BUG_ON(!__skel, "Could not open " #__scx_name); \ __skel->struct_ops.__ops_name->hotplug_seq = scx_hotplug_seq(); \ + SCX_ENUM_INIT(__skel); \ __skel; \ }) diff --git a/tools/sched_ext/include/scx/enums.autogen.bpf.h b/tools/sched_ext/include/scx/enums.autogen.bpf.h new file mode 100644 index 000000000000..0e941a0d6f88 --- /dev/null +++ b/tools/sched_ext/include/scx/enums.autogen.bpf.h @@ -0,0 +1,105 @@ +/* + * WARNING: This file is autogenerated from scripts/gen_enums.py. If you would + * like to access an enum that is currently missing, add it to the script + * and run it from the root directory to update this file. + */ + +const volatile u64 __SCX_OPS_NAME_LEN __weak; +#define SCX_OPS_NAME_LEN __SCX_OPS_NAME_LEN + +const volatile u64 __SCX_SLICE_DFL __weak; +#define SCX_SLICE_DFL __SCX_SLICE_DFL + +const volatile u64 __SCX_SLICE_INF __weak; +#define SCX_SLICE_INF __SCX_SLICE_INF + +const volatile u64 __SCX_DSQ_FLAG_BUILTIN __weak; +#define SCX_DSQ_FLAG_BUILTIN __SCX_DSQ_FLAG_BUILTIN + +const volatile u64 __SCX_DSQ_FLAG_LOCAL_ON __weak; +#define SCX_DSQ_FLAG_LOCAL_ON __SCX_DSQ_FLAG_LOCAL_ON + +const volatile u64 __SCX_DSQ_INVALID __weak; +#define SCX_DSQ_INVALID __SCX_DSQ_INVALID + +const volatile u64 __SCX_DSQ_GLOBAL __weak; +#define SCX_DSQ_GLOBAL __SCX_DSQ_GLOBAL + +const volatile u64 __SCX_DSQ_LOCAL __weak; +#define SCX_DSQ_LOCAL __SCX_DSQ_LOCAL + +const volatile u64 __SCX_DSQ_LOCAL_ON __weak; +#define SCX_DSQ_LOCAL_ON __SCX_DSQ_LOCAL_ON + +const volatile u64 __SCX_DSQ_LOCAL_CPU_MASK __weak; +#define SCX_DSQ_LOCAL_CPU_MASK __SCX_DSQ_LOCAL_CPU_MASK + +const volatile u64 __SCX_TASK_QUEUED __weak; +#define SCX_TASK_QUEUED __SCX_TASK_QUEUED + +const volatile u64 __SCX_TASK_RESET_RUNNABLE_AT __weak; +#define SCX_TASK_RESET_RUNNABLE_AT __SCX_TASK_RESET_RUNNABLE_AT + +const volatile u64 __SCX_TASK_DEQD_FOR_SLEEP __weak; +#define SCX_TASK_DEQD_FOR_SLEEP __SCX_TASK_DEQD_FOR_SLEEP + +const volatile u64 __SCX_TASK_STATE_SHIFT __weak; +#define SCX_TASK_STATE_SHIFT __SCX_TASK_STATE_SHIFT + +const volatile u64 __SCX_TASK_STATE_BITS __weak; +#define SCX_TASK_STATE_BITS __SCX_TASK_STATE_BITS + +const volatile u64 __SCX_TASK_STATE_MASK __weak; +#define SCX_TASK_STATE_MASK __SCX_TASK_STATE_MASK + +const volatile u64 __SCX_TASK_CURSOR __weak; +#define SCX_TASK_CURSOR __SCX_TASK_CURSOR + +const volatile u64 __SCX_TASK_NONE __weak; +#define SCX_TASK_NONE __SCX_TASK_NONE + +const volatile u64 __SCX_TASK_INIT __weak; +#define SCX_TASK_INIT __SCX_TASK_INIT + +const volatile u64 __SCX_TASK_READY __weak; +#define SCX_TASK_READY __SCX_TASK_READY + +const volatile u64 __SCX_TASK_ENABLED __weak; +#define SCX_TASK_ENABLED 
__SCX_TASK_ENABLED + +const volatile u64 __SCX_TASK_NR_STATES __weak; +#define SCX_TASK_NR_STATES __SCX_TASK_NR_STATES + +const volatile u64 __SCX_TASK_DSQ_ON_PRIQ __weak; +#define SCX_TASK_DSQ_ON_PRIQ __SCX_TASK_DSQ_ON_PRIQ + +const volatile u64 __SCX_KICK_IDLE __weak; +#define SCX_KICK_IDLE __SCX_KICK_IDLE + +const volatile u64 __SCX_KICK_PREEMPT __weak; +#define SCX_KICK_PREEMPT __SCX_KICK_PREEMPT + +const volatile u64 __SCX_KICK_WAIT __weak; +#define SCX_KICK_WAIT __SCX_KICK_WAIT + +const volatile u64 __SCX_ENQ_WAKEUP __weak; +#define SCX_ENQ_WAKEUP __SCX_ENQ_WAKEUP + +const volatile u64 __SCX_ENQ_HEAD __weak; +#define SCX_ENQ_HEAD __SCX_ENQ_HEAD + +const volatile u64 __SCX_ENQ_PREEMPT __weak; +#define SCX_ENQ_PREEMPT __SCX_ENQ_PREEMPT + +const volatile u64 __SCX_ENQ_REENQ __weak; +#define SCX_ENQ_REENQ __SCX_ENQ_REENQ + +const volatile u64 __SCX_ENQ_LAST __weak; +#define SCX_ENQ_LAST __SCX_ENQ_LAST + +const volatile u64 __SCX_ENQ_CLEAR_OPSS __weak; +#define SCX_ENQ_CLEAR_OPSS __SCX_ENQ_CLEAR_OPSS + +const volatile u64 __SCX_ENQ_DSQ_PRIQ __weak; +#define SCX_ENQ_DSQ_PRIQ __SCX_ENQ_DSQ_PRIQ + diff --git a/tools/sched_ext/include/scx/enums.autogen.h b/tools/sched_ext/include/scx/enums.autogen.h new file mode 100644 index 000000000000..88137a140e72 --- /dev/null +++ b/tools/sched_ext/include/scx/enums.autogen.h @@ -0,0 +1,41 @@ +/* + * WARNING: This file is autogenerated from scripts/gen_enums.py. If you would + * like to access an enum that is currently missing, add it to the script + * and run it from the root directory to update this file. + */ + +#define SCX_ENUM_INIT(skel) do { \ + SCX_ENUM_SET(skel, scx_public_consts, SCX_OPS_NAME_LEN); \ + SCX_ENUM_SET(skel, scx_public_consts, SCX_SLICE_DFL); \ + SCX_ENUM_SET(skel, scx_public_consts, SCX_SLICE_INF); \ + SCX_ENUM_SET(skel, scx_dsq_id_flags, SCX_DSQ_FLAG_BUILTIN); \ + SCX_ENUM_SET(skel, scx_dsq_id_flags, SCX_DSQ_FLAG_LOCAL_ON); \ + SCX_ENUM_SET(skel, scx_dsq_id_flags, SCX_DSQ_INVALID); \ + SCX_ENUM_SET(skel, scx_dsq_id_flags, SCX_DSQ_GLOBAL); \ + SCX_ENUM_SET(skel, scx_dsq_id_flags, SCX_DSQ_LOCAL); \ + SCX_ENUM_SET(skel, scx_dsq_id_flags, SCX_DSQ_LOCAL_ON); \ + SCX_ENUM_SET(skel, scx_dsq_id_flags, SCX_DSQ_LOCAL_CPU_MASK); \ + SCX_ENUM_SET(skel, scx_ent_flags, SCX_TASK_QUEUED); \ + SCX_ENUM_SET(skel, scx_ent_flags, SCX_TASK_RESET_RUNNABLE_AT); \ + SCX_ENUM_SET(skel, scx_ent_flags, SCX_TASK_DEQD_FOR_SLEEP); \ + SCX_ENUM_SET(skel, scx_ent_flags, SCX_TASK_STATE_SHIFT); \ + SCX_ENUM_SET(skel, scx_ent_flags, SCX_TASK_STATE_BITS); \ + SCX_ENUM_SET(skel, scx_ent_flags, SCX_TASK_STATE_MASK); \ + SCX_ENUM_SET(skel, scx_ent_flags, SCX_TASK_CURSOR); \ + SCX_ENUM_SET(skel, scx_task_state, SCX_TASK_NONE); \ + SCX_ENUM_SET(skel, scx_task_state, SCX_TASK_INIT); \ + SCX_ENUM_SET(skel, scx_task_state, SCX_TASK_READY); \ + SCX_ENUM_SET(skel, scx_task_state, SCX_TASK_ENABLED); \ + SCX_ENUM_SET(skel, scx_task_state, SCX_TASK_NR_STATES); \ + SCX_ENUM_SET(skel, scx_ent_dsq_flags, SCX_TASK_DSQ_ON_PRIQ); \ + SCX_ENUM_SET(skel, scx_kick_flags, SCX_KICK_IDLE); \ + SCX_ENUM_SET(skel, scx_kick_flags, SCX_KICK_PREEMPT); \ + SCX_ENUM_SET(skel, scx_kick_flags, SCX_KICK_WAIT); \ + SCX_ENUM_SET(skel, scx_enq_flags, SCX_ENQ_WAKEUP); \ + SCX_ENUM_SET(skel, scx_enq_flags, SCX_ENQ_HEAD); \ + SCX_ENUM_SET(skel, scx_enq_flags, SCX_ENQ_PREEMPT); \ + SCX_ENUM_SET(skel, scx_enq_flags, SCX_ENQ_REENQ); \ + SCX_ENUM_SET(skel, scx_enq_flags, SCX_ENQ_LAST); \ + SCX_ENUM_SET(skel, scx_enq_flags, SCX_ENQ_CLEAR_OPSS); \ + SCX_ENUM_SET(skel, scx_enq_flags, SCX_ENQ_DSQ_PRIQ); \ +} 
while (0) diff --git a/tools/sched_ext/include/scx/enums.bpf.h b/tools/sched_ext/include/scx/enums.bpf.h new file mode 100644 index 000000000000..af704c5d6334 --- /dev/null +++ b/tools/sched_ext/include/scx/enums.bpf.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Convenience macros for getting/setting struct scx_enums instances. + * + * Copyright (c) 2024 Meta Platforms, Inc. and affiliates. + */ +#ifndef __SCX_ENUMS_BPF_H +#define __SCX_ENUMS_BPF_H + +#include "enums.autogen.bpf.h" + +#endif /* __SCX_ENUMS_BPF_H */ diff --git a/tools/sched_ext/include/scx/enums.h b/tools/sched_ext/include/scx/enums.h new file mode 100644 index 000000000000..34cbebe974b7 --- /dev/null +++ b/tools/sched_ext/include/scx/enums.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Define struct scx_enums that stores the load-time values of enums + * used by the BPF program. + * + * Copyright (c) 2024 Meta Platforms, Inc. and affiliates. + */ + +#ifndef __SCX_ENUMS_H +#define __SCX_ENUMS_H + +static inline void __ENUM_set(u64 *val, char *type, char *name) +{ + bool res; + + res = __COMPAT_read_enum(type, name, val); + SCX_BUG_ON(!res, "enum not found(%s)", name); +} + +#define SCX_ENUM_SET(skel, type, name) do { \ + __ENUM_set(&skel->rodata->__##name, #type, #name); \ + } while (0) + + +#include "enums.autogen.h" + +#endif /* __SCX_ENUMS_H */ diff --git a/tools/sched_ext/include/scx/user_exit_info.h b/tools/sched_ext/include/scx/user_exit_info.h index 8ce2734402e1..66f856640ee7 100644 --- a/tools/sched_ext/include/scx/user_exit_info.h +++ b/tools/sched_ext/include/scx/user_exit_info.h @@ -10,6 +10,11 @@ #ifndef __USER_EXIT_INFO_H #define __USER_EXIT_INFO_H +#ifdef LSP +#define __bpf__ +#include "../vmlinux.h" +#endif + enum uei_sizes { UEI_REASON_LEN = 128, UEI_MSG_LEN = 1024, @@ -25,9 +30,7 @@ struct user_exit_info { #ifdef __bpf__ -#ifdef LSP -#include "../vmlinux/vmlinux.h" -#else +#ifndef LSP #include "vmlinux.h" #endif #include diff --git a/tools/sched_ext/scx_central.bpf.c b/tools/sched_ext/scx_central.bpf.c index e6fad6211f6c..2907df78241e 100644 --- a/tools/sched_ext/scx_central.bpf.c +++ b/tools/sched_ext/scx_central.bpf.c @@ -57,7 +57,7 @@ enum { const volatile s32 central_cpu; const volatile u32 nr_cpu_ids = 1; /* !0 for veristat, set during init */ -const volatile u64 slice_ns = SCX_SLICE_DFL; +const volatile u64 slice_ns; bool timer_pinned = true; u64 nr_total, nr_locals, nr_queued, nr_lost_pids; diff --git a/tools/sched_ext/scx_central.c b/tools/sched_ext/scx_central.c index e938156ed0a0..1e9f74525d8f 100644 --- a/tools/sched_ext/scx_central.c +++ b/tools/sched_ext/scx_central.c @@ -58,6 +58,7 @@ restart: skel->rodata->central_cpu = 0; skel->rodata->nr_cpu_ids = libbpf_num_possible_cpus(); + skel->rodata->slice_ns = __COMPAT_ENUM_OR_ZERO("scx_public_consts", "SCX_SLICE_DFL"); while ((opt = getopt(argc, argv, "s:c:pvh")) != -1) { switch (opt) { diff --git a/tools/sched_ext/scx_flatcg.bpf.c b/tools/sched_ext/scx_flatcg.bpf.c index 4e3afcd260bf..3dbfa82883be 100644 --- a/tools/sched_ext/scx_flatcg.bpf.c +++ b/tools/sched_ext/scx_flatcg.bpf.c @@ -57,7 +57,7 @@ enum { char _license[] SEC("license") = "GPL"; const volatile u32 nr_cpus = 32; /* !0 for veristat, set during init */ -const volatile u64 cgrp_slice_ns = SCX_SLICE_DFL; +const volatile u64 cgrp_slice_ns; const volatile bool fifo_sched; u64 cvtime_now; diff --git a/tools/sched_ext/scx_flatcg.c b/tools/sched_ext/scx_flatcg.c index 5d24ca9c29d9..6dd423eeb4ff 100644 --- a/tools/sched_ext/scx_flatcg.c +++ 
b/tools/sched_ext/scx_flatcg.c @@ -137,6 +137,7 @@ restart: skel = SCX_OPS_OPEN(flatcg_ops, scx_flatcg); skel->rodata->nr_cpus = libbpf_num_possible_cpus(); + skel->rodata->cgrp_slice_ns = __COMPAT_ENUM_OR_ZERO("scx_public_consts", "SCX_SLICE_DFL"); while ((opt = getopt(argc, argv, "s:i:dfvh")) != -1) { double v; diff --git a/tools/sched_ext/scx_qmap.bpf.c b/tools/sched_ext/scx_qmap.bpf.c index ee264947e0c3..3a20bb0c014a 100644 --- a/tools/sched_ext/scx_qmap.bpf.c +++ b/tools/sched_ext/scx_qmap.bpf.c @@ -33,7 +33,7 @@ enum consts { char _license[] SEC("license") = "GPL"; -const volatile u64 slice_ns = SCX_SLICE_DFL; +const volatile u64 slice_ns; const volatile u32 stall_user_nth; const volatile u32 stall_kernel_nth; const volatile u32 dsp_inf_loop_after; diff --git a/tools/sched_ext/scx_qmap.c b/tools/sched_ext/scx_qmap.c index ac45a02b4055..c4912ab2e76f 100644 --- a/tools/sched_ext/scx_qmap.c +++ b/tools/sched_ext/scx_qmap.c @@ -64,6 +64,8 @@ int main(int argc, char **argv) skel = SCX_OPS_OPEN(qmap_ops, scx_qmap); + skel->rodata->slice_ns = __COMPAT_ENUM_OR_ZERO("scx_public_consts", "SCX_SLICE_DFL"); + while ((opt = getopt(argc, argv, "s:e:t:T:l:b:PHd:D:Spvh")) != -1) { switch (opt) { case 's': -- cgit v1.2.3 From 5506b7d7bbdb7622959d80a4a2fc18985a01d512 Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Thu, 12 Dec 2024 16:32:24 -0800 Subject: selftests/bpf: make BPF_TARGET_ENDIAN non-recursive to speed up *.bpf.o build BPF_TARGET_ENDIAN is used in CLANG_BPF_BUILD_RULE and co macros. It is defined as a recursively expanded variable, meaning that it is recomputed each time the value is needed. Thus, it is recomputed for each *.bpf.o file compilation. The variable is computed by running a C compiler in a shell. This significantly hinders parallel build performance for *.bpf.o files. This commit changes BPF_TARGET_ENDIAN to be a simply expanded variable. # Build performance stats before this commit $ git clean -xfd; time make -j12 real 1m0.000s ... # Build performance stats after this commit $ git clean -xfd; time make -j12 real 0m43.605s ... Signed-off-by: Eduard Zingerman Acked-by: Yonghong Song Link: https://lore.kernel.org/r/20241213003224.837030-1-eddyz87@gmail.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/Makefile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index bb8cf8f5bf11..9e870e519c30 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -461,10 +461,10 @@ $(shell $(1) $(2) -dM -E - Date: Fri, 29 Nov 2024 15:47:49 +0100 Subject: objtool/x86: allow syscall instruction The syscall instruction is used in Xen PV mode for doing hypercalls. Allow syscall to be used in the kernel in case it is tagged with an unwind hint for objtool. This is part of XSA-466 / CVE-2024-53241. 
Reported-by: Andrew Cooper Signed-off-by: Juergen Gross Co-developed-by: Peter Zijlstra --- tools/objtool/check.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 4ce176ad411f..76060da755b5 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -3820,9 +3820,12 @@ static int validate_branch(struct objtool_file *file, struct symbol *func, break; case INSN_CONTEXT_SWITCH: - if (func && (!next_insn || !next_insn->hint)) { - WARN_INSN(insn, "unsupported instruction in callable function"); - return 1; + if (func) { + if (!next_insn || !next_insn->hint) { + WARN_INSN(insn, "unsupported instruction in callable function"); + return 1; + } + break; } return 0; -- cgit v1.2.3 From ce03573a1917532da06057da9f8e74a2ee9e2ac9 Mon Sep 17 00:00:00 2001 From: Weizhao Ouyang Date: Wed, 11 Dec 2024 19:16:39 +0800 Subject: kselftest/arm64: abi: fix SVCR detection When using svcr_in to check ZA and Streaming Mode, we should make sure that the value in x2 is correct, otherwise it may trigger an Illegal instruction if FEAT_SVE and !FEAT_SME. Fixes: 43e3f85523e4 ("kselftest/arm64: Add SME support to syscall ABI test") Signed-off-by: Weizhao Ouyang Reviewed-by: Mark Brown Link: https://lore.kernel.org/r/20241211111639.12344-1-o451686892@gmail.com Signed-off-by: Catalin Marinas --- .../testing/selftests/arm64/abi/syscall-abi-asm.S | 32 ++++++++++------------ 1 file changed, 15 insertions(+), 17 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/arm64/abi/syscall-abi-asm.S b/tools/testing/selftests/arm64/abi/syscall-abi-asm.S index df3230fdac39..66ab2e0bae5f 100644 --- a/tools/testing/selftests/arm64/abi/syscall-abi-asm.S +++ b/tools/testing/selftests/arm64/abi/syscall-abi-asm.S @@ -81,32 +81,31 @@ do_syscall: stp x27, x28, [sp, #96] // Set SVCR if we're doing SME - cbz x1, 1f + cbz x1, load_gpr adrp x2, svcr_in ldr x2, [x2, :lo12:svcr_in] msr S3_3_C4_C2_2, x2 -1: // Load ZA and ZT0 if enabled - uses x12 as scratch due to SME LDR - tbz x2, #SVCR_ZA_SHIFT, 1f + tbz x2, #SVCR_ZA_SHIFT, load_gpr mov w12, #0 ldr x2, =za_in -2: _ldr_za 12, 2 +1: _ldr_za 12, 2 add x2, x2, x1 add x12, x12, #1 cmp x1, x12 - bne 2b + bne 1b // ZT0 mrs x2, S3_0_C0_C4_5 // ID_AA64SMFR0_EL1 ubfx x2, x2, #ID_AA64SMFR0_EL1_SMEver_SHIFT, \ #ID_AA64SMFR0_EL1_SMEver_WIDTH - cbz x2, 1f + cbz x2, load_gpr adrp x2, zt_in add x2, x2, :lo12:zt_in _ldr_zt 2 -1: +load_gpr: // Load GPRs x8-x28, and save our SP/FP for later comparison ldr x2, =gpr_in add x2, x2, #64 @@ -125,9 +124,9 @@ do_syscall: str x30, [x2], #8 // LR // Load FPRs if we're not doing neither SVE nor streaming SVE - cbnz x0, 1f + cbnz x0, check_sve_in ldr x2, =svcr_in - tbnz x2, #SVCR_SM_SHIFT, 1f + tbnz x2, #SVCR_SM_SHIFT, check_sve_in ldr x2, =fpr_in ldp q0, q1, [x2] @@ -148,8 +147,8 @@ do_syscall: ldp q30, q31, [x2, #16 * 30] b 2f -1: +check_sve_in: // Load the SVE registers if we're doing SVE/SME ldr x2, =z_in @@ -256,32 +255,31 @@ do_syscall: stp q30, q31, [x2, #16 * 30] // Save SVCR if we're doing SME - cbz x1, 1f + cbz x1, check_sve_out mrs x2, S3_3_C4_C2_2 adrp x3, svcr_out str x2, [x3, :lo12:svcr_out] -1: // Save ZA if it's enabled - uses x12 as scratch due to SME STR - tbz x2, #SVCR_ZA_SHIFT, 1f + tbz x2, #SVCR_ZA_SHIFT, check_sve_out mov w12, #0 ldr x2, =za_out -2: _str_za 12, 2 +1: _str_za 12, 2 add x2, x2, x1 add x12, x12, #1 cmp x1, x12 - bne 2b + bne 1b // ZT0 mrs x2, S3_0_C0_C4_5 // ID_AA64SMFR0_EL1 ubfx x2, x2, #ID_AA64SMFR0_EL1_SMEver_SHIFT, \ 
#ID_AA64SMFR0_EL1_SMEver_WIDTH - cbz x2, 1f + cbz x2, check_sve_out adrp x2, zt_out add x2, x2, :lo12:zt_out _str_zt 2 -1: +check_sve_out: // Save the SVE state if we have some cbz x0, 1f -- cgit v1.2.3 From 5e3ad22d82238e8bcb4c7ec26a20533217ddfb18 Mon Sep 17 00:00:00 2001 From: Daniel Xu Date: Fri, 13 Dec 2024 12:44:09 -0700 Subject: bpftool: man: Add missing format argument to command description The command description was missing the optional argument. Add it there for consistency with the rest of the commands. Signed-off-by: Daniel Xu Signed-off-by: Andrii Nakryiko Reviewed-by: Quentin Monnet Link: https://lore.kernel.org/bpf/140402f22fc377fba4c34376b7e1d2eba2c276b1.1734119028.git.dxu@dxuuu.xyz --- tools/bpf/bpftool/Documentation/bpftool-btf.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/Documentation/bpftool-btf.rst b/tools/bpf/bpftool/Documentation/bpftool-btf.rst index 3f6bca03ad2e..245569f43035 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-btf.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-btf.rst @@ -43,7 +43,7 @@ bpftool btf { show | list } [id *BTF_ID*] that hold open file descriptors (FDs) against BTF objects. On such kernels bpftool will automatically emit this information as well. -bpftool btf dump *BTF_SRC* +bpftool btf dump *BTF_SRC* [format *FORMAT*] Dump BTF entries from a given *BTF_SRC*. When **id** is specified, BTF object with that ID will be loaded and all -- cgit v1.2.3 From 7f5819e1ace85632cf58c43ab6c38d2d4b0aa161 Mon Sep 17 00:00:00 2001 From: Daniel Xu Date: Fri, 13 Dec 2024 12:44:10 -0700 Subject: bpftool: btf: Validate root_type_ids early Handle invalid root_type_ids early, as an invalid ID will cause dumpers to half-emit valid boilerplate and then bail with an unclean exit. This is ugly and possibly confusing for users, so preemptively handle the common error case before any dumping begins. Signed-off-by: Daniel Xu Signed-off-by: Andrii Nakryiko Reviewed-by: Quentin Monnet Link: https://lore.kernel.org/bpf/33e09a08a6072f8381cb976218a009709309b7e1.1734119028.git.dxu@dxuuu.xyz --- tools/bpf/bpftool/btf.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'tools') diff --git a/tools/bpf/bpftool/btf.c b/tools/bpf/bpftool/btf.c index d005e4fd6128..3e995faf9efa 100644 --- a/tools/bpf/bpftool/btf.c +++ b/tools/bpf/bpftool/btf.c @@ -886,6 +886,7 @@ static int do_dump(int argc, char **argv) const char *src; int fd = -1; int err = 0; + int i; if (!REQ_ARGS(2)) { usage(); @@ -1017,6 +1018,17 @@ static int do_dump(int argc, char **argv) } } + /* Invalid root IDs causes half emitted boilerplate and then unclean + * exit. It's an ugly user experience, so handle common error here. + */ + for (i = 0; i < root_type_cnt; i++) { + if (root_type_ids[i] >= btf__type_cnt(btf)) { + err = -EINVAL; + p_err("invalid root ID: %u", root_type_ids[i]); + goto done; + } + } + if (dump_c) { if (json_output) { p_err("JSON output for C-syntax dump is not supported"); -- cgit v1.2.3 From a812d92ed2aee2d57dccb12b289377265f4ce5e7 Mon Sep 17 00:00:00 2001 From: Daniel Xu Date: Fri, 13 Dec 2024 12:44:11 -0700 Subject: bpftool: btf: Support dumping a specific types from file Some projects, for example xdp-tools [0], prefer to check in a minimized vmlinux.h rather than the complete file which can get rather large. However, when you try to add a minimized version of a complex struct (eg struct xfrm_state), things can get quite complex if you're trying to manually untangle and deduplicate the dependencies. 
This commit teaches bpftool to do a minimized dump of a specific types by providing a optional root_id argument(s). Example usage: $ ./bpftool btf dump file ~/dev/linux/vmlinux | rg "STRUCT 'xfrm_state'" [12643] STRUCT 'xfrm_state' size=912 vlen=58 $ ./bpftool btf dump file ~/dev/linux/vmlinux root_id 12643 format c #ifndef __VMLINUX_H__ #define __VMLINUX_H__ [..] struct xfrm_type_offload; struct xfrm_sec_ctx; struct xfrm_state { possible_net_t xs_net; union { struct hlist_node gclist; struct hlist_node bydst; }; union { struct hlist_node dev_gclist; struct hlist_node bysrc; }; struct hlist_node byspi; [..] [0]: https://github.com/xdp-project/xdp-tools/blob/master/headers/bpf/vmlinux.h Signed-off-by: Daniel Xu Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/04feb860c0a56a7da66f923551484e1483a72074.1734119028.git.dxu@dxuuu.xyz --- tools/bpf/bpftool/Documentation/bpftool-btf.rst | 9 ++++-- tools/bpf/bpftool/btf.c | 39 +++++++++++++++++++++++-- 2 files changed, 44 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/Documentation/bpftool-btf.rst b/tools/bpf/bpftool/Documentation/bpftool-btf.rst index 245569f43035..d47dddc2b4ee 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-btf.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-btf.rst @@ -24,7 +24,7 @@ BTF COMMANDS ============= | **bpftool** **btf** { **show** | **list** } [**id** *BTF_ID*] -| **bpftool** **btf dump** *BTF_SRC* [**format** *FORMAT*] +| **bpftool** **btf dump** *BTF_SRC* [**format** *FORMAT*] [**root_id** *ROOT_ID*] | **bpftool** **btf help** | | *BTF_SRC* := { **id** *BTF_ID* | **prog** *PROG* | **map** *MAP* [{**key** | **value** | **kv** | **all**}] | **file** *FILE* } @@ -43,7 +43,7 @@ bpftool btf { show | list } [id *BTF_ID*] that hold open file descriptors (FDs) against BTF objects. On such kernels bpftool will automatically emit this information as well. -bpftool btf dump *BTF_SRC* [format *FORMAT*] +bpftool btf dump *BTF_SRC* [format *FORMAT*] [root_id *ROOT_ID*] Dump BTF entries from a given *BTF_SRC*. When **id** is specified, BTF object with that ID will be loaded and all @@ -67,6 +67,11 @@ bpftool btf dump *BTF_SRC* [format *FORMAT*] formatting, the output is sorted by default. Use the **unsorted** option to avoid sorting the output. + **root_id** option can be used to filter a dump to a single type and all + its dependent types. It cannot be used with any other types of filtering + (such as the "key", "value", or "kv" arguments when dumping BTF for a map). + It can be passed multiple times to dump multiple types. + bpftool btf help Print short help message. 
diff --git a/tools/bpf/bpftool/btf.c b/tools/bpf/bpftool/btf.c index 3e995faf9efa..2636655ac180 100644 --- a/tools/bpf/bpftool/btf.c +++ b/tools/bpf/bpftool/btf.c @@ -27,6 +27,8 @@ #define KFUNC_DECL_TAG "bpf_kfunc" #define FASTCALL_DECL_TAG "bpf_fastcall" +#define MAX_ROOT_IDS 16 + static const char * const btf_kind_str[NR_BTF_KINDS] = { [BTF_KIND_UNKN] = "UNKNOWN", [BTF_KIND_INT] = "INT", @@ -880,7 +882,8 @@ static int do_dump(int argc, char **argv) { bool dump_c = false, sort_dump_c = true; struct btf *btf = NULL, *base = NULL; - __u32 root_type_ids[2]; + __u32 root_type_ids[MAX_ROOT_IDS]; + bool have_id_filtering; int root_type_cnt = 0; __u32 btf_id = -1; const char *src; @@ -974,6 +977,8 @@ static int do_dump(int argc, char **argv) goto done; } + have_id_filtering = !!root_type_cnt; + while (argc) { if (is_prefix(*argv, "format")) { NEXT_ARG(); @@ -993,6 +998,36 @@ static int do_dump(int argc, char **argv) goto done; } NEXT_ARG(); + } else if (is_prefix(*argv, "root_id")) { + __u32 root_id; + char *end; + + if (have_id_filtering) { + p_err("cannot use root_id with other type filtering"); + err = -EINVAL; + goto done; + } else if (root_type_cnt == MAX_ROOT_IDS) { + p_err("only %d root_id are supported", MAX_ROOT_IDS); + err = -E2BIG; + goto done; + } + + NEXT_ARG(); + root_id = strtoul(*argv, &end, 0); + if (*end) { + err = -1; + p_err("can't parse %s as root ID", *argv); + goto done; + } + for (i = 0; i < root_type_cnt; i++) { + if (root_type_ids[i] == root_id) { + err = -EINVAL; + p_err("duplicate root_id %d supplied", root_id); + goto done; + } + } + root_type_ids[root_type_cnt++] = root_id; + NEXT_ARG(); } else if (is_prefix(*argv, "unsorted")) { sort_dump_c = false; NEXT_ARG(); @@ -1403,7 +1438,7 @@ static int do_help(int argc, char **argv) fprintf(stderr, "Usage: %1$s %2$s { show | list } [id BTF_ID]\n" - " %1$s %2$s dump BTF_SRC [format FORMAT]\n" + " %1$s %2$s dump BTF_SRC [format FORMAT] [root_id ROOT_ID]\n" " %1$s %2$s help\n" "\n" " BTF_SRC := { id BTF_ID | prog PROG | map MAP [{key | value | kv | all}] | file FILE }\n" -- cgit v1.2.3 From 9d294f6986789e20696f44c2deb4c7f7b8ae4704 Mon Sep 17 00:00:00 2001 From: Daniel Xu Date: Fri, 13 Dec 2024 12:44:12 -0700 Subject: bpftool: bash: Add bash completion for root_id argument This commit updates the bash completion script with the new root_id argument. 
Signed-off-by: Daniel Xu Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/37016c786620761e621a88240e36f6cb27a8f628.1734119028.git.dxu@dxuuu.xyz --- tools/bpf/bpftool/bash-completion/bpftool | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool index 0c541498c301..1ce409a6cbd9 100644 --- a/tools/bpf/bpftool/bash-completion/bpftool +++ b/tools/bpf/bpftool/bash-completion/bpftool @@ -930,19 +930,24 @@ _bpftool() format) COMPREPLY=( $( compgen -W "c raw" -- "$cur" ) ) ;; + root_id) + return 0; + ;; c) - COMPREPLY=( $( compgen -W "unsorted" -- "$cur" ) ) + COMPREPLY=( $( compgen -W "unsorted root_id" -- "$cur" ) ) ;; *) # emit extra options case ${words[3]} in id|file) + COMPREPLY=( $( compgen -W "root_id" -- "$cur" ) ) _bpftool_once_attr 'format' ;; map|prog) if [[ ${words[3]} == "map" ]] && [[ $cword == 6 ]]; then COMPREPLY+=( $( compgen -W "key value kv all" -- "$cur" ) ) fi + COMPREPLY=( $( compgen -W "root_id" -- "$cur" ) ) _bpftool_once_attr 'format' ;; *) -- cgit v1.2.3 From 4d3ae294f900fb7232fb6c890dbd3176b8a5f121 Mon Sep 17 00:00:00 2001 From: Anton Protopopov Date: Fri, 13 Dec 2024 13:09:31 +0000 Subject: bpf: Add fd_array_cnt attribute for prog_load The fd_array attribute of the BPF_PROG_LOAD syscall may contain a set of file descriptors: maps or btfs. This field was introduced as a sparse array. Introduce a new attribute, fd_array_cnt, which, if present, indicates that the fd_array is a continuous array of the corresponding length. If fd_array_cnt is non-zero, then every map in the fd_array will be bound to the program, as if it was used by the program. This functionality is similar to the BPF_PROG_BIND_MAP syscall, but such maps can be used by the verifier during the program load. Signed-off-by: Anton Protopopov Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20241213130934.1087929-5-aspsk@isovalent.com --- tools/include/uapi/linux/bpf.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'tools') diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 4162afc6b5d0..2acf9b336371 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -1573,6 +1573,16 @@ union bpf_attr { * If provided, prog_flags should have BPF_F_TOKEN_FD flag set. */ __s32 prog_token_fd; + /* The fd_array_cnt can be used to pass the length of the + * fd_array array. In this case all the [map] file descriptors + * passed in this array will be bound to the program, even if + * the maps are not referenced directly. The functionality is + * similar to the BPF_PROG_BIND_MAP syscall, but maps can be + * used by the verifier during the program load. If provided, + * then the fd_array[0,...,fd_array_cnt-1] is expected to be + * continuous. + */ + __u32 fd_array_cnt; }; struct { /* anonymous struct used by BPF_OBJ_* commands */ -- cgit v1.2.3 From f9933acda31a9882b6e08f58cb976e67842a180b Mon Sep 17 00:00:00 2001 From: Anton Protopopov Date: Fri, 13 Dec 2024 13:09:32 +0000 Subject: libbpf: prog load: Allow to use fd_array_cnt Add new fd_array_cnt field to bpf_prog_load_opts and pass it in bpf_attr, if set. 
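
For illustration only (this snippet is not part of the patch), a minimal sketch of a caller passing a dense FD array through the new option; the function and variable names here are hypothetical, and the pattern mirrors the selftest added later in this series:

	#include <bpf/bpf.h>

	static int load_prog_with_fd_array(const struct bpf_insn *insns, size_t insn_cnt,
					   const int *fds, __u32 nr_fds)
	{
		LIBBPF_OPTS(bpf_prog_load_opts, opts);

		/* fds[0..nr_fds-1] is a dense array of map/BTF FDs; with
		 * fd_array_cnt set, every map in it is bound to the program
		 * at load time, even if the program never references it.
		 */
		opts.fd_array = fds;
		opts.fd_array_cnt = nr_fds;

		return bpf_prog_load(BPF_PROG_TYPE_XDP, NULL, "GPL",
				     insns, insn_cnt, &opts);
	}
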
Signed-off-by: Anton Protopopov Signed-off-by: Andrii Nakryiko Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20241213130934.1087929-6-aspsk@isovalent.com --- tools/lib/bpf/bpf.c | 3 ++- tools/lib/bpf/bpf.h | 5 ++++- 2 files changed, 6 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index becdfa701c75..359f73ead613 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -238,7 +238,7 @@ int bpf_prog_load(enum bpf_prog_type prog_type, const struct bpf_insn *insns, size_t insn_cnt, struct bpf_prog_load_opts *opts) { - const size_t attr_sz = offsetofend(union bpf_attr, prog_token_fd); + const size_t attr_sz = offsetofend(union bpf_attr, fd_array_cnt); void *finfo = NULL, *linfo = NULL; const char *func_info, *line_info; __u32 log_size, log_level, attach_prog_fd, attach_btf_obj_fd; @@ -311,6 +311,7 @@ int bpf_prog_load(enum bpf_prog_type prog_type, attr.line_info_cnt = OPTS_GET(opts, line_info_cnt, 0); attr.fd_array = ptr_to_u64(OPTS_GET(opts, fd_array, NULL)); + attr.fd_array_cnt = OPTS_GET(opts, fd_array_cnt, 0); if (log_level) { attr.log_buf = ptr_to_u64(log_buf); diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index a4a7b1ad1b63..435da95d2058 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -107,9 +107,12 @@ struct bpf_prog_load_opts { */ __u32 log_true_size; __u32 token_fd; + + /* if set, provides the length of fd_array */ + __u32 fd_array_cnt; size_t :0; }; -#define bpf_prog_load_opts__last_field token_fd +#define bpf_prog_load_opts__last_field fd_array_cnt LIBBPF_API int bpf_prog_load(enum bpf_prog_type prog_type, const char *prog_name, const char *license, -- cgit v1.2.3 From 1c593d7402b13d97f997b570e9fc7c49e53e1ed1 Mon Sep 17 00:00:00 2001 From: Anton Protopopov Date: Fri, 13 Dec 2024 13:09:33 +0000 Subject: selftests/bpf: Add tests for fd_array_cnt Add a new set of tests to test the new field in PROG_LOAD-related part of bpf_attr: fd_array_cnt. Add the following test cases: * fd_array_cnt/no-fd-array: program is loaded in a normal way, without any fd_array present * fd_array_cnt/fd-array-ok: pass two extra non-used maps, check that they're bound to the program * fd_array_cnt/fd-array-dup-input: pass a few extra maps, only two of which are unique * fd_array_cnt/fd-array-ref-maps-in-array: pass a map in fd_array which is also referenced from within the program * fd_array_cnt/fd-array-trash-input: pass array with some trash * fd_array_cnt/fd-array-2big: pass too large array All the tests above are using the bpf(2) syscall directly, no libbpf involved. 
Signed-off-by: Anton Protopopov Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20241213130934.1087929-7-aspsk@isovalent.com --- tools/testing/selftests/bpf/prog_tests/fd_array.c | 441 ++++++++++++++++++++++ 1 file changed, 441 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/fd_array.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/fd_array.c b/tools/testing/selftests/bpf/prog_tests/fd_array.c new file mode 100644 index 000000000000..a1d52e73fb16 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/fd_array.c @@ -0,0 +1,441 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include + +#include +#include + +#include "../test_btf.h" + +static inline int new_map(void) +{ + const char *name = NULL; + __u32 max_entries = 1; + __u32 value_size = 8; + __u32 key_size = 4; + + return bpf_map_create(BPF_MAP_TYPE_ARRAY, name, + key_size, value_size, + max_entries, NULL); +} + +static int new_btf(void) +{ + struct btf_blob { + struct btf_header btf_hdr; + __u32 types[8]; + __u32 str; + } raw_btf = { + .btf_hdr = { + .magic = BTF_MAGIC, + .version = BTF_VERSION, + .hdr_len = sizeof(struct btf_header), + .type_len = sizeof(raw_btf.types), + .str_off = offsetof(struct btf_blob, str) - offsetof(struct btf_blob, types), + .str_len = sizeof(raw_btf.str), + }, + .types = { + /* long */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 64, 8), /* [1] */ + /* unsigned long */ + BTF_TYPE_INT_ENC(0, 0, 0, 64, 8), /* [2] */ + }, + }; + + return bpf_btf_load(&raw_btf, sizeof(raw_btf), NULL); +} + +#define Close(FD) do { \ + if ((FD) >= 0) { \ + close(FD); \ + FD = -1; \ + } \ +} while(0) + +static bool map_exists(__u32 id) +{ + int fd; + + fd = bpf_map_get_fd_by_id(id); + if (fd >= 0) { + close(fd); + return true; + } + return false; +} + +static bool btf_exists(__u32 id) +{ + int fd; + + fd = bpf_btf_get_fd_by_id(id); + if (fd >= 0) { + close(fd); + return true; + } + return false; +} + +static inline int bpf_prog_get_map_ids(int prog_fd, __u32 *nr_map_ids, __u32 *map_ids) +{ + __u32 len = sizeof(struct bpf_prog_info); + struct bpf_prog_info info; + int err; + + memset(&info, 0, len); + info.nr_map_ids = *nr_map_ids, + info.map_ids = ptr_to_u64(map_ids), + + err = bpf_prog_get_info_by_fd(prog_fd, &info, &len); + if (!ASSERT_OK(err, "bpf_prog_get_info_by_fd")) + return -1; + + *nr_map_ids = info.nr_map_ids; + + return 0; +} + +static int __load_test_prog(int map_fd, const int *fd_array, int fd_array_cnt) +{ + /* A trivial program which uses one map */ + struct bpf_insn insns[] = { + BPF_LD_MAP_FD(BPF_REG_1, map_fd), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + LIBBPF_OPTS(bpf_prog_load_opts, opts); + + opts.fd_array = fd_array; + opts.fd_array_cnt = fd_array_cnt; + + return bpf_prog_load(BPF_PROG_TYPE_XDP, NULL, "GPL", insns, ARRAY_SIZE(insns), &opts); +} + +static int load_test_prog(const int *fd_array, int fd_array_cnt) +{ + int map_fd; + int ret; + + map_fd = new_map(); + if (!ASSERT_GE(map_fd, 0, "new_map")) + return map_fd; + + ret = __load_test_prog(map_fd, fd_array, fd_array_cnt); + close(map_fd); + return ret; +} + +static bool check_expected_map_ids(int prog_fd, int expected, __u32 *map_ids, __u32 *nr_map_ids) +{ + int err; + + err = bpf_prog_get_map_ids(prog_fd, nr_map_ids, map_ids); + if (!ASSERT_OK(err, "bpf_prog_get_map_ids")) + return false; + 
if (!ASSERT_EQ(*nr_map_ids, expected, "unexpected nr_map_ids")) + return false; + + return true; +} + +/* + * Load a program, which uses one map. No fd_array maps are present. + * On return only one map is expected to be bound to prog. + */ +static void check_fd_array_cnt__no_fd_array(void) +{ + __u32 map_ids[16]; + __u32 nr_map_ids; + int prog_fd = -1; + + prog_fd = load_test_prog(NULL, 0); + if (!ASSERT_GE(prog_fd, 0, "BPF_PROG_LOAD")) + return; + nr_map_ids = ARRAY_SIZE(map_ids); + check_expected_map_ids(prog_fd, 1, map_ids, &nr_map_ids); + close(prog_fd); +} + +/* + * Load a program, which uses one map, and pass two extra, non-equal, maps in + * fd_array with fd_array_cnt=2. On return three maps are expected to be bound + * to the program. + */ +static void check_fd_array_cnt__fd_array_ok(void) +{ + int extra_fds[2] = { -1, -1 }; + __u32 map_ids[16]; + __u32 nr_map_ids; + int prog_fd = -1; + + extra_fds[0] = new_map(); + if (!ASSERT_GE(extra_fds[0], 0, "new_map")) + goto cleanup; + extra_fds[1] = new_map(); + if (!ASSERT_GE(extra_fds[1], 0, "new_map")) + goto cleanup; + prog_fd = load_test_prog(extra_fds, 2); + if (!ASSERT_GE(prog_fd, 0, "BPF_PROG_LOAD")) + goto cleanup; + nr_map_ids = ARRAY_SIZE(map_ids); + if (!check_expected_map_ids(prog_fd, 3, map_ids, &nr_map_ids)) + goto cleanup; + + /* maps should still exist when original file descriptors are closed */ + Close(extra_fds[0]); + Close(extra_fds[1]); + if (!ASSERT_EQ(map_exists(map_ids[0]), true, "map_ids[0] should exist")) + goto cleanup; + if (!ASSERT_EQ(map_exists(map_ids[1]), true, "map_ids[1] should exist")) + goto cleanup; + + /* some fds might be invalid, so ignore return codes */ +cleanup: + Close(extra_fds[1]); + Close(extra_fds[0]); + Close(prog_fd); +} + +/* + * Load a program with a few extra maps duplicated in the fd_array. + * After the load maps should only be referenced once. 
+ */ +static void check_fd_array_cnt__duplicated_maps(void) +{ + int extra_fds[4] = { -1, -1, -1, -1 }; + __u32 map_ids[16]; + __u32 nr_map_ids; + int prog_fd = -1; + + extra_fds[0] = extra_fds[2] = new_map(); + if (!ASSERT_GE(extra_fds[0], 0, "new_map")) + goto cleanup; + extra_fds[1] = extra_fds[3] = new_map(); + if (!ASSERT_GE(extra_fds[1], 0, "new_map")) + goto cleanup; + prog_fd = load_test_prog(extra_fds, 4); + if (!ASSERT_GE(prog_fd, 0, "BPF_PROG_LOAD")) + goto cleanup; + nr_map_ids = ARRAY_SIZE(map_ids); + if (!check_expected_map_ids(prog_fd, 3, map_ids, &nr_map_ids)) + goto cleanup; + + /* maps should still exist when original file descriptors are closed */ + Close(extra_fds[0]); + Close(extra_fds[1]); + if (!ASSERT_EQ(map_exists(map_ids[0]), true, "map should exist")) + goto cleanup; + if (!ASSERT_EQ(map_exists(map_ids[1]), true, "map should exist")) + goto cleanup; + + /* some fds might be invalid, so ignore return codes */ +cleanup: + Close(extra_fds[1]); + Close(extra_fds[0]); + Close(prog_fd); +} + +/* + * Check that if maps which are referenced by a program are + * passed in fd_array, then they will be referenced only once + */ +static void check_fd_array_cnt__referenced_maps_in_fd_array(void) +{ + int extra_fds[1] = { -1 }; + __u32 map_ids[16]; + __u32 nr_map_ids; + int prog_fd = -1; + + extra_fds[0] = new_map(); + if (!ASSERT_GE(extra_fds[0], 0, "new_map")) + goto cleanup; + prog_fd = __load_test_prog(extra_fds[0], extra_fds, 1); + if (!ASSERT_GE(prog_fd, 0, "BPF_PROG_LOAD")) + goto cleanup; + nr_map_ids = ARRAY_SIZE(map_ids); + if (!check_expected_map_ids(prog_fd, 1, map_ids, &nr_map_ids)) + goto cleanup; + + /* map should still exist when original file descriptor is closed */ + Close(extra_fds[0]); + if (!ASSERT_EQ(map_exists(map_ids[0]), true, "map should exist")) + goto cleanup; + + /* some fds might be invalid, so ignore return codes */ +cleanup: + Close(extra_fds[0]); + Close(prog_fd); +} + +static int get_btf_id_by_fd(int btf_fd, __u32 *id) +{ + struct bpf_btf_info info; + __u32 info_len = sizeof(info); + int err; + + memset(&info, 0, info_len); + err = bpf_btf_get_info_by_fd(btf_fd, &info, &info_len); + if (err) + return err; + if (id) + *id = info.id; + return 0; +} + +/* + * Check that fd_array operates properly for btfs. Namely, to check that + * passing a btf fd in fd_array increases its reference count, do the + * following: + * 1) Create a new btf, it's referenced only by a file descriptor, so refcnt=1 + * 2) Load a BPF prog with fd_array[0] = btf_fd; now btf's refcnt=2 + * 3) Close the btf_fd, now refcnt=1 + * Wait and check that BTF stil exists. + */ +static void check_fd_array_cnt__referenced_btfs(void) +{ + int extra_fds[1] = { -1 }; + int prog_fd = -1; + __u32 btf_id; + int tries; + int err; + + extra_fds[0] = new_btf(); + if (!ASSERT_GE(extra_fds[0], 0, "new_btf")) + goto cleanup; + prog_fd = load_test_prog(extra_fds, 1); + if (!ASSERT_GE(prog_fd, 0, "BPF_PROG_LOAD")) + goto cleanup; + + /* btf should still exist when original file descriptor is closed */ + err = get_btf_id_by_fd(extra_fds[0], &btf_id); + if (!ASSERT_GE(err, 0, "get_btf_id_by_fd")) + goto cleanup; + + Close(extra_fds[0]); + + if (!ASSERT_GE(kern_sync_rcu(), 0, "kern_sync_rcu 1")) + goto cleanup; + + if (!ASSERT_EQ(btf_exists(btf_id), true, "btf should exist")) + goto cleanup; + + Close(prog_fd); + + /* The program is freed by a workqueue, so no reliable + * way to sync, so just wait a bit (max ~1 second). 
*/ + for (tries = 100; tries >= 0; tries--) { + usleep(1000); + + if (!btf_exists(btf_id)) + break; + + if (tries) + continue; + + PRINT_FAIL("btf should have been freed"); + } + + /* some fds might be invalid, so ignore return codes */ +cleanup: + Close(extra_fds[0]); + Close(prog_fd); +} + +/* + * Test that a program with trash in fd_array can't be loaded: + * only map and BTF file descriptors should be accepted. + */ +static void check_fd_array_cnt__fd_array_with_trash(void) +{ + int extra_fds[3] = { -1, -1, -1 }; + int prog_fd = -1; + + extra_fds[0] = new_map(); + if (!ASSERT_GE(extra_fds[0], 0, "new_map")) + goto cleanup; + extra_fds[1] = new_btf(); + if (!ASSERT_GE(extra_fds[1], 0, "new_btf")) + goto cleanup; + + /* trash 1: not a file descriptor */ + extra_fds[2] = 0xbeef; + prog_fd = load_test_prog(extra_fds, 3); + if (!ASSERT_EQ(prog_fd, -EBADF, "prog should have been rejected with -EBADF")) + goto cleanup; + + /* trash 2: not a map or btf */ + extra_fds[2] = socket(AF_INET, SOCK_STREAM, 0); + if (!ASSERT_GE(extra_fds[2], 0, "socket")) + goto cleanup; + + prog_fd = load_test_prog(extra_fds, 3); + if (!ASSERT_EQ(prog_fd, -EINVAL, "prog should have been rejected with -EINVAL")) + goto cleanup; + + /* Validate that the prog is ok if trash is removed */ + Close(extra_fds[2]); + extra_fds[2] = new_btf(); + if (!ASSERT_GE(extra_fds[2], 0, "new_btf")) + goto cleanup; + + prog_fd = load_test_prog(extra_fds, 3); + if (!ASSERT_GE(prog_fd, 0, "prog should have been loaded")) + goto cleanup; + + /* some fds might be invalid, so ignore return codes */ +cleanup: + Close(extra_fds[2]); + Close(extra_fds[1]); + Close(extra_fds[0]); +} + +/* + * Test that a program with too big fd_array can't be loaded. + */ +static void check_fd_array_cnt__fd_array_too_big(void) +{ + int extra_fds[65]; + int prog_fd = -1; + int i; + + for (i = 0; i < 65; i++) { + extra_fds[i] = new_map(); + if (!ASSERT_GE(extra_fds[i], 0, "new_map")) + goto cleanup_fds; + } + + prog_fd = load_test_prog(extra_fds, 65); + ASSERT_EQ(prog_fd, -E2BIG, "prog should have been rejected with -E2BIG"); + +cleanup_fds: + while (i > 0) + Close(extra_fds[--i]); +} + +void test_fd_array_cnt(void) +{ + if (test__start_subtest("no-fd-array")) + check_fd_array_cnt__no_fd_array(); + + if (test__start_subtest("fd-array-ok")) + check_fd_array_cnt__fd_array_ok(); + + if (test__start_subtest("fd-array-dup-input")) + check_fd_array_cnt__duplicated_maps(); + + if (test__start_subtest("fd-array-ref-maps-in-array")) + check_fd_array_cnt__referenced_maps_in_fd_array(); + + if (test__start_subtest("fd-array-ref-btfs")) + check_fd_array_cnt__referenced_btfs(); + + if (test__start_subtest("fd-array-trash-input")) + check_fd_array_cnt__fd_array_with_trash(); + + if (test__start_subtest("fd-array-2big")) + check_fd_array_cnt__fd_array_too_big(); +} -- cgit v1.2.3 From d677a10f80abf1ef65ae9bcf51b5a83ecf10e99a Mon Sep 17 00:00:00 2001 From: Anton Protopopov Date: Fri, 13 Dec 2024 13:09:34 +0000 Subject: selftest/bpf: Replace magic constants by macros Replace magic constants in a BTF structure initialization code by proper macros, as is done in other similar selftests. 
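
For context (a sketch from memory, not part of the patch; tools/testing/selftests/bpf/test_btf.h is the authoritative definition), the helper used in the hunk below emits four 32-bit words per integer type, which is why the two BTF_TYPE_INT_ENC() entries exactly fill the __u32 types[8] array that sizeof(raw_btf.types) now measures:

	/*
	 * Approximate expansion of the test_btf.h helper:
	 *
	 *   BTF_TYPE_INT_ENC(name, encoding, bits_offset, bits, sz)
	 *       -> (name),                                 word 0: name_off
	 *          BTF_INFO_ENC(BTF_KIND_INT, 0, 0),       word 1: kind/vlen info
	 *          (sz),                                   word 2: size in bytes
	 *          ((encoding) << 24 |
	 *           (bits_offset) << 16 | (bits))          word 3: int encoding data
	 */
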
Suggested-by: Eduard Zingerman Signed-off-by: Anton Protopopov Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20241213130934.1087929-8-aspsk@isovalent.com --- tools/testing/selftests/bpf/progs/syscall.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/syscall.c b/tools/testing/selftests/bpf/progs/syscall.c index 0f4dfb770c32..b698cc62a371 100644 --- a/tools/testing/selftests/bpf/progs/syscall.c +++ b/tools/testing/selftests/bpf/progs/syscall.c @@ -76,9 +76,9 @@ static int btf_load(void) .magic = BTF_MAGIC, .version = BTF_VERSION, .hdr_len = sizeof(struct btf_header), - .type_len = sizeof(__u32) * 8, - .str_off = sizeof(__u32) * 8, - .str_len = sizeof(__u32), + .type_len = sizeof(raw_btf.types), + .str_off = offsetof(struct btf_blob, str) - offsetof(struct btf_blob, types), + .str_len = sizeof(raw_btf.str), }, .types = { /* long */ -- cgit v1.2.3 From c00d738e1673ab801e1577e4e3c780ccf88b1a5b Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Fri, 13 Dec 2024 14:19:27 -0800 Subject: bpf: Revert "bpf: Mark raw_tp arguments with PTR_MAYBE_NULL" This patch reverts commit cb4158ce8ec8 ("bpf: Mark raw_tp arguments with PTR_MAYBE_NULL"). The patch was well-intended and meant to be as a stop-gap fixing branch prediction when the pointer may actually be NULL at runtime. Eventually, it was supposed to be replaced by an automated script or compiler pass detecting possibly NULL arguments and marking them accordingly. However, it caused two main issues observed for production programs and failed to preserve backwards compatibility. First, programs relied on the verifier not exploring == NULL branch when pointer is not NULL, thus they started failing with a 'dereference of scalar' error. Next, allowing raw_tp arguments to be modified surfaced the warning in the verifier that warns against reg->off when PTR_MAYBE_NULL is set. More information, context, and discusson on both problems is available in [0]. Overall, this approach had several shortcomings, and the fixes would further complicate the verifier's logic, and the entire masking scheme would have to be removed eventually anyway. Hence, revert the patch in preparation of a better fix avoiding these issues to replace this commit. [0]: https://lore.kernel.org/bpf/20241206161053.809580-1-memxor@gmail.com Reported-by: Manu Bretelle Fixes: cb4158ce8ec8 ("bpf: Mark raw_tp arguments with PTR_MAYBE_NULL") Signed-off-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20241213221929.3495062-2-memxor@gmail.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/progs/test_tp_btf_nullable.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/test_tp_btf_nullable.c b/tools/testing/selftests/bpf/progs/test_tp_btf_nullable.c index 5aaf2b065f86..bba3e37f749b 100644 --- a/tools/testing/selftests/bpf/progs/test_tp_btf_nullable.c +++ b/tools/testing/selftests/bpf/progs/test_tp_btf_nullable.c @@ -7,11 +7,7 @@ #include "bpf_misc.h" SEC("tp_btf/bpf_testmod_test_nullable_bare") -/* This used to be a failure test, but raw_tp nullable arguments can now - * directly be dereferenced, whether they have nullable annotation or not, - * and don't need to be explicitly checked. 
- */ -__success +__failure __msg("R1 invalid mem access 'trusted_ptr_or_null_'") int BPF_PROG(handle_tp_btf_nullable_bare1, struct bpf_testmod_test_read_ctx *nullable_ctx) { return nullable_ctx->len; -- cgit v1.2.3 From 838a10bd2ebfe11a60dd67687533a7cfc220cc86 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Fri, 13 Dec 2024 14:19:28 -0800 Subject: bpf: Augment raw_tp arguments with PTR_MAYBE_NULL Arguments to a raw tracepoint are tagged as trusted, which carries the semantics that the pointer will be non-NULL. However, in certain cases, a raw tracepoint argument may end up being NULL. More context about this issue is available in [0]. Thus, there is a discrepancy between the reality, that raw_tp arguments can actually be NULL, and the verifier's knowledge, that they are never NULL, causing explicit NULL check branch to be dead code eliminated. A previous attempt [1], i.e. the second fixed commit, was made to simulate symbolic execution as if in most accesses, the argument is a non-NULL raw_tp, except for conditional jumps. This tried to suppress branch prediction while preserving compatibility, but surfaced issues with production programs that were difficult to solve without increasing verifier complexity. A more complete discussion of issues and fixes is available at [2]. Fix this by maintaining an explicit list of tracepoints where the arguments are known to be NULL, and mark the positional arguments as PTR_MAYBE_NULL. Additionally, capture the tracepoints where arguments are known to be ERR_PTR, and mark these arguments as scalar values to prevent potential dereference. Each hex digit is used to encode NULL-ness (0x1) or ERR_PTR-ness (0x2), shifted by the zero-indexed argument number x 4. This can be represented as follows: 1st arg: 0x1 2nd arg: 0x10 3rd arg: 0x100 ... and so on (likewise for ERR_PTR case). In the future, an automated pass will be used to produce such a list, or insert __nullable annotations automatically for tracepoints. Each compilation unit will be analyzed and results will be collated to find whether a tracepoint pointer is definitely not null, maybe null, or an unknown state where verifier conservatively marks it PTR_MAYBE_NULL. A proof of concept of this tool from Eduard is available at [3]. Note that in case we don't find a specification in the raw_tp_null_args array and the tracepoint belongs to a kernel module, we will conservatively mark the arguments as PTR_MAYBE_NULL. This is because unlike for in-tree modules, out-of-tree module tracepoints may pass NULL freely to the tracepoint. We don't protect against such tracepoints passing ERR_PTR (which is uncommon anyway), lest we mark all such arguments as SCALAR_VALUE. While we are it, let's adjust the test raw_tp_null to not perform dereference of the skb->mark, as that won't be allowed anymore, and make it more robust by using inline assembly to test the dead code elimination behavior, which should still stay the same. 
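As a rough illustration of the per-argument encoding (the helper macros below are illustrative only, not the kernel's actual definitions):

	/* bit 0 of an argument's hex digit: may be NULL; bit 1: may be ERR_PTR */
	#define ARG_MAYBE_NULL(n)	(0x1ULL << ((n) * 4))
	#define ARG_MAYBE_ERR_PTR(n)	(0x2ULL << ((n) * 4))

	/*
	 * A tracepoint whose 2nd argument may be NULL and whose 3rd argument
	 * may be an ERR_PTR would be listed as
	 * ARG_MAYBE_NULL(1) | ARG_MAYBE_ERR_PTR(2), i.e. 0x210.
	 */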
[0]: https://lore.kernel.org/bpf/ZrCZS6nisraEqehw@jlelli-thinkpadt14gen4.remote.csb [1]: https://lore.kernel.org/all/20241104171959.2938862-1-memxor@gmail.com [2]: https://lore.kernel.org/bpf/20241206161053.809580-1-memxor@gmail.com [3]: https://github.com/eddyz87/llvm-project/tree/nullness-for-tracepoint-params Reported-by: Juri Lelli # original bug Reported-by: Manu Bretelle # bugs in masking fix Fixes: 3f00c5239344 ("bpf: Allow trusted pointers to be passed to KF_TRUSTED_ARGS kfuncs") Fixes: cb4158ce8ec8 ("bpf: Mark raw_tp arguments with PTR_MAYBE_NULL") Reviewed-by: Eduard Zingerman Co-developed-by: Jiri Olsa Signed-off-by: Jiri Olsa Signed-off-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20241213221929.3495062-3-memxor@gmail.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/progs/raw_tp_null.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/raw_tp_null.c b/tools/testing/selftests/bpf/progs/raw_tp_null.c index 457f34c151e3..5927054b6dd9 100644 --- a/tools/testing/selftests/bpf/progs/raw_tp_null.c +++ b/tools/testing/selftests/bpf/progs/raw_tp_null.c @@ -3,6 +3,7 @@ #include #include +#include "bpf_misc.h" char _license[] SEC("license") = "GPL"; @@ -17,16 +18,14 @@ int BPF_PROG(test_raw_tp_null, struct sk_buff *skb) if (task->pid != tid) return 0; - i = i + skb->mark + 1; - /* The compiler may move the NULL check before this deref, which causes - * the load to fail as deref of scalar. Prevent that by using a barrier. + /* If dead code elimination kicks in, the increment +=2 will be + * removed. For raw_tp programs attaching to tracepoints in kernel + * modules, we mark input arguments as PTR_MAYBE_NULL, so branch + * prediction should never kick in. */ - barrier(); - /* If dead code elimination kicks in, the increment below will - * be removed. For raw_tp programs, we mark input arguments as - * PTR_MAYBE_NULL, so branch prediction should never kick in. - */ - if (!skb) - i += 2; + asm volatile ("%[i] += 1; if %[ctx] != 0 goto +1; %[i] += 2;" + : [i]"+r"(i) + : [ctx]"r"(skb) + : "memory"); return 0; } -- cgit v1.2.3 From 0da1955b5bd2af3a1c3d13916df06e34ffa6df3d Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Fri, 13 Dec 2024 14:19:29 -0800 Subject: selftests/bpf: Add tests for raw_tp NULL args Add tests to ensure that arguments are correctly marked based on their specified positions, and whether they get marked correctly as maybe null. For modules, all tracepoint parameters should be marked PTR_MAYBE_NULL by default. 
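The new tests pick arguments by their offset in the context: tp_btf programs see the tracepoint arguments as an array of u64 values, so a load from r1+0 selects the first argument and r1+8 the second. A hedged C sketch of the same idea (not one of the added tests verbatim; the explicit NULL check is exactly what the raw-asm variants leave out so that verification must fail):

	#include <vmlinux.h>
	#include <bpf/bpf_helpers.h>

	char _license[] SEC("license") = "GPL";

	SEC("tp_btf/sched_pi_setprio")
	int second_arg_needs_check(__u64 *ctx)
	{
		/* ctx[1] is *(u64 *)(r1 + 8): the tracepoint's second argument */
		struct task_struct *pi_task = (struct task_struct *)ctx[1];

		if (!pi_task)	/* required once the argument is PTR_MAYBE_NULL */
			return 0;
		return pi_task->prio;
	}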
Signed-off-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20241213221929.3495062-4-memxor@gmail.com Signed-off-by: Alexei Starovoitov --- .../testing/selftests/bpf/prog_tests/raw_tp_null.c | 3 +++ .../testing/selftests/bpf/progs/raw_tp_null_fail.c | 24 ++++++++++++++++++++++ 2 files changed, 27 insertions(+) create mode 100644 tools/testing/selftests/bpf/progs/raw_tp_null_fail.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/raw_tp_null.c b/tools/testing/selftests/bpf/prog_tests/raw_tp_null.c index 6fa19449297e..43676a9922dc 100644 --- a/tools/testing/selftests/bpf/prog_tests/raw_tp_null.c +++ b/tools/testing/selftests/bpf/prog_tests/raw_tp_null.c @@ -3,11 +3,14 @@ #include #include "raw_tp_null.skel.h" +#include "raw_tp_null_fail.skel.h" void test_raw_tp_null(void) { struct raw_tp_null *skel; + RUN_TESTS(raw_tp_null_fail); + skel = raw_tp_null__open_and_load(); if (!ASSERT_OK_PTR(skel, "raw_tp_null__open_and_load")) return; diff --git a/tools/testing/selftests/bpf/progs/raw_tp_null_fail.c b/tools/testing/selftests/bpf/progs/raw_tp_null_fail.c new file mode 100644 index 000000000000..38d669957bf1 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/raw_tp_null_fail.c @@ -0,0 +1,24 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */ + +#include +#include +#include "bpf_misc.h" + +char _license[] SEC("license") = "GPL"; + +/* Ensure module parameter has PTR_MAYBE_NULL */ +SEC("tp_btf/bpf_testmod_test_raw_tp_null") +__failure __msg("R1 invalid mem access 'trusted_ptr_or_null_'") +int test_raw_tp_null_bpf_testmod_test_raw_tp_null_arg_1(void *ctx) { + asm volatile("r1 = *(u64 *)(r1 +0); r1 = *(u64 *)(r1 +0);" ::: __clobber_all); + return 0; +} + +/* Check NULL marking */ +SEC("tp_btf/sched_pi_setprio") +__failure __msg("R1 invalid mem access 'trusted_ptr_or_null_'") +int test_raw_tp_null_sched_pi_setprio_arg_2(void *ctx) { + asm volatile("r1 = *(u64 *)(r1 +8); r1 = *(u64 *)(r1 +0);" ::: __clobber_all); + return 0; +} -- cgit v1.2.3 From 6ca774f06a7df650f41b38b67bec0665d862ac23 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 9 Oct 2024 11:56:28 -0700 Subject: torture: Make kvm-remote.sh give up on unresponsive system Currently, a system that stops responding at the wrong time will hang kvm-remote.sh. This can happen when the system in question is forced offline for maintenance, and there is currently no way for the user to kick this script into moving ahead. This commit therefore causes kvm-remote.sh to wait at most 15 minutes for a non-responsive system, that is, a system for which ssh gives an exit code of 255. Signed-off-by: Paul E. McKenney Signed-off-by: Uladzislau Rezki (Sony) --- .../testing/selftests/rcutorture/bin/kvm-remote.sh | 25 ++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/rcutorture/bin/kvm-remote.sh b/tools/testing/selftests/rcutorture/bin/kvm-remote.sh index 134cdef5a6e0..48a8052d5dae 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-remote.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-remote.sh @@ -181,10 +181,11 @@ done # Function to check for presence of a file on the specified system. # Complain if the system cannot be reached, and retry after a wait. -# Currently just waits forever if a machine disappears. +# Currently just waits 15 minutes if a machine disappears. 
# # Usage: checkremotefile system pathname checkremotefile () { + local nsshfails=0 local ret local sleeptime=60 @@ -195,6 +196,11 @@ checkremotefile () { if test "$ret" -eq 255 then echo " ---" ssh failure to $1 checking for file $2, retry after $sleeptime seconds. `date` | tee -a "$oldrun/remote-log" + nsshfails=$((nsshfails+1)) + if ((nsshfails > 15)) + then + return 255 + fi elif test "$ret" -eq 0 then return 0 @@ -268,12 +274,23 @@ echo All batches started. `date` | tee -a "$oldrun/remote-log" for i in $systems do echo " ---" Waiting for $i `date` | tee -a "$oldrun/remote-log" - while checkremotefile "$i" "$resdir/$ds/remote.run" + while : do + checkremotefile "$i" "$resdir/$ds/remote.run" + ret=$? + if test "$ret" -eq 1 + then + echo " ---" Collecting results from $i `date` | tee -a "$oldrun/remote-log" + ( cd "$oldrun"; ssh -o BatchMode=yes $i "cd $rundir; tar -czf - kvm-remote-*.sh.out */console.log */kvm-test-1-run*.sh.out */qemu[_-]pid */qemu-retval */qemu-affinity; rm -rf $T > /dev/null 2>&1" | tar -xzf - ) + break; + fi + if test "$ret" -eq 255 + then + echo System $i persistent ssh failure, lost results `date` | tee -a "$oldrun/remote-log" + break; + fi sleep 30 done - echo " ---" Collecting results from $i `date` | tee -a "$oldrun/remote-log" - ( cd "$oldrun"; ssh -o BatchMode=yes $i "cd $rundir; tar -czf - kvm-remote-*.sh.out */console.log */kvm-test-1-run*.sh.out */qemu[_-]pid */qemu-retval */qemu-affinity; rm -rf $T > /dev/null 2>&1" | tar -xzf - ) done ( kvm-end-run-stats.sh "$oldrun" "$starttime"; echo $? > $T/exitcode ) | tee -a "$oldrun/remote-log" -- cgit v1.2.3 From 5ec090011bd2bb6ea6c2c607371db57ee0506a89 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 10 Oct 2024 11:49:54 -0700 Subject: rcutorture: Make the TREE03 scenario do preemption This commit adds the rcutorture.preempt_duration module parameter to rcutorture's TREE03.boot parameter list in order to better test preemption of RCU read-side critical sections. Signed-off-by: Paul E. McKenney Signed-off-by: Uladzislau Rezki (Sony) --- tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot | 1 + 1 file changed, 1 insertion(+) (limited to 'tools') diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot index 8e50bfd4b710..90318591dae2 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot @@ -5,3 +5,4 @@ rcutree.gp_cleanup_delay=3 rcutree.kthread_prio=2 threadirqs rcutree.use_softirq=0 +rcutorture.preempt_duration=10 -- cgit v1.2.3 From 663ad7481f068057f6f692c5368c47150e855370 Mon Sep 17 00:00:00 2001 From: Donald Hunter Date: Fri, 13 Dec 2024 13:07:11 +0000 Subject: tools/net/ynl: fix sub-message key lookup for nested attributes Use the correct attribute space for sub-message key lookup in nested attributes when adding attributes. This fixes rt_link where the "kind" key and "data" sub-message are nested attributes in "linkinfo". 
For example: ./tools/net/ynl/cli.py \ --create \ --spec Documentation/netlink/specs/rt_link.yaml \ --do newlink \ --json '{"link": 99, "linkinfo": { "kind": "vlan", "data": {"id": 4 } } }' Signed-off-by: Donald Hunter Fixes: ab463c4342d1 ("tools/net/ynl: Add support for encoding sub-messages") Link: https://patch.msgid.link/20241213130711.40267-1-donald.hunter@gmail.com Signed-off-by: Jakub Kicinski --- tools/net/ynl/lib/ynl.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/net/ynl/lib/ynl.py b/tools/net/ynl/lib/ynl.py index 01ec01a90e76..eea29359a899 100644 --- a/tools/net/ynl/lib/ynl.py +++ b/tools/net/ynl/lib/ynl.py @@ -556,10 +556,10 @@ class YnlFamily(SpecFamily): if attr["type"] == 'nest': nl_type |= Netlink.NLA_F_NESTED attr_payload = b'' - sub_attrs = SpaceAttrs(self.attr_sets[space], value, search_attrs) + sub_space = attr['nested-attributes'] + sub_attrs = SpaceAttrs(self.attr_sets[sub_space], value, search_attrs) for subname, subvalue in value.items(): - attr_payload += self._add_attr(attr['nested-attributes'], - subname, subvalue, sub_attrs) + attr_payload += self._add_attr(sub_space, subname, subvalue, sub_attrs) elif attr["type"] == 'flag': if not value: # If value is absent or false then skip attribute creation. -- cgit v1.2.3 From b2e584aa3c710802600b690f34a56fb526aebf2f Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Thu, 12 Dec 2024 16:36:08 +0100 Subject: selftests: tls: add key_generation argument to tls_crypto_info_init This allows us to generate different keys, so that we can test that rekey is using the correct one. Signed-off-by: Sabrina Dubroca Signed-off-by: David S. Miller --- tools/testing/selftests/net/tls.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c index 1a706d03bb6b..b1f52d2bb096 100644 --- a/tools/testing/selftests/net/tls.c +++ b/tools/testing/selftests/net/tls.c @@ -44,9 +44,11 @@ struct tls_crypto_info_keys { }; static void tls_crypto_info_init(uint16_t tls_version, uint16_t cipher_type, - struct tls_crypto_info_keys *tls12) + struct tls_crypto_info_keys *tls12, + char key_generation) { - memset(tls12, 0, sizeof(*tls12)); + memset(tls12, key_generation, sizeof(*tls12)); + memset(tls12, 0, sizeof(struct tls_crypto_info)); switch (cipher_type) { case TLS_CIPHER_CHACHA20_POLY1305: @@ -275,7 +277,7 @@ TEST_F(tls_basic, recseq_wrap) if (self->notls) SKIP(return, "no TLS support"); - tls_crypto_info_init(TLS_1_2_VERSION, TLS_CIPHER_AES_GCM_128, &tls12); + tls_crypto_info_init(TLS_1_2_VERSION, TLS_CIPHER_AES_GCM_128, &tls12, 0); memset(&tls12.aes128.rec_seq, 0xff, sizeof(tls12.aes128.rec_seq)); ASSERT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len), 0); @@ -391,7 +393,7 @@ FIXTURE_SETUP(tls) SKIP(return, "Unsupported cipher in FIPS mode"); tls_crypto_info_init(variant->tls_version, variant->cipher_type, - &tls12); + &tls12, 0); ulp_sock_pair(_metadata, &self->fd, &self->cfd, &self->notls); @@ -1175,7 +1177,7 @@ TEST_F(tls, bidir) struct tls_crypto_info_keys tls12; tls_crypto_info_init(variant->tls_version, variant->cipher_type, - &tls12); + &tls12, 0); ret = setsockopt(self->fd, SOL_TLS, TLS_RX, &tls12, tls12.len); @@ -1614,7 +1616,7 @@ TEST_F(tls, getsockopt) EXPECT_EQ(get.crypto_info.cipher_type, variant->cipher_type); /* get the full crypto_info */ - tls_crypto_info_init(variant->tls_version, variant->cipher_type, &expect); + 
tls_crypto_info_init(variant->tls_version, variant->cipher_type, &expect, 0); len = expect.len; memrnd(&get, sizeof(get)); EXPECT_EQ(getsockopt(self->fd, SOL_TLS, TLS_TX, &get, &len), 0); @@ -1696,7 +1698,7 @@ FIXTURE_SETUP(tls_err) int ret; tls_crypto_info_init(variant->tls_version, TLS_CIPHER_AES_GCM_128, - &tls12); + &tls12, 0); ulp_sock_pair(_metadata, &self->fd, &self->cfd, &self->notls); ulp_sock_pair(_metadata, &self->fd2, &self->cfd2, &self->notls); @@ -2118,7 +2120,7 @@ TEST(tls_v6ops) { int sfd, ret, fd; socklen_t len, len2; - tls_crypto_info_init(TLS_1_2_VERSION, TLS_CIPHER_AES_GCM_128, &tls12); + tls_crypto_info_init(TLS_1_2_VERSION, TLS_CIPHER_AES_GCM_128, &tls12, 0); addr.sin6_family = AF_INET6; addr.sin6_addr = in6addr_any; @@ -2177,7 +2179,7 @@ TEST(prequeue) { len = sizeof(addr); memrnd(buf, sizeof(buf)); - tls_crypto_info_init(TLS_1_2_VERSION, TLS_CIPHER_AES_GCM_256, &tls12); + tls_crypto_info_init(TLS_1_2_VERSION, TLS_CIPHER_AES_GCM_256, &tls12, 0); addr.sin_family = AF_INET; addr.sin_addr.s_addr = htonl(INADDR_ANY); -- cgit v1.2.3 From 555f0edb9ff043196655a5b7cc65f67dfd05b530 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Thu, 12 Dec 2024 16:36:09 +0100 Subject: selftests: tls: add rekey tests Test the kernel's ability to: - update the key (but not the version or cipher), only for TLS1.3 - pause decryption after receiving a KeyUpdate message, until a new RX key has been provided - reflect the pause/non-readable socket in poll() Signed-off-by: Sabrina Dubroca Signed-off-by: David S. Miller --- tools/testing/selftests/net/tls.c | 458 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 458 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c index b1f52d2bb096..9a85f93c33d8 100644 --- a/tools/testing/selftests/net/tls.c +++ b/tools/testing/selftests/net/tls.c @@ -1670,6 +1670,464 @@ TEST_F(tls, recv_efault) EXPECT_EQ(memcmp(rec2, recv_mem + 9, ret - 9), 0); } +#define TLS_RECORD_TYPE_HANDSHAKE 0x16 +/* key_update, length 1, update_not_requested */ +static const char key_update_msg[] = "\x18\x00\x00\x01\x00"; +static void tls_send_keyupdate(struct __test_metadata *_metadata, int fd) +{ + size_t len = sizeof(key_update_msg); + + EXPECT_EQ(tls_send_cmsg(fd, TLS_RECORD_TYPE_HANDSHAKE, + (char *)key_update_msg, len, 0), + len); +} + +static void tls_recv_keyupdate(struct __test_metadata *_metadata, int fd, int flags) +{ + char buf[100]; + + EXPECT_EQ(tls_recv_cmsg(_metadata, fd, TLS_RECORD_TYPE_HANDSHAKE, buf, sizeof(buf), flags), + sizeof(key_update_msg)); + EXPECT_EQ(memcmp(buf, key_update_msg, sizeof(key_update_msg)), 0); +} + +/* set the key to 0 then 1 for RX, immediately to 1 for TX */ +TEST_F(tls_basic, rekey_rx) +{ + struct tls_crypto_info_keys tls12_0, tls12_1; + char const *test_str = "test_message"; + int send_len = strlen(test_str) + 1; + char buf[20]; + int ret; + + if (self->notls) + return; + + tls_crypto_info_init(TLS_1_3_VERSION, TLS_CIPHER_AES_GCM_128, + &tls12_0, 0); + tls_crypto_info_init(TLS_1_3_VERSION, TLS_CIPHER_AES_GCM_128, + &tls12_1, 1); + + ret = setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12_1, tls12_1.len); + ASSERT_EQ(ret, 0); + + ret = setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12_0, tls12_0.len); + ASSERT_EQ(ret, 0); + + ret = setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12_1, tls12_1.len); + EXPECT_EQ(ret, 0); + + EXPECT_EQ(send(self->fd, test_str, send_len, 0), send_len); + EXPECT_EQ(recv(self->cfd, buf, send_len, 0), send_len); + EXPECT_EQ(memcmp(buf, test_str, send_len), 
0); +} + +/* set the key to 0 then 1 for TX, immediately to 1 for RX */ +TEST_F(tls_basic, rekey_tx) +{ + struct tls_crypto_info_keys tls12_0, tls12_1; + char const *test_str = "test_message"; + int send_len = strlen(test_str) + 1; + char buf[20]; + int ret; + + if (self->notls) + return; + + tls_crypto_info_init(TLS_1_3_VERSION, TLS_CIPHER_AES_GCM_128, + &tls12_0, 0); + tls_crypto_info_init(TLS_1_3_VERSION, TLS_CIPHER_AES_GCM_128, + &tls12_1, 1); + + ret = setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12_0, tls12_0.len); + ASSERT_EQ(ret, 0); + + ret = setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12_1, tls12_1.len); + ASSERT_EQ(ret, 0); + + ret = setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12_1, tls12_1.len); + EXPECT_EQ(ret, 0); + + EXPECT_EQ(send(self->fd, test_str, send_len, 0), send_len); + EXPECT_EQ(recv(self->cfd, buf, send_len, 0), send_len); + EXPECT_EQ(memcmp(buf, test_str, send_len), 0); +} + +TEST_F(tls, rekey) +{ + char const *test_str_1 = "test_message_before_rekey"; + char const *test_str_2 = "test_message_after_rekey"; + struct tls_crypto_info_keys tls12; + int send_len; + char buf[100]; + + if (variant->tls_version != TLS_1_3_VERSION) + return; + + /* initial send/recv */ + send_len = strlen(test_str_1) + 1; + EXPECT_EQ(send(self->fd, test_str_1, send_len, 0), send_len); + EXPECT_EQ(recv(self->cfd, buf, send_len, 0), send_len); + EXPECT_EQ(memcmp(buf, test_str_1, send_len), 0); + + /* update TX key */ + tls_send_keyupdate(_metadata, self->fd); + tls_crypto_info_init(variant->tls_version, variant->cipher_type, &tls12, 1); + EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len), 0); + + /* send after rekey */ + send_len = strlen(test_str_2) + 1; + EXPECT_EQ(send(self->fd, test_str_2, send_len, 0), send_len); + + /* can't receive the KeyUpdate without a control message */ + EXPECT_EQ(recv(self->cfd, buf, send_len, 0), -1); + + /* get KeyUpdate */ + tls_recv_keyupdate(_metadata, self->cfd, 0); + + /* recv blocking -> -EKEYEXPIRED */ + EXPECT_EQ(recv(self->cfd, buf, sizeof(buf), 0), -1); + EXPECT_EQ(errno, EKEYEXPIRED); + + /* recv non-blocking -> -EKEYEXPIRED */ + EXPECT_EQ(recv(self->cfd, buf, sizeof(buf), MSG_DONTWAIT), -1); + EXPECT_EQ(errno, EKEYEXPIRED); + + /* update RX key */ + EXPECT_EQ(setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12, tls12.len), 0); + + /* recv after rekey */ + EXPECT_NE(recv(self->cfd, buf, send_len, 0), -1); + EXPECT_EQ(memcmp(buf, test_str_2, send_len), 0); +} + +TEST_F(tls, rekey_fail) +{ + char const *test_str_1 = "test_message_before_rekey"; + char const *test_str_2 = "test_message_after_rekey"; + struct tls_crypto_info_keys tls12; + int send_len; + char buf[100]; + + /* initial send/recv */ + send_len = strlen(test_str_1) + 1; + EXPECT_EQ(send(self->fd, test_str_1, send_len, 0), send_len); + EXPECT_EQ(recv(self->cfd, buf, send_len, 0), send_len); + EXPECT_EQ(memcmp(buf, test_str_1, send_len), 0); + + /* update TX key */ + tls_send_keyupdate(_metadata, self->fd); + + if (variant->tls_version != TLS_1_3_VERSION) { + /* just check that rekey is not supported and return */ + tls_crypto_info_init(variant->tls_version, variant->cipher_type, &tls12, 1); + EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len), -1); + EXPECT_EQ(errno, EBUSY); + return; + } + + /* successful update */ + tls_crypto_info_init(variant->tls_version, variant->cipher_type, &tls12, 1); + EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len), 0); + + /* invalid update: change of version */ + tls_crypto_info_init(TLS_1_2_VERSION, variant->cipher_type, 
&tls12, 1); + EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len), -1); + EXPECT_EQ(errno, EINVAL); + + /* invalid update (RX socket): change of version */ + tls_crypto_info_init(TLS_1_2_VERSION, variant->cipher_type, &tls12, 1); + EXPECT_EQ(setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12, tls12.len), -1); + EXPECT_EQ(errno, EINVAL); + + /* invalid update: change of cipher */ + if (variant->cipher_type == TLS_CIPHER_AES_GCM_256) + tls_crypto_info_init(variant->tls_version, TLS_CIPHER_CHACHA20_POLY1305, &tls12, 1); + else + tls_crypto_info_init(variant->tls_version, TLS_CIPHER_AES_GCM_256, &tls12, 1); + EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len), -1); + EXPECT_EQ(errno, EINVAL); + + /* send after rekey, the invalid updates shouldn't have an effect */ + send_len = strlen(test_str_2) + 1; + EXPECT_EQ(send(self->fd, test_str_2, send_len, 0), send_len); + + /* can't receive the KeyUpdate without a control message */ + EXPECT_EQ(recv(self->cfd, buf, send_len, 0), -1); + + /* get KeyUpdate */ + tls_recv_keyupdate(_metadata, self->cfd, 0); + + /* recv blocking -> -EKEYEXPIRED */ + EXPECT_EQ(recv(self->cfd, buf, sizeof(buf), 0), -1); + EXPECT_EQ(errno, EKEYEXPIRED); + + /* recv non-blocking -> -EKEYEXPIRED */ + EXPECT_EQ(recv(self->cfd, buf, sizeof(buf), MSG_DONTWAIT), -1); + EXPECT_EQ(errno, EKEYEXPIRED); + + /* update RX key */ + tls_crypto_info_init(variant->tls_version, variant->cipher_type, &tls12, 1); + EXPECT_EQ(setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12, tls12.len), 0); + + /* recv after rekey */ + EXPECT_NE(recv(self->cfd, buf, send_len, 0), -1); + EXPECT_EQ(memcmp(buf, test_str_2, send_len), 0); +} + +TEST_F(tls, rekey_peek) +{ + char const *test_str_1 = "test_message_before_rekey"; + struct tls_crypto_info_keys tls12; + int send_len; + char buf[100]; + + if (variant->tls_version != TLS_1_3_VERSION) + return; + + send_len = strlen(test_str_1) + 1; + EXPECT_EQ(send(self->fd, test_str_1, send_len, 0), send_len); + + /* update TX key */ + tls_send_keyupdate(_metadata, self->fd); + tls_crypto_info_init(variant->tls_version, variant->cipher_type, &tls12, 1); + EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len), 0); + + EXPECT_EQ(recv(self->cfd, buf, sizeof(buf), MSG_PEEK), send_len); + EXPECT_EQ(memcmp(buf, test_str_1, send_len), 0); + + EXPECT_EQ(recv(self->cfd, buf, send_len, 0), send_len); + EXPECT_EQ(memcmp(buf, test_str_1, send_len), 0); + + /* can't receive the KeyUpdate without a control message */ + EXPECT_EQ(recv(self->cfd, buf, send_len, MSG_PEEK), -1); + + /* peek KeyUpdate */ + tls_recv_keyupdate(_metadata, self->cfd, MSG_PEEK); + + /* get KeyUpdate */ + tls_recv_keyupdate(_metadata, self->cfd, 0); + + /* update RX key */ + EXPECT_EQ(setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12, tls12.len), 0); +} + +TEST_F(tls, splice_rekey) +{ + int send_len = TLS_PAYLOAD_MAX_LEN / 2; + char mem_send[TLS_PAYLOAD_MAX_LEN]; + char mem_recv[TLS_PAYLOAD_MAX_LEN]; + struct tls_crypto_info_keys tls12; + int p[2]; + + if (variant->tls_version != TLS_1_3_VERSION) + return; + + memrnd(mem_send, sizeof(mem_send)); + + ASSERT_GE(pipe(p), 0); + EXPECT_EQ(send(self->fd, mem_send, send_len, 0), send_len); + + /* update TX key */ + tls_send_keyupdate(_metadata, self->fd); + tls_crypto_info_init(variant->tls_version, variant->cipher_type, &tls12, 1); + EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len), 0); + + EXPECT_EQ(send(self->fd, mem_send, send_len, 0), send_len); + + EXPECT_EQ(splice(self->cfd, NULL, p[1], NULL, TLS_PAYLOAD_MAX_LEN, 
0), send_len); + EXPECT_EQ(read(p[0], mem_recv, send_len), send_len); + EXPECT_EQ(memcmp(mem_send, mem_recv, send_len), 0); + + /* can't splice the KeyUpdate */ + EXPECT_EQ(splice(self->cfd, NULL, p[1], NULL, TLS_PAYLOAD_MAX_LEN, 0), -1); + EXPECT_EQ(errno, EINVAL); + + /* peek KeyUpdate */ + tls_recv_keyupdate(_metadata, self->cfd, MSG_PEEK); + + /* get KeyUpdate */ + tls_recv_keyupdate(_metadata, self->cfd, 0); + + /* can't splice before updating the key */ + EXPECT_EQ(splice(self->cfd, NULL, p[1], NULL, TLS_PAYLOAD_MAX_LEN, 0), -1); + EXPECT_EQ(errno, EKEYEXPIRED); + + /* update RX key */ + EXPECT_EQ(setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12, tls12.len), 0); + + EXPECT_EQ(splice(self->cfd, NULL, p[1], NULL, TLS_PAYLOAD_MAX_LEN, 0), send_len); + EXPECT_EQ(read(p[0], mem_recv, send_len), send_len); + EXPECT_EQ(memcmp(mem_send, mem_recv, send_len), 0); +} + +TEST_F(tls, rekey_peek_splice) +{ + char const *test_str_1 = "test_message_before_rekey"; + struct tls_crypto_info_keys tls12; + int send_len; + char buf[100]; + char mem_recv[TLS_PAYLOAD_MAX_LEN]; + int p[2]; + + if (variant->tls_version != TLS_1_3_VERSION) + return; + + ASSERT_GE(pipe(p), 0); + + send_len = strlen(test_str_1) + 1; + EXPECT_EQ(send(self->fd, test_str_1, send_len, 0), send_len); + + /* update TX key */ + tls_send_keyupdate(_metadata, self->fd); + tls_crypto_info_init(variant->tls_version, variant->cipher_type, &tls12, 1); + EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len), 0); + + EXPECT_EQ(recv(self->cfd, buf, sizeof(buf), MSG_PEEK), send_len); + EXPECT_EQ(memcmp(buf, test_str_1, send_len), 0); + + EXPECT_EQ(splice(self->cfd, NULL, p[1], NULL, TLS_PAYLOAD_MAX_LEN, 0), send_len); + EXPECT_EQ(read(p[0], mem_recv, send_len), send_len); + EXPECT_EQ(memcmp(mem_recv, test_str_1, send_len), 0); +} + +TEST_F(tls, rekey_getsockopt) +{ + struct tls_crypto_info_keys tls12; + struct tls_crypto_info_keys tls12_get; + socklen_t len; + + tls_crypto_info_init(variant->tls_version, variant->cipher_type, &tls12, 0); + + len = tls12.len; + EXPECT_EQ(getsockopt(self->fd, SOL_TLS, TLS_TX, &tls12_get, &len), 0); + EXPECT_EQ(len, tls12.len); + EXPECT_EQ(memcmp(&tls12_get, &tls12, tls12.len), 0); + + len = tls12.len; + EXPECT_EQ(getsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12_get, &len), 0); + EXPECT_EQ(len, tls12.len); + EXPECT_EQ(memcmp(&tls12_get, &tls12, tls12.len), 0); + + if (variant->tls_version != TLS_1_3_VERSION) + return; + + tls_send_keyupdate(_metadata, self->fd); + tls_crypto_info_init(variant->tls_version, variant->cipher_type, &tls12, 1); + EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len), 0); + + tls_recv_keyupdate(_metadata, self->cfd, 0); + EXPECT_EQ(setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12, tls12.len), 0); + + len = tls12.len; + EXPECT_EQ(getsockopt(self->fd, SOL_TLS, TLS_TX, &tls12_get, &len), 0); + EXPECT_EQ(len, tls12.len); + EXPECT_EQ(memcmp(&tls12_get, &tls12, tls12.len), 0); + + len = tls12.len; + EXPECT_EQ(getsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12_get, &len), 0); + EXPECT_EQ(len, tls12.len); + EXPECT_EQ(memcmp(&tls12_get, &tls12, tls12.len), 0); +} + +TEST_F(tls, rekey_poll_pending) +{ + char const *test_str = "test_message_after_rekey"; + struct tls_crypto_info_keys tls12; + struct pollfd pfd = { }; + int send_len; + int ret; + + if (variant->tls_version != TLS_1_3_VERSION) + return; + + /* update TX key */ + tls_send_keyupdate(_metadata, self->fd); + tls_crypto_info_init(variant->tls_version, variant->cipher_type, &tls12, 1); + EXPECT_EQ(setsockopt(self->fd, SOL_TLS, 
TLS_TX, &tls12, tls12.len), 0); + + /* get KeyUpdate */ + tls_recv_keyupdate(_metadata, self->cfd, 0); + + /* send immediately after rekey */ + send_len = strlen(test_str) + 1; + EXPECT_EQ(send(self->fd, test_str, send_len, 0), send_len); + + /* key hasn't been updated, expect cfd to be non-readable */ + pfd.fd = self->cfd; + pfd.events = POLLIN; + EXPECT_EQ(poll(&pfd, 1, 0), 0); + + ret = fork(); + ASSERT_GE(ret, 0); + + if (ret) { + int pid2, status; + + /* wait before installing the new key */ + sleep(1); + + /* update RX key while poll() is sleeping */ + EXPECT_EQ(setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12, tls12.len), 0); + + pid2 = wait(&status); + EXPECT_EQ(pid2, ret); + EXPECT_EQ(status, 0); + } else { + pfd.fd = self->cfd; + pfd.events = POLLIN; + EXPECT_EQ(poll(&pfd, 1, 5000), 1); + + exit(!__test_passed(_metadata)); + } +} + +TEST_F(tls, rekey_poll_delay) +{ + char const *test_str = "test_message_after_rekey"; + struct tls_crypto_info_keys tls12; + struct pollfd pfd = { }; + int send_len; + int ret; + + if (variant->tls_version != TLS_1_3_VERSION) + return; + + /* update TX key */ + tls_send_keyupdate(_metadata, self->fd); + tls_crypto_info_init(variant->tls_version, variant->cipher_type, &tls12, 1); + EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len), 0); + + /* get KeyUpdate */ + tls_recv_keyupdate(_metadata, self->cfd, 0); + + ret = fork(); + ASSERT_GE(ret, 0); + + if (ret) { + int pid2, status; + + /* wait before installing the new key */ + sleep(1); + + /* update RX key while poll() is sleeping */ + EXPECT_EQ(setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12, tls12.len), 0); + + sleep(1); + send_len = strlen(test_str) + 1; + EXPECT_EQ(send(self->fd, test_str, send_len, 0), send_len); + + pid2 = wait(&status); + EXPECT_EQ(pid2, ret); + EXPECT_EQ(status, 0); + } else { + pfd.fd = self->cfd; + pfd.events = POLLIN; + EXPECT_EQ(poll(&pfd, 1, 5000), 1); + exit(!__test_passed(_metadata)); + } +} + FIXTURE(tls_err) { int fd, cfd; -- cgit v1.2.3 From 9d6c0e58514f8b57cd9c2c755e41623d6a966025 Mon Sep 17 00:00:00 2001 From: He Rongguang Date: Thu, 12 Dec 2024 10:14:59 +0800 Subject: cpupower: fix TSC MHz calculation Commit 'cpupower: Make TSC read per CPU for Mperf monitor' (c2adb1877b7) changes TSC counter reads per cpu, but left time diff global (from start of all cpus to end of all cpus), thus diff(time) is too large for a cpu's tsc counting, resulting in far less than acutal TSC_Mhz and thus `cpupower monitor` showing far less than actual cpu realtime frequency. /proc/cpuinfo shows frequency: cat /proc/cpuinfo | egrep -e 'processor' -e 'MHz' ... processor : 171 cpu MHz : 4108.498 ... 
before fix (System 100% busy): | Mperf || Idle_Stats CPU| C0 | Cx | Freq || POLL | C1 | C2 171| 0.77| 99.23| 2279|| 0.00| 0.00| 0.00 after fix (System 100% busy): | Mperf || Idle_Stats CPU| C0 | Cx | Freq || POLL | C1 | C2 171| 0.46| 99.54| 4095|| 0.00| 0.00| 0.00 Fixes: c2adb1877b76 ("cpupower: Make TSC read per CPU for Mperf monitor") Signed-off-by: He Rongguang Signed-off-by: Shuah Khan --- tools/power/cpupower/utils/idle_monitor/mperf_monitor.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/power/cpupower/utils/idle_monitor/mperf_monitor.c b/tools/power/cpupower/utils/idle_monitor/mperf_monitor.c index 0a03573ebcc2..73b6b10cbdd2 100644 --- a/tools/power/cpupower/utils/idle_monitor/mperf_monitor.c +++ b/tools/power/cpupower/utils/idle_monitor/mperf_monitor.c @@ -33,7 +33,7 @@ static int mperf_get_count_percent(unsigned int self_id, double *percent, unsigned int cpu); static int mperf_get_count_freq(unsigned int id, unsigned long long *count, unsigned int cpu); -static struct timespec time_start, time_end; +static struct timespec *time_start, *time_end; static cstate_t mperf_cstates[MPERF_CSTATE_COUNT] = { { @@ -174,7 +174,7 @@ static int mperf_get_count_percent(unsigned int id, double *percent, dprint("%s: TSC Ref - mperf_diff: %llu, tsc_diff: %llu\n", mperf_cstates[id].name, mperf_diff, tsc_diff); } else if (max_freq_mode == MAX_FREQ_SYSFS) { - timediff = max_frequency * timespec_diff_us(time_start, time_end); + timediff = max_frequency * timespec_diff_us(time_start[cpu], time_end[cpu]); *percent = 100.0 * mperf_diff / timediff; dprint("%s: MAXFREQ - mperf_diff: %llu, time_diff: %llu\n", mperf_cstates[id].name, mperf_diff, timediff); @@ -207,7 +207,7 @@ static int mperf_get_count_freq(unsigned int id, unsigned long long *count, if (max_freq_mode == MAX_FREQ_TSC_REF) { /* Calculate max_freq from TSC count */ tsc_diff = tsc_at_measure_end[cpu] - tsc_at_measure_start[cpu]; - time_diff = timespec_diff_us(time_start, time_end); + time_diff = timespec_diff_us(time_start[cpu], time_end[cpu]); max_frequency = tsc_diff / time_diff; } @@ -226,9 +226,8 @@ static int mperf_start(void) { int cpu; - clock_gettime(CLOCK_REALTIME, &time_start); - for (cpu = 0; cpu < cpu_count; cpu++) { + clock_gettime(CLOCK_REALTIME, &time_start[cpu]); mperf_get_tsc(&tsc_at_measure_start[cpu]); mperf_init_stats(cpu); } @@ -243,9 +242,9 @@ static int mperf_stop(void) for (cpu = 0; cpu < cpu_count; cpu++) { mperf_measure_stats(cpu); mperf_get_tsc(&tsc_at_measure_end[cpu]); + clock_gettime(CLOCK_REALTIME, &time_end[cpu]); } - clock_gettime(CLOCK_REALTIME, &time_end); return 0; } @@ -349,6 +348,8 @@ struct cpuidle_monitor *mperf_register(void) aperf_current_count = calloc(cpu_count, sizeof(unsigned long long)); tsc_at_measure_start = calloc(cpu_count, sizeof(unsigned long long)); tsc_at_measure_end = calloc(cpu_count, sizeof(unsigned long long)); + time_start = calloc(cpu_count, sizeof(struct timespec)); + time_end = calloc(cpu_count, sizeof(struct timespec)); mperf_monitor.name_len = strlen(mperf_monitor.name); return &mperf_monitor; } @@ -361,6 +362,8 @@ void mperf_unregister(void) free(aperf_current_count); free(tsc_at_measure_start); free(tsc_at_measure_end); + free(time_start); + free(time_end); free(is_valid); } -- cgit v1.2.3 From 8dccbecbb9692a96cf477eb826352a7c556a31e2 Mon Sep 17 00:00:00 2001 From: Bastien Curutchet Date: Fri, 13 Dec 2024 16:06:20 +0100 Subject: selftests/bpf: test_xdp_meta: Rename BPF sections SEC("t") and SEC("x") can't be loaded by 
the __load() helper. Rename these sections SEC("tc") and SEC("xdp") so they can be interpreted by the __load() helper in upcoming patch. Update the test_xdp_meta.sh to fit these new names. Signed-off-by: Bastien Curutchet Signed-off-by: Martin KaFai Lau Link: https://patch.msgid.link/20241213-xdp_meta-v2-1-634582725b90@bootlin.com --- tools/testing/selftests/bpf/progs/test_xdp_meta.c | 4 ++-- tools/testing/selftests/bpf/test_xdp_meta.sh | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/test_xdp_meta.c b/tools/testing/selftests/bpf/progs/test_xdp_meta.c index a7c4a7d49fe6..fe2d71ae0e71 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_meta.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_meta.c @@ -8,7 +8,7 @@ #define round_up(x, y) ((((x) - 1) | __round_mask(x, y)) + 1) #define ctx_ptr(ctx, mem) (void *)(unsigned long)ctx->mem -SEC("t") +SEC("tc") int ing_cls(struct __sk_buff *ctx) { __u8 *data, *data_meta, *data_end; @@ -28,7 +28,7 @@ int ing_cls(struct __sk_buff *ctx) return diff ? TC_ACT_SHOT : TC_ACT_OK; } -SEC("x") +SEC("xdp") int ing_xdp(struct xdp_md *ctx) { __u8 *data, *data_meta, *data_end; diff --git a/tools/testing/selftests/bpf/test_xdp_meta.sh b/tools/testing/selftests/bpf/test_xdp_meta.sh index 2740322c1878..6039b92f1094 100755 --- a/tools/testing/selftests/bpf/test_xdp_meta.sh +++ b/tools/testing/selftests/bpf/test_xdp_meta.sh @@ -43,11 +43,11 @@ ip netns exec ${NS2} ip addr add 10.1.1.22/24 dev veth2 ip netns exec ${NS1} tc qdisc add dev veth1 clsact ip netns exec ${NS2} tc qdisc add dev veth2 clsact -ip netns exec ${NS1} tc filter add dev veth1 ingress bpf da obj ${BPF_FILE} sec t -ip netns exec ${NS2} tc filter add dev veth2 ingress bpf da obj ${BPF_FILE} sec t +ip netns exec ${NS1} tc filter add dev veth1 ingress bpf da obj ${BPF_FILE} sec tc +ip netns exec ${NS2} tc filter add dev veth2 ingress bpf da obj ${BPF_FILE} sec tc -ip netns exec ${NS1} ip link set dev veth1 xdp obj ${BPF_FILE} sec x -ip netns exec ${NS2} ip link set dev veth2 xdp obj ${BPF_FILE} sec x +ip netns exec ${NS1} ip link set dev veth1 xdp obj ${BPF_FILE} sec xdp +ip netns exec ${NS2} ip link set dev veth2 xdp obj ${BPF_FILE} sec xdp ip netns exec ${NS1} ip link set dev veth1 up ip netns exec ${NS2} ip link set dev veth2 up -- cgit v1.2.3 From df539cefb0abbd16be9fbcc6ec46a5a35495800f Mon Sep 17 00:00:00 2001 From: Bastien Curutchet Date: Fri, 13 Dec 2024 16:06:21 +0100 Subject: selftests/bpf: Migrate test_xdp_meta.sh into xdp_context_test_run.c test_xdp_meta.sh can't be used by the BPF CI. Migrate test_xdp_meta.sh in a new test case in xdp_context_test_run.c. It uses the same BPF programs located in progs/test_xdp_meta.c and the same network topology. Remove test_xdp_meta.sh and its Makefile entry. 
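For context, the migrated test exercises the XDP to TC metadata handoff over a veth pair. A rough sketch of that pattern (not the selftest's exact programs): the XDP side reserves headroom with bpf_xdp_adjust_meta() and stores a value there, and a TC classifier on the same device reads it back between data_meta and data.

	#include <linux/bpf.h>
	#include <bpf/bpf_helpers.h>

	SEC("xdp")
	int xdp_store_meta(struct xdp_md *ctx)
	{
		__u32 *meta;

		if (bpf_xdp_adjust_meta(ctx, -(int)sizeof(*meta)))
			return XDP_PASS;

		meta = (void *)(long)ctx->data_meta;
		if ((void *)(meta + 1) > (void *)(long)ctx->data)
			return XDP_PASS;

		*meta = 0xcafe;	/* a tc program can read this via __sk_buff data_meta */
		return XDP_PASS;
	}

	char _license[] SEC("license") = "GPL";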
Signed-off-by: Bastien Curutchet Signed-off-by: Martin KaFai Lau Link: https://patch.msgid.link/20241213-xdp_meta-v2-2-634582725b90@bootlin.com --- tools/testing/selftests/bpf/Makefile | 1 - .../bpf/prog_tests/xdp_context_test_run.c | 87 ++++++++++++++++++++++ tools/testing/selftests/bpf/test_xdp_meta.sh | 58 --------------- 3 files changed, 87 insertions(+), 59 deletions(-) delete mode 100755 tools/testing/selftests/bpf/test_xdp_meta.sh (limited to 'tools') diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 6ad3b1ba1920..772bfc6b63fa 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -129,7 +129,6 @@ TEST_FILES = xsk_prereqs.sh $(wildcard progs/btf_dump_test_case_*.c) TEST_PROGS := test_kmod.sh \ test_xdp_redirect.sh \ test_xdp_redirect_multi.sh \ - test_xdp_meta.sh \ test_tunnel.sh \ test_lwt_seg6local.sh \ test_lirc_mode2.sh \ diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c b/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c index e6a783c7f5db..937da9b7532a 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c @@ -2,6 +2,14 @@ #include #include #include "test_xdp_context_test_run.skel.h" +#include "test_xdp_meta.skel.h" + +#define TX_ADDR "10.0.0.1" +#define RX_ADDR "10.0.0.2" +#define RX_NAME "veth0" +#define TX_NAME "veth1" +#define TX_NETNS "xdp_context_tx" +#define RX_NETNS "xdp_context_rx" void test_xdp_context_error(int prog_fd, struct bpf_test_run_opts opts, __u32 data_meta, __u32 data, __u32 data_end, @@ -103,3 +111,82 @@ void test_xdp_context_test_run(void) test_xdp_context_test_run__destroy(skel); } + +void test_xdp_context_functional(void) +{ + LIBBPF_OPTS(bpf_tc_hook, tc_hook, .attach_point = BPF_TC_INGRESS); + LIBBPF_OPTS(bpf_tc_opts, tc_opts, .handle = 1, .priority = 1); + struct netns_obj *rx_ns = NULL, *tx_ns = NULL; + struct bpf_program *tc_prog, *xdp_prog; + struct test_xdp_meta *skel = NULL; + struct nstoken *nstoken = NULL; + int rx_ifindex; + int ret; + + tx_ns = netns_new(TX_NETNS, false); + if (!ASSERT_OK_PTR(tx_ns, "create tx_ns")) + return; + + rx_ns = netns_new(RX_NETNS, false); + if (!ASSERT_OK_PTR(rx_ns, "create rx_ns")) + goto close; + + SYS(close, "ip link add " RX_NAME " netns " RX_NETNS + " type veth peer name " TX_NAME " netns " TX_NETNS); + + nstoken = open_netns(RX_NETNS); + if (!ASSERT_OK_PTR(nstoken, "setns rx_ns")) + goto close; + + SYS(close, "ip addr add " RX_ADDR "/24 dev " RX_NAME); + SYS(close, "ip link set dev " RX_NAME " up"); + + skel = test_xdp_meta__open_and_load(); + if (!ASSERT_OK_PTR(skel, "open and load skeleton")) + goto close; + + rx_ifindex = if_nametoindex(RX_NAME); + if (!ASSERT_GE(rx_ifindex, 0, "if_nametoindex rx")) + goto close; + + tc_hook.ifindex = rx_ifindex; + ret = bpf_tc_hook_create(&tc_hook); + if (!ASSERT_OK(ret, "bpf_tc_hook_create")) + goto close; + + tc_prog = bpf_object__find_program_by_name(skel->obj, "ing_cls"); + if (!ASSERT_OK_PTR(tc_prog, "open ing_cls prog")) + goto close; + + tc_opts.prog_fd = bpf_program__fd(tc_prog); + ret = bpf_tc_attach(&tc_hook, &tc_opts); + if (!ASSERT_OK(ret, "bpf_tc_attach")) + goto close; + + xdp_prog = bpf_object__find_program_by_name(skel->obj, "ing_xdp"); + if (!ASSERT_OK_PTR(xdp_prog, "open ing_xdp prog")) + goto close; + + ret = bpf_xdp_attach(rx_ifindex, + bpf_program__fd(xdp_prog), + 0, NULL); + if (!ASSERT_GE(ret, 0, "bpf_xdp_attach")) + goto close; + + 
close_netns(nstoken); + + nstoken = open_netns(TX_NETNS); + if (!ASSERT_OK_PTR(nstoken, "setns tx_ns")) + goto close; + + SYS(close, "ip addr add " TX_ADDR "/24 dev " TX_NAME); + SYS(close, "ip link set dev " TX_NAME " up"); + ASSERT_OK(SYS_NOFAIL("ping -c 1 " RX_ADDR), "ping"); + +close: + close_netns(nstoken); + test_xdp_meta__destroy(skel); + netns_free(rx_ns); + netns_free(tx_ns); +} + diff --git a/tools/testing/selftests/bpf/test_xdp_meta.sh b/tools/testing/selftests/bpf/test_xdp_meta.sh deleted file mode 100755 index 6039b92f1094..000000000000 --- a/tools/testing/selftests/bpf/test_xdp_meta.sh +++ /dev/null @@ -1,58 +0,0 @@ -#!/bin/sh - -BPF_FILE="test_xdp_meta.bpf.o" -# Kselftest framework requirement - SKIP code is 4. -readonly KSFT_SKIP=4 -readonly NS1="ns1-$(mktemp -u XXXXXX)" -readonly NS2="ns2-$(mktemp -u XXXXXX)" - -cleanup() -{ - if [ "$?" = "0" ]; then - echo "selftests: test_xdp_meta [PASS]"; - else - echo "selftests: test_xdp_meta [FAILED]"; - fi - - set +e - ip link del veth1 2> /dev/null - ip netns del ${NS1} 2> /dev/null - ip netns del ${NS2} 2> /dev/null -} - -ip link set dev lo xdp off 2>/dev/null > /dev/null -if [ $? -ne 0 ];then - echo "selftests: [SKIP] Could not run test without the ip xdp support" - exit $KSFT_SKIP -fi -set -e - -ip netns add ${NS1} -ip netns add ${NS2} - -trap cleanup 0 2 3 6 9 - -ip link add veth1 type veth peer name veth2 - -ip link set veth1 netns ${NS1} -ip link set veth2 netns ${NS2} - -ip netns exec ${NS1} ip addr add 10.1.1.11/24 dev veth1 -ip netns exec ${NS2} ip addr add 10.1.1.22/24 dev veth2 - -ip netns exec ${NS1} tc qdisc add dev veth1 clsact -ip netns exec ${NS2} tc qdisc add dev veth2 clsact - -ip netns exec ${NS1} tc filter add dev veth1 ingress bpf da obj ${BPF_FILE} sec tc -ip netns exec ${NS2} tc filter add dev veth2 ingress bpf da obj ${BPF_FILE} sec tc - -ip netns exec ${NS1} ip link set dev veth1 xdp obj ${BPF_FILE} sec xdp -ip netns exec ${NS2} ip link set dev veth2 xdp obj ${BPF_FILE} sec xdp - -ip netns exec ${NS1} ip link set dev veth1 up -ip netns exec ${NS2} ip link set dev veth2 up - -ip netns exec ${NS1} ping -c 1 10.1.1.22 -ip netns exec ${NS2} ping -c 1 10.1.1.11 - -exit 0 -- cgit v1.2.3 From 184a9358e506b77ade22c07dda4f34d133bc31c0 Mon Sep 17 00:00:00 2001 From: Tycho Andersen Date: Wed, 30 Oct 2024 14:37:32 -0600 Subject: selftests/exec: add a test for execveat()'s comm In the previous patch we've updated AT_EMPTY_PATH execs to use the dentry filename. Test for this and just to be sure keeps working with symlinks, which was a concern in [1], I've added a test for that as well. The test itself is a bit ugly, because the existing check_execveat_fail() helpers use a hardcoded envp and argv, and we want to "pass" things via the environment to test various argument values, but it seemed cleaner than passing one in everywhere in all the existing tests. Output looks like: ok 51 Check success of execveat(6, 'home/tycho/packages/...yyyyyyyyyyyyyyyyyyyy', 0)... # Check execveat(AT_EMPTY_PATH)'s comm is execveat ok 52 Check success of execveat(9, '', 4096)... # Check execveat(AT_EMPTY_PATH)'s comm is execveat ok 53 Check success of execveat(11, '', 4096)... # Check execveat(AT_EMPTY_PATH)'s comm is execveat [ 25.579272] process 'execveat' launched '/dev/fd/9' with NULL argv: empty string added ok 54 Check success of execveat(9, '', 4096)... 
Link: https://lore.kernel.org/all/20240925.152228-private.conflict.frozen.trios-TdUGhuI5Sb4v@cyphar.com/ [1] Signed-off-by: Tycho Andersen Link: https://lore.kernel.org/r/20241030203732.248767-2-tycho@tycho.pizza Signed-off-by: Kees Cook --- tools/testing/selftests/exec/execveat.c | 75 +++++++++++++++++++++++++++++++-- 1 file changed, 72 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/exec/execveat.c b/tools/testing/selftests/exec/execveat.c index 071e03532cba..8fb7395fd35b 100644 --- a/tools/testing/selftests/exec/execveat.c +++ b/tools/testing/selftests/exec/execveat.c @@ -23,9 +23,11 @@ #include "../kselftest.h" -#define TESTS_EXPECTED 51 +#define TESTS_EXPECTED 54 #define TEST_NAME_LEN (PATH_MAX * 4) +#define CHECK_COMM "CHECK_COMM" + static char longpath[2 * PATH_MAX] = ""; static char *envp[] = { "IN_TEST=yes", NULL, NULL }; static char *argv[] = { "execveat", "99", NULL }; @@ -237,6 +239,29 @@ static int check_execveat_pathmax(int root_dfd, const char *src, int is_script) return fail; } +static int check_execveat_comm(int fd, char *argv0, char *expected) +{ + char buf[128], *old_env, *old_argv0; + int ret; + + snprintf(buf, sizeof(buf), CHECK_COMM "=%s", expected); + + old_env = envp[1]; + envp[1] = buf; + + old_argv0 = argv[0]; + argv[0] = argv0; + + ksft_print_msg("Check execveat(AT_EMPTY_PATH)'s comm is %s\n", + expected); + ret = check_execveat_invoked_rc(fd, "", AT_EMPTY_PATH, 0, 0); + + envp[1] = old_env; + argv[0] = old_argv0; + + return ret; +} + static int run_tests(void) { int fail = 0; @@ -389,6 +414,14 @@ static int run_tests(void) fail += check_execveat_pathmax(root_dfd, "execveat", 0); fail += check_execveat_pathmax(root_dfd, "script", 1); + + /* /proc/pid/comm gives filename by default */ + fail += check_execveat_comm(fd, "sentinel", "execveat"); + /* /proc/pid/comm gives argv[0] when invoked via link */ + fail += check_execveat_comm(fd_symlink, "sentinel", "execveat"); + /* /proc/pid/comm gives filename if NULL is passed */ + fail += check_execveat_comm(fd, NULL, "execveat"); + return fail; } @@ -415,9 +448,13 @@ int main(int argc, char **argv) int ii; int rc; const char *verbose = getenv("VERBOSE"); + const char *check_comm = getenv(CHECK_COMM); - if (argc >= 2) { - /* If we are invoked with an argument, don't run tests. */ + if (argc >= 2 || check_comm) { + /* + * If we are invoked with an argument, or no arguments but a + * command to check, don't run tests. + */ const char *in_test = getenv("IN_TEST"); if (verbose) { @@ -426,6 +463,38 @@ int main(int argc, char **argv) ksft_print_msg("\t[%d]='%s\n'", ii, argv[ii]); } + /* If the tests wanted us to check the command, do so. */ + if (check_comm) { + /* TASK_COMM_LEN == 16 */ + char buf[32]; + int fd, ret; + + fd = open("/proc/self/comm", O_RDONLY); + if (fd < 0) { + ksft_perror("open() comm failed"); + exit(1); + } + + ret = read(fd, buf, sizeof(buf)); + if (ret < 0) { + ksft_perror("read() comm failed"); + close(fd); + exit(1); + } + close(fd); + + // trim off the \n + buf[ret-1] = 0; + + if (strcmp(buf, check_comm)) { + ksft_print_msg("bad comm, got: %s expected: %s\n", + buf, check_comm); + exit(1); + } + + exit(0); + } + /* Check expected environment transferred. 
*/ if (!in_test || strcmp(in_test, "yes") != 0) { ksft_print_msg("no IN_TEST=yes in env\n"); -- cgit v1.2.3 From 0518863407b8dcc7070fdbc1c015046d66777e78 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 13 Dec 2024 07:22:42 -0800 Subject: selftests: net: support setting recv_size in YNL recv_size parameter allows constraining the buffer size for dumps. It's useful in testing kernel handling of dump continuation, IOW testing dumps which span multiple skbs. Let the tests set this parameter when initializing the YNL family. Keep the normal default, we don't want tests to unintentionally behave very differently than normal code. Reviewed-by: Joe Damato Reviewed-by: Petr Machata Link: https://patch.msgid.link/20241213152244.3080955-4-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/lib/py/ynl.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/lib/py/ynl.py b/tools/testing/selftests/net/lib/py/ynl.py index a0d689d58c57..076a7e8dc3eb 100644 --- a/tools/testing/selftests/net/lib/py/ynl.py +++ b/tools/testing/selftests/net/lib/py/ynl.py @@ -32,23 +32,23 @@ except ModuleNotFoundError as e: # Set schema='' to avoid jsonschema validation, it's slow # class EthtoolFamily(YnlFamily): - def __init__(self): + def __init__(self, recv_size=0): super().__init__((SPEC_PATH / Path('ethtool.yaml')).as_posix(), - schema='') + schema='', recv_size=recv_size) class RtnlFamily(YnlFamily): - def __init__(self): + def __init__(self, recv_size=0): super().__init__((SPEC_PATH / Path('rt_link.yaml')).as_posix(), - schema='') + schema='', recv_size=recv_size) class NetdevFamily(YnlFamily): - def __init__(self): + def __init__(self, recv_size=0): super().__init__((SPEC_PATH / Path('netdev.yaml')).as_posix(), - schema='') + schema='', recv_size=recv_size) class NetshaperFamily(YnlFamily): - def __init__(self): + def __init__(self, recv_size=0): super().__init__((SPEC_PATH / Path('net_shaper.yaml')).as_posix(), - schema='') + schema='', recv_size=recv_size) -- cgit v1.2.3 From 1234810b1649e9d781aeafd4b23fb1fcfbf95d8f Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 13 Dec 2024 07:22:43 -0800 Subject: selftests: net-drv: queues: sanity check netlink dumps This test already catches a netlink bug fixed by this series, but only when running on HW with many queues. Make sure the netdevsim instance created has a lot of queues, and constrain the size of the recv_buffer used by netlink. While at it test both rx and tx queues. 
Reviewed-by: Joe Damato Reviewed-by: Petr Machata Link: https://patch.msgid.link/20241213152244.3080955-5-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/drivers/net/queues.py | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/drivers/net/queues.py b/tools/testing/selftests/drivers/net/queues.py index 30f29096e27c..9c5473abbd78 100755 --- a/tools/testing/selftests/drivers/net/queues.py +++ b/tools/testing/selftests/drivers/net/queues.py @@ -8,25 +8,28 @@ from lib.py import cmd import glob -def sys_get_queues(ifname) -> int: - folders = glob.glob(f'/sys/class/net/{ifname}/queues/rx-*') +def sys_get_queues(ifname, qtype='rx') -> int: + folders = glob.glob(f'/sys/class/net/{ifname}/queues/{qtype}-*') return len(folders) -def nl_get_queues(cfg, nl): +def nl_get_queues(cfg, nl, qtype='rx'): queues = nl.queue_get({'ifindex': cfg.ifindex}, dump=True) if queues: - return len([q for q in queues if q['type'] == 'rx']) + return len([q for q in queues if q['type'] == qtype]) return None def get_queues(cfg, nl) -> None: - queues = nl_get_queues(cfg, nl) - if not queues: - raise KsftSkipEx('queue-get not supported by device') + snl = NetdevFamily(recv_size=4096) - expected = sys_get_queues(cfg.dev['ifname']) - ksft_eq(queues, expected) + for qtype in ['rx', 'tx']: + queues = nl_get_queues(cfg, snl, qtype) + if not queues: + raise KsftSkipEx('queue-get not supported by device') + + expected = sys_get_queues(cfg.dev['ifname'], qtype) + ksft_eq(queues, expected) def addremove_queues(cfg, nl) -> None: @@ -57,7 +60,7 @@ def addremove_queues(cfg, nl) -> None: def main() -> None: - with NetDrvEnv(__file__, queue_count=3) as cfg: + with NetDrvEnv(__file__, queue_count=100) as cfg: ksft_run([get_queues, addremove_queues], args=(cfg, NetdevFamily())) ksft_exit() -- cgit v1.2.3 From 5712e323d4c3ad03bba4d28f83e80593171ac3f1 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 13 Dec 2024 07:22:44 -0800 Subject: selftests: net-drv: stats: sanity check netlink dumps Sanity check netlink dumps, to make sure dumps don't have repeated entries or gaps in IDs. 
Reviewed-by: Petr Machata Link: https://patch.msgid.link/20241213152244.3080955-6-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/drivers/net/stats.py | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/drivers/net/stats.py b/tools/testing/selftests/drivers/net/stats.py index 63e3c045a3b2..031ac9def6c0 100755 --- a/tools/testing/selftests/drivers/net/stats.py +++ b/tools/testing/selftests/drivers/net/stats.py @@ -110,6 +110,23 @@ def qstat_by_ifindex(cfg) -> None: ksft_ge(triple[1][key], triple[0][key], comment="bad key: " + key) ksft_ge(triple[2][key], triple[1][key], comment="bad key: " + key) + # Sanity check the dumps + queues = NetdevFamily(recv_size=4096).qstats_get({"scope": "queue"}, dump=True) + # Reformat the output into {ifindex: {rx: [id, id, ...], tx: [id, id, ...]}} + parsed = {} + for entry in queues: + ifindex = entry["ifindex"] + if ifindex not in parsed: + parsed[ifindex] = {"rx":[], "tx": []} + parsed[ifindex][entry["queue-type"]].append(entry['queue-id']) + # Now, validate + for ifindex, queues in parsed.items(): + for qtype in ['rx', 'tx']: + ksft_eq(len(queues[qtype]), len(set(queues[qtype])), + comment="repeated queue keys") + ksft_eq(len(queues[qtype]), max(queues[qtype]) + 1, + comment="missing queue keys") + # Test invalid dumps # 0 is invalid with ksft_raises(NlError) as cm: @@ -158,7 +175,7 @@ def check_down(cfg) -> None: def main() -> None: - with NetDrvEnv(__file__) as cfg: + with NetDrvEnv(__file__, queue_count=100) as cfg: ksft_run([check_pause, check_fec, pkt_byte_sum, qstat_by_ifindex, check_down], args=(cfg, )) -- cgit v1.2.3 From cda7d5abe089cc8bd6d623cd6577627d8125d155 Mon Sep 17 00:00:00 2001 From: Anna Emese Nyiri Date: Fri, 13 Dec 2024 09:44:56 +0100 Subject: selftests: net: test SO_PRIORITY ancillary data with cmsg_sender Extend cmsg_sender.c with a new option '-Q' to send SO_PRIORITY ancillary data. cmsg_so_priority.sh script added to validate SO_PRIORITY behavior by creating VLAN device with egress QoS mapping and testing packet priorities using flower filters. Verify that packets with different priorities are correctly matched and counted by filters for multiple protocols and IP versions. 
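Under the hood the -Q option attaches a SOL_SOCKET/SO_PRIORITY control message to the outgoing sendmsg() call. A minimal sketch of that pattern, mirroring what cmsg_sender does rather than copying it, with error handling and the payload iovec setup omitted:

	#include <string.h>
	#include <sys/socket.h>
	#include <sys/uio.h>

	static ssize_t send_with_prio(int fd, struct iovec *iov, unsigned int prio)
	{
		char cbuf[CMSG_SPACE(sizeof(prio))] = { 0 };
		struct msghdr msg = {
			.msg_iov = iov,
			.msg_iovlen = 1,
			.msg_control = cbuf,
			.msg_controllen = sizeof(cbuf),
		};
		struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);

		cmsg->cmsg_level = SOL_SOCKET;
		cmsg->cmsg_type = SO_PRIORITY;
		cmsg->cmsg_len = CMSG_LEN(sizeof(prio));
		memcpy(CMSG_DATA(cmsg), &prio, sizeof(prio));

		return sendmsg(fd, &msg, 0);
	}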
Reviewed-by: Willem de Bruijn Acked-by: Willem de Bruijn Reviewed-by: Ido Schimmel Tested-by: Ido Schimmel Suggested-by: Ido Schimmel Signed-off-by: Anna Emese Nyiri Link: https://patch.msgid.link/20241213084457.45120-4-annaemesenyiri@gmail.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/Makefile | 1 + tools/testing/selftests/net/cmsg_sender.c | 11 +- tools/testing/selftests/net/cmsg_so_priority.sh | 151 ++++++++++++++++++++++++ 3 files changed, 162 insertions(+), 1 deletion(-) create mode 100755 tools/testing/selftests/net/cmsg_so_priority.sh (limited to 'tools') diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index cb2fc601de66..f09bd96cc978 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -32,6 +32,7 @@ TEST_PROGS += ioam6.sh TEST_PROGS += gro.sh TEST_PROGS += gre_gso.sh TEST_PROGS += cmsg_so_mark.sh +TEST_PROGS += cmsg_so_priority.sh TEST_PROGS += cmsg_time.sh cmsg_ipv6.sh TEST_PROGS += netns-name.sh TEST_PROGS += nl_netdev.py diff --git a/tools/testing/selftests/net/cmsg_sender.c b/tools/testing/selftests/net/cmsg_sender.c index 876c2db02a63..bc314382e4e1 100644 --- a/tools/testing/selftests/net/cmsg_sender.c +++ b/tools/testing/selftests/net/cmsg_sender.c @@ -59,6 +59,7 @@ struct options { unsigned int proto; } sock; struct option_cmsg_u32 mark; + struct option_cmsg_u32 priority; struct { bool ena; unsigned int delay; @@ -97,6 +98,8 @@ static void __attribute__((noreturn)) cs_usage(const char *bin) "\n" "\t\t-m val Set SO_MARK with given value\n" "\t\t-M val Set SO_MARK via setsockopt\n" + "\t\t-P val Set SO_PRIORITY via setsockopt\n" + "\t\t-Q val Set SO_PRIORITY via cmsg\n" "\t\t-d val Set SO_TXTIME with given delay (usec)\n" "\t\t-t Enable time stamp reporting\n" "\t\t-f val Set don't fragment via cmsg\n" @@ -115,7 +118,7 @@ static void cs_parse_args(int argc, char *argv[]) { int o; - while ((o = getopt(argc, argv, "46sS:p:P:m:M:n:d:tf:F:c:C:l:L:H:")) != -1) { + while ((o = getopt(argc, argv, "46sS:p:P:m:M:n:d:tf:F:c:C:l:L:H:Q:")) != -1) { switch (o) { case 's': opt.silent_send = true; @@ -148,6 +151,10 @@ static void cs_parse_args(int argc, char *argv[]) opt.mark.ena = true; opt.mark.val = atoi(optarg); break; + case 'Q': + opt.priority.ena = true; + opt.priority.val = atoi(optarg); + break; case 'M': opt.sockopt.mark = atoi(optarg); break; @@ -252,6 +259,8 @@ cs_write_cmsg(int fd, struct msghdr *msg, char *cbuf, size_t cbuf_sz) ca_write_cmsg_u32(cbuf, cbuf_sz, &cmsg_len, SOL_SOCKET, SO_MARK, &opt.mark); + ca_write_cmsg_u32(cbuf, cbuf_sz, &cmsg_len, + SOL_SOCKET, SO_PRIORITY, &opt.priority); ca_write_cmsg_u32(cbuf, cbuf_sz, &cmsg_len, SOL_IPV6, IPV6_DONTFRAG, &opt.v6.dontfrag); ca_write_cmsg_u32(cbuf, cbuf_sz, &cmsg_len, diff --git a/tools/testing/selftests/net/cmsg_so_priority.sh b/tools/testing/selftests/net/cmsg_so_priority.sh new file mode 100755 index 000000000000..ee07d8653262 --- /dev/null +++ b/tools/testing/selftests/net/cmsg_so_priority.sh @@ -0,0 +1,151 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source lib.sh + +readonly KSFT_SKIP=4 + +IP4=192.0.2.1/24 +TGT4=192.0.2.2 +TGT4_RAW=192.0.2.3 +IP6=2001:db8::1/64 +TGT6=2001:db8::2 +TGT6_RAW=2001:db8::3 +PORT=1234 +TOTAL_TESTS=0 +FAILED_TESTS=0 + +if ! command -v jq &> /dev/null; then + echo "SKIP cmsg_so_priroity.sh test: jq is not installed." 
>&2 + exit "$KSFT_SKIP" +fi + +check_result() { + ((TOTAL_TESTS++)) + if [ "$1" -ne 0 ]; then + ((FAILED_TESTS++)) + fi +} + +cleanup() +{ + cleanup_ns $NS +} + +trap cleanup EXIT + +setup_ns NS + +create_filter() { + local handle=$1 + local vlan_prio=$2 + local ip_type=$3 + local proto=$4 + local dst_ip=$5 + local ip_proto + + if [[ "$proto" == "u" ]]; then + ip_proto="udp" + elif [[ "$ip_type" == "ipv4" && "$proto" == "i" ]]; then + ip_proto="icmp" + elif [[ "$ip_type" == "ipv6" && "$proto" == "i" ]]; then + ip_proto="icmpv6" + fi + + tc -n $NS filter add dev dummy1 \ + egress pref 1 handle "$handle" proto 802.1q \ + flower vlan_prio "$vlan_prio" vlan_ethtype "$ip_type" \ + dst_ip "$dst_ip" ${ip_proto:+ip_proto $ip_proto} \ + action pass +} + +ip -n $NS link set dev lo up +ip -n $NS link add name dummy1 up type dummy + +ip -n $NS link add link dummy1 name dummy1.10 up type vlan id 10 \ + egress-qos-map 0:0 1:1 2:2 3:3 4:4 5:5 6:6 7:7 + +ip -n $NS address add $IP4 dev dummy1.10 +ip -n $NS address add $IP6 dev dummy1.10 nodad + +ip netns exec $NS sysctl -wq net.ipv4.ping_group_range='0 2147483647' + +ip -n $NS neigh add $TGT4 lladdr 00:11:22:33:44:55 nud permanent \ + dev dummy1.10 +ip -n $NS neigh add $TGT6 lladdr 00:11:22:33:44:55 nud permanent \ + dev dummy1.10 +ip -n $NS neigh add $TGT4_RAW lladdr 00:11:22:33:44:66 nud permanent \ + dev dummy1.10 +ip -n $NS neigh add $TGT6_RAW lladdr 00:11:22:33:44:66 nud permanent \ + dev dummy1.10 + +tc -n $NS qdisc add dev dummy1 clsact + +FILTER_COUNTER=10 + +for i in 4 6; do + for proto in u i r; do + echo "Test IPV$i, prot: $proto" + for priority in {0..7}; do + if [[ $i == 4 && $proto == "r" ]]; then + TGT=$TGT4_RAW + elif [[ $i == 6 && $proto == "r" ]]; then + TGT=$TGT6_RAW + elif [ $i == 4 ]; then + TGT=$TGT4 + else + TGT=$TGT6 + fi + + handle="${FILTER_COUNTER}${priority}" + + create_filter $handle $priority ipv$i $proto $TGT + + pkts=$(tc -n $NS -j -s filter show dev dummy1 egress \ + | jq ".[] | select(.options.handle == ${handle}) | \ + .options.actions[0].stats.packets") + + if [[ $pkts == 0 ]]; then + check_result 0 + else + echo "prio $priority: expected 0, got $pkts" + check_result 1 + fi + + ip netns exec $NS ./cmsg_sender -$i -Q $priority \ + -p $proto $TGT $PORT + + pkts=$(tc -n $NS -j -s filter show dev dummy1 egress \ + | jq ".[] | select(.options.handle == ${handle}) | \ + .options.actions[0].stats.packets") + if [[ $pkts == 1 ]]; then + check_result 0 + else + echo "prio $priority -Q: expected 1, got $pkts" + check_result 1 + fi + + ip netns exec $NS ./cmsg_sender -$i -P $priority \ + -p $proto $TGT $PORT + + pkts=$(tc -n $NS -j -s filter show dev dummy1 egress \ + | jq ".[] | select(.options.handle == ${handle}) | \ + .options.actions[0].stats.packets") + if [[ $pkts == 2 ]]; then + check_result 0 + else + echo "prio $priority -P: expected 2, got $pkts" + check_result 1 + fi + done + FILTER_COUNTER=$((FILTER_COUNTER + 10)) + done +done + +if [ $FAILED_TESTS -ne 0 ]; then + echo "FAIL - $FAILED_TESTS/$TOTAL_TESTS tests failed" + exit 1 +else + echo "OK - All $TOTAL_TESTS tests passed" + exit 0 +fi -- cgit v1.2.3 From e45469e594b255ef8d750ed5576698743450d2ac Mon Sep 17 00:00:00 2001 From: Anna Emese Nyiri Date: Fri, 13 Dec 2024 09:44:57 +0100 Subject: sock: Introduce SO_RCVPRIORITY socket option Add new socket option, SO_RCVPRIORITY, to include SO_PRIORITY in the ancillary data returned by recvmsg(). 
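As a rough, self-contained illustration (not from this series; the helper is hypothetical, and only the value 79 for SO_RCVPRIORITY comes from the uapi header change below), a receiver would opt in via setsockopt() and then pick the priority out of the SOL_SOCKET control messages returned by recvmsg():

#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <sys/uio.h>

#ifndef SO_RCVPRIORITY
#define SO_RCVPRIORITY 79
#endif

static void recv_and_dump_priority(int fd)
{
	char data[1500];
	char cbuf[CMSG_SPACE(sizeof(uint32_t))];
	struct iovec iov = { .iov_base = data, .iov_len = sizeof(data) };
	struct msghdr msg = {
		.msg_iov = &iov,
		.msg_iovlen = 1,
		.msg_control = cbuf,
		.msg_controllen = sizeof(cbuf),
	};
	struct cmsghdr *cmsg;
	uint32_t prio;
	int one = 1;

	/* Opt in, mirroring how SO_RCVMARK enables mark reporting. */
	setsockopt(fd, SOL_SOCKET, SO_RCVPRIORITY, &one, sizeof(one));

	if (recvmsg(fd, &msg, 0) < 0)
		return;

	/* Match on the level only; the exact cmsg_type is not shown in this hunk. */
	for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) {
		if (cmsg->cmsg_level != SOL_SOCKET)
			continue;
		memcpy(&prio, CMSG_DATA(cmsg), sizeof(prio));
		printf("cmsg_type %d, priority %u\n", cmsg->cmsg_type, prio);
	}
}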
This is analogous to the existing support for SO_RCVMARK, as implemented in commit 6fd1d51cfa253 ("net: SO_RCVMARK socket option for SO_MARK with recvmsg()"). Reviewed-by: Willem de Bruijn Suggested-by: Ferenc Fejes Signed-off-by: Anna Emese Nyiri Link: https://patch.msgid.link/20241213084457.45120-5-annaemesenyiri@gmail.com Signed-off-by: Jakub Kicinski --- tools/include/uapi/asm-generic/socket.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'tools') diff --git a/tools/include/uapi/asm-generic/socket.h b/tools/include/uapi/asm-generic/socket.h index 281df9139d2b..ffff554a5230 100644 --- a/tools/include/uapi/asm-generic/socket.h +++ b/tools/include/uapi/asm-generic/socket.h @@ -126,6 +126,8 @@ #define SCM_TS_OPT_ID 78 +#define SO_RCVPRIORITY 79 + #if !defined(__KERNEL__) #if __BITS_PER_LONG == 64 || (defined(__x86_64__) && defined(__ILP32__)) -- cgit v1.2.3 From 59a42b0e78888e2d9a459b12e8d1eb09fb4a3c7b Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Mon, 2 Dec 2024 23:44:52 +0100 Subject: selftests/pidfd: add pidfs file handle selftests Add selftests for pidfs file handles. Link: https://lore.kernel.org/r/20241202-imstande-einsicht-d78753e1c632@brauner Reviewed-by: Amir Goldstein Signed-off-by: Christian Brauner --- tools/testing/selftests/pidfd/.gitignore | 1 + tools/testing/selftests/pidfd/Makefile | 3 +- tools/testing/selftests/pidfd/pidfd.h | 39 ++ .../selftests/pidfd/pidfd_file_handle_test.c | 503 +++++++++++++++++++++ tools/testing/selftests/pidfd/pidfd_setns_test.c | 47 +- tools/testing/selftests/pidfd/pidfd_wait.c | 47 +- 6 files changed, 567 insertions(+), 73 deletions(-) create mode 100644 tools/testing/selftests/pidfd/pidfd_file_handle_test.c (limited to 'tools') diff --git a/tools/testing/selftests/pidfd/.gitignore b/tools/testing/selftests/pidfd/.gitignore index 973198a3ec3d..224260e1a4a2 100644 --- a/tools/testing/selftests/pidfd/.gitignore +++ b/tools/testing/selftests/pidfd/.gitignore @@ -6,3 +6,4 @@ pidfd_wait pidfd_fdinfo_test pidfd_getfd_test pidfd_setns_test +pidfd_file_handle_test diff --git a/tools/testing/selftests/pidfd/Makefile b/tools/testing/selftests/pidfd/Makefile index d731e3e76d5b..3c16d8e77684 100644 --- a/tools/testing/selftests/pidfd/Makefile +++ b/tools/testing/selftests/pidfd/Makefile @@ -2,7 +2,8 @@ CFLAGS += -g $(KHDR_INCLUDES) -pthread -Wall TEST_GEN_PROGS := pidfd_test pidfd_fdinfo_test pidfd_open_test \ - pidfd_poll_test pidfd_wait pidfd_getfd_test pidfd_setns_test + pidfd_poll_test pidfd_wait pidfd_getfd_test pidfd_setns_test \ + pidfd_file_handle_test include ../lib.mk diff --git a/tools/testing/selftests/pidfd/pidfd.h b/tools/testing/selftests/pidfd/pidfd.h index 88d6830ee004..28a471c88c51 100644 --- a/tools/testing/selftests/pidfd/pidfd.h +++ b/tools/testing/selftests/pidfd/pidfd.h @@ -17,6 +17,7 @@ #include #include "../kselftest.h" +#include "../clone3/clone3_selftests.h" #ifndef P_PIDFD #define P_PIDFD 3 @@ -68,6 +69,11 @@ #define PIDFD_SKIP 3 #define PIDFD_XFAIL 4 +static inline int sys_waitid(int which, pid_t pid, siginfo_t *info, int options) +{ + return syscall(__NR_waitid, which, pid, info, options, NULL); +} + static inline int wait_for_pid(pid_t pid) { int status, ret; @@ -114,4 +120,37 @@ static inline int sys_memfd_create(const char *name, unsigned int flags) return syscall(__NR_memfd_create, name, flags); } +static inline pid_t create_child(int *pidfd, unsigned flags) +{ + struct __clone_args args = { + .flags = CLONE_PIDFD | flags, + .exit_signal = SIGCHLD, + .pidfd = ptr_to_u64(pidfd), + }; + + return sys_clone3(&args, 
sizeof(struct __clone_args)); +} + +static inline ssize_t read_nointr(int fd, void *buf, size_t count) +{ + ssize_t ret; + + do { + ret = read(fd, buf, count); + } while (ret < 0 && errno == EINTR); + + return ret; +} + +static inline ssize_t write_nointr(int fd, const void *buf, size_t count) +{ + ssize_t ret; + + do { + ret = write(fd, buf, count); + } while (ret < 0 && errno == EINTR); + + return ret; +} + #endif /* __PIDFD_H */ diff --git a/tools/testing/selftests/pidfd/pidfd_file_handle_test.c b/tools/testing/selftests/pidfd/pidfd_file_handle_test.c new file mode 100644 index 000000000000..439b9c6c0457 --- /dev/null +++ b/tools/testing/selftests/pidfd/pidfd_file_handle_test.c @@ -0,0 +1,503 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "pidfd.h" +#include "../kselftest_harness.h" + +FIXTURE(file_handle) +{ + pid_t pid; + int pidfd; + + pid_t child_pid1; + int child_pidfd1; + + pid_t child_pid2; + int child_pidfd2; + + pid_t child_pid3; + int child_pidfd3; +}; + +FIXTURE_SETUP(file_handle) +{ + int ret; + int ipc_sockets[2]; + char c; + + self->pid = getpid(); + self->pidfd = sys_pidfd_open(self->pid, 0); + ASSERT_GE(self->pidfd, 0); + + ret = socketpair(AF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets); + EXPECT_EQ(ret, 0); + + self->child_pid1 = create_child(&self->child_pidfd1, CLONE_NEWUSER); + EXPECT_GE(self->child_pid1, 0); + + if (self->child_pid1 == 0) { + close(ipc_sockets[0]); + + if (write_nointr(ipc_sockets[1], "1", 1) < 0) + _exit(EXIT_FAILURE); + + close(ipc_sockets[1]); + + pause(); + _exit(EXIT_SUCCESS); + } + + close(ipc_sockets[1]); + ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1); + close(ipc_sockets[0]); + + ret = socketpair(AF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets); + EXPECT_EQ(ret, 0); + + self->child_pid2 = create_child(&self->child_pidfd2, CLONE_NEWUSER | CLONE_NEWPID); + EXPECT_GE(self->child_pid2, 0); + + if (self->child_pid2 == 0) { + close(ipc_sockets[0]); + + if (write_nointr(ipc_sockets[1], "1", 1) < 0) + _exit(EXIT_FAILURE); + + close(ipc_sockets[1]); + + pause(); + _exit(EXIT_SUCCESS); + } + + close(ipc_sockets[1]); + ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1); + close(ipc_sockets[0]); + + ret = socketpair(AF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets); + EXPECT_EQ(ret, 0); + + self->child_pid3 = create_child(&self->child_pidfd3, CLONE_NEWUSER | CLONE_NEWPID); + EXPECT_GE(self->child_pid3, 0); + + if (self->child_pid3 == 0) { + close(ipc_sockets[0]); + + if (write_nointr(ipc_sockets[1], "1", 1) < 0) + _exit(EXIT_FAILURE); + + close(ipc_sockets[1]); + + pause(); + _exit(EXIT_SUCCESS); + } + + close(ipc_sockets[1]); + ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1); + close(ipc_sockets[0]); +} + +FIXTURE_TEARDOWN(file_handle) +{ + EXPECT_EQ(close(self->pidfd), 0); + + EXPECT_EQ(sys_pidfd_send_signal(self->child_pidfd1, SIGKILL, NULL, 0), 0); + if (self->child_pidfd1 >= 0) + EXPECT_EQ(0, close(self->child_pidfd1)); + + EXPECT_EQ(sys_waitid(P_PID, self->child_pid1, NULL, WEXITED), 0); + + EXPECT_EQ(sys_pidfd_send_signal(self->child_pidfd2, SIGKILL, NULL, 0), 0); + if (self->child_pidfd2 >= 0) + EXPECT_EQ(0, close(self->child_pidfd2)); + + EXPECT_EQ(sys_waitid(P_PID, self->child_pid2, NULL, WEXITED), 0); + + if (self->child_pidfd3 >= 0) { + EXPECT_EQ(sys_pidfd_send_signal(self->child_pidfd3, SIGKILL, NULL, 0), 0); + EXPECT_EQ(0, 
close(self->child_pidfd3)); + EXPECT_EQ(sys_waitid(P_PID, self->child_pid3, NULL, WEXITED), 0); + } +} + +/* + * Test that we can decode a pidfs file handle in the same pid + * namespace. + */ +TEST_F(file_handle, file_handle_same_pidns) +{ + int mnt_id; + struct file_handle *fh; + int pidfd = -EBADF; + struct stat st1, st2; + + fh = malloc(sizeof(struct file_handle) + MAX_HANDLE_SZ); + ASSERT_NE(fh, NULL); + memset(fh, 0, sizeof(struct file_handle) + MAX_HANDLE_SZ); + fh->handle_bytes = MAX_HANDLE_SZ; + + ASSERT_EQ(name_to_handle_at(self->child_pidfd1, "", fh, &mnt_id, AT_EMPTY_PATH), 0); + + ASSERT_EQ(fstat(self->child_pidfd1, &st1), 0); + + pidfd = open_by_handle_at(self->pidfd, fh, 0); + ASSERT_GE(pidfd, 0); + + ASSERT_EQ(fstat(pidfd, &st2), 0); + ASSERT_TRUE(st1.st_dev == st2.st_dev && st1.st_ino == st2.st_ino); + + ASSERT_EQ(close(pidfd), 0); + + pidfd = open_by_handle_at(self->pidfd, fh, O_CLOEXEC); + ASSERT_GE(pidfd, 0); + + ASSERT_EQ(fstat(pidfd, &st2), 0); + ASSERT_TRUE(st1.st_dev == st2.st_dev && st1.st_ino == st2.st_ino); + + ASSERT_EQ(close(pidfd), 0); + + pidfd = open_by_handle_at(self->pidfd, fh, O_NONBLOCK); + ASSERT_GE(pidfd, 0); + + ASSERT_EQ(fstat(pidfd, &st2), 0); + ASSERT_TRUE(st1.st_dev == st2.st_dev && st1.st_ino == st2.st_ino); + + ASSERT_EQ(close(pidfd), 0); + + free(fh); +} + +/* + * Test that we can decode a pidfs file handle from a child pid + * namespace. + */ +TEST_F(file_handle, file_handle_child_pidns) +{ + int mnt_id; + struct file_handle *fh; + int pidfd = -EBADF; + struct stat st1, st2; + + fh = malloc(sizeof(struct file_handle) + MAX_HANDLE_SZ); + ASSERT_NE(fh, NULL); + memset(fh, 0, sizeof(struct file_handle) + MAX_HANDLE_SZ); + fh->handle_bytes = MAX_HANDLE_SZ; + + ASSERT_EQ(name_to_handle_at(self->child_pidfd2, "", fh, &mnt_id, AT_EMPTY_PATH), 0); + + ASSERT_EQ(fstat(self->child_pidfd2, &st1), 0); + + pidfd = open_by_handle_at(self->pidfd, fh, 0); + ASSERT_GE(pidfd, 0); + + ASSERT_EQ(fstat(pidfd, &st2), 0); + ASSERT_TRUE(st1.st_dev == st2.st_dev && st1.st_ino == st2.st_ino); + + ASSERT_EQ(close(pidfd), 0); + + pidfd = open_by_handle_at(self->pidfd, fh, O_CLOEXEC); + ASSERT_GE(pidfd, 0); + + ASSERT_EQ(fstat(pidfd, &st2), 0); + ASSERT_TRUE(st1.st_dev == st2.st_dev && st1.st_ino == st2.st_ino); + + ASSERT_EQ(close(pidfd), 0); + + pidfd = open_by_handle_at(self->pidfd, fh, O_NONBLOCK); + ASSERT_GE(pidfd, 0); + + ASSERT_EQ(fstat(pidfd, &st2), 0); + ASSERT_TRUE(st1.st_dev == st2.st_dev && st1.st_ino == st2.st_ino); + + ASSERT_EQ(close(pidfd), 0); + + free(fh); +} + +/* + * Test that we fail to decode a pidfs file handle from an ancestor + * child pid namespace. + */ +TEST_F(file_handle, file_handle_foreign_pidns) +{ + int mnt_id; + struct file_handle *fh; + pid_t pid; + + fh = malloc(sizeof(struct file_handle) + MAX_HANDLE_SZ); + ASSERT_NE(fh, NULL); + memset(fh, 0, sizeof(struct file_handle) + MAX_HANDLE_SZ); + fh->handle_bytes = MAX_HANDLE_SZ; + + ASSERT_EQ(name_to_handle_at(self->pidfd, "", fh, &mnt_id, AT_EMPTY_PATH), 0); + + ASSERT_EQ(setns(self->child_pidfd2, CLONE_NEWUSER | CLONE_NEWPID), 0); + + pid = fork(); + ASSERT_GE(pid, 0); + + if (pid == 0) { + int pidfd = open_by_handle_at(self->pidfd, fh, 0); + if (pidfd >= 0) { + TH_LOG("Managed to open pidfd outside of the caller's pid namespace hierarchy"); + _exit(1); + } + _exit(0); + } + + ASSERT_EQ(wait_for_pid(pid), 0); + + free(fh); +} + +/* + * Test that we can decode a pidfs file handle of a process that has + * exited but not been reaped. 
+ */ +TEST_F(file_handle, pid_has_exited) +{ + int mnt_id, pidfd, child_pidfd3; + struct file_handle *fh; + struct stat st1, st2; + + fh = malloc(sizeof(struct file_handle) + MAX_HANDLE_SZ); + ASSERT_NE(fh, NULL); + memset(fh, 0, sizeof(struct file_handle) + MAX_HANDLE_SZ); + fh->handle_bytes = MAX_HANDLE_SZ; + + ASSERT_EQ(name_to_handle_at(self->child_pidfd3, "", fh, &mnt_id, AT_EMPTY_PATH), 0); + + ASSERT_EQ(fstat(self->child_pidfd3, &st1), 0); + + pidfd = open_by_handle_at(self->pidfd, fh, 0); + ASSERT_GE(pidfd, 0); + + ASSERT_EQ(fstat(pidfd, &st2), 0); + ASSERT_TRUE(st1.st_dev == st2.st_dev && st1.st_ino == st2.st_ino); + + ASSERT_EQ(close(pidfd), 0); + + child_pidfd3 = self->child_pidfd3; + self->child_pidfd3 = -EBADF; + EXPECT_EQ(sys_pidfd_send_signal(child_pidfd3, SIGKILL, NULL, 0), 0); + EXPECT_EQ(close(child_pidfd3), 0); + EXPECT_EQ(sys_waitid(P_PID, self->child_pid3, NULL, WEXITED | WNOWAIT), 0); + + pidfd = open_by_handle_at(self->pidfd, fh, 0); + ASSERT_GE(pidfd, 0); + + EXPECT_EQ(sys_waitid(P_PID, self->child_pid3, NULL, WEXITED), 0); +} + +/* + * Test that we fail to decode a pidfs file handle of a process that has + * already been reaped. + */ +TEST_F(file_handle, pid_has_been_reaped) +{ + int mnt_id, pidfd, child_pidfd3; + struct file_handle *fh; + struct stat st1, st2; + + fh = malloc(sizeof(struct file_handle) + MAX_HANDLE_SZ); + ASSERT_NE(fh, NULL); + memset(fh, 0, sizeof(struct file_handle) + MAX_HANDLE_SZ); + fh->handle_bytes = MAX_HANDLE_SZ; + + ASSERT_EQ(name_to_handle_at(self->child_pidfd3, "", fh, &mnt_id, AT_EMPTY_PATH), 0); + + ASSERT_EQ(fstat(self->child_pidfd3, &st1), 0); + + pidfd = open_by_handle_at(self->pidfd, fh, 0); + ASSERT_GE(pidfd, 0); + + ASSERT_EQ(fstat(pidfd, &st2), 0); + ASSERT_TRUE(st1.st_dev == st2.st_dev && st1.st_ino == st2.st_ino); + + ASSERT_EQ(close(pidfd), 0); + + child_pidfd3 = self->child_pidfd3; + self->child_pidfd3 = -EBADF; + EXPECT_EQ(sys_pidfd_send_signal(child_pidfd3, SIGKILL, NULL, 0), 0); + EXPECT_EQ(close(child_pidfd3), 0); + EXPECT_EQ(sys_waitid(P_PID, self->child_pid3, NULL, WEXITED), 0); + + pidfd = open_by_handle_at(self->pidfd, fh, 0); + ASSERT_LT(pidfd, 0); +} + +/* + * Test valid flags to open a pidfd file handle. Note, that + * PIDFD_NONBLOCK is defined as O_NONBLOCK and O_NONBLOCK is an alias to + * O_NDELAY. Also note that PIDFD_THREAD is an alias for O_EXCL. + */ +TEST_F(file_handle, open_by_handle_at_valid_flags) +{ + int mnt_id; + struct file_handle *fh; + int pidfd = -EBADF; + struct stat st1, st2; + + fh = malloc(sizeof(struct file_handle) + MAX_HANDLE_SZ); + ASSERT_NE(fh, NULL); + memset(fh, 0, sizeof(struct file_handle) + MAX_HANDLE_SZ); + fh->handle_bytes = MAX_HANDLE_SZ; + + ASSERT_EQ(name_to_handle_at(self->child_pidfd2, "", fh, &mnt_id, AT_EMPTY_PATH), 0); + + ASSERT_EQ(fstat(self->child_pidfd2, &st1), 0); + + pidfd = open_by_handle_at(self->pidfd, fh, + O_RDONLY | + O_WRONLY | + O_RDWR | + O_NONBLOCK | + O_NDELAY | + O_CLOEXEC | + O_EXCL); + ASSERT_GE(pidfd, 0); + + ASSERT_EQ(fstat(pidfd, &st2), 0); + ASSERT_TRUE(st1.st_dev == st2.st_dev && st1.st_ino == st2.st_ino); + + ASSERT_EQ(close(pidfd), 0); +} + +/* + * Test that invalid flags passed to open a pidfd file handle are + * rejected. 
+ */ +TEST_F(file_handle, open_by_handle_at_invalid_flags) +{ + int mnt_id; + struct file_handle *fh; + int pidfd = -EBADF; + static const struct invalid_pidfs_file_handle_flags { + int oflag; + const char *oflag_name; + } invalid_pidfs_file_handle_flags[] = { + { FASYNC, "FASYNC" }, + { O_CREAT, "O_CREAT" }, + { O_NOCTTY, "O_NOCTTY" }, + { O_CREAT, "O_CREAT" }, + { O_TRUNC, "O_TRUNC" }, + { O_APPEND, "O_APPEND" }, + { O_SYNC, "O_SYNC" }, + { O_DSYNC, "O_DSYNC" }, + { O_DIRECT, "O_DIRECT" }, + { O_DIRECTORY, "O_DIRECTORY" }, + { O_NOFOLLOW, "O_NOFOLLOW" }, + { O_NOATIME, "O_NOATIME" }, + { O_PATH, "O_PATH" }, + { O_TMPFILE, "O_TMPFILE" }, + /* + * O_LARGEFILE is added implicitly by + * open_by_handle_at() so pidfs simply masks it off. + */ + }; + + fh = malloc(sizeof(struct file_handle) + MAX_HANDLE_SZ); + ASSERT_NE(fh, NULL); + memset(fh, 0, sizeof(struct file_handle) + MAX_HANDLE_SZ); + fh->handle_bytes = MAX_HANDLE_SZ; + + ASSERT_EQ(name_to_handle_at(self->child_pidfd2, "", fh, &mnt_id, AT_EMPTY_PATH), 0); + + for (int i = 0; i < ARRAY_SIZE(invalid_pidfs_file_handle_flags); i++) { + pidfd = open_by_handle_at(self->pidfd, fh, invalid_pidfs_file_handle_flags[i].oflag); + ASSERT_LT(pidfd, 0) { + TH_LOG("open_by_handle_at() succeeded with invalid flags: %s", invalid_pidfs_file_handle_flags[i].oflag_name); + } + } +} + +/* Test that lookup fails. */ +TEST_F(file_handle, lookup_must_fail) +{ + int mnt_id; + struct file_handle *fh; + + fh = malloc(sizeof(struct file_handle) + MAX_HANDLE_SZ); + ASSERT_NE(fh, NULL); + memset(fh, 0, sizeof(struct file_handle) + MAX_HANDLE_SZ); + fh->handle_bytes = MAX_HANDLE_SZ; + + ASSERT_NE(name_to_handle_at(self->child_pidfd2, "lookup-is-not-possible-with-pidfs", fh, &mnt_id, AT_EMPTY_PATH), 0); + ASSERT_EQ(errno, ENOTDIR); + ASSERT_NE(name_to_handle_at(self->child_pidfd2, "lookup-is-not-possible-with-pidfs", fh, &mnt_id, 0), 0); + ASSERT_EQ(errno, ENOTDIR); +} + +#ifndef AT_HANDLE_CONNECTABLE +#define AT_HANDLE_CONNECTABLE 0x002 +#endif + +/* + * Test that AT_HANDLE_CONNECTABLE is rejected. Connectable file handles + * don't make sense for pidfs. Note that currently AT_HANDLE_CONNECTABLE + * is rejected because it is incompatible with AT_EMPTY_PATH which is + * required with pidfds as we don't support lookup. + */ +TEST_F(file_handle, invalid_name_to_handle_at_flags) +{ + int mnt_id; + struct file_handle *fh; + + fh = malloc(sizeof(struct file_handle) + MAX_HANDLE_SZ); + ASSERT_NE(fh, NULL); + memset(fh, 0, sizeof(struct file_handle) + MAX_HANDLE_SZ); + fh->handle_bytes = MAX_HANDLE_SZ; + + ASSERT_NE(name_to_handle_at(self->child_pidfd2, "", fh, &mnt_id, AT_EMPTY_PATH | AT_HANDLE_CONNECTABLE), 0); +} + +#ifndef AT_HANDLE_FID +#define AT_HANDLE_FID 0x200 +#endif + +/* + * Test that a request with AT_HANDLE_FID always leads to decodable file + * handle as pidfs always provides export operations. 
+ */ +TEST_F(file_handle, valid_name_to_handle_at_flags) +{ + int mnt_id, pidfd; + struct file_handle *fh; + struct stat st1, st2; + + fh = malloc(sizeof(struct file_handle) + MAX_HANDLE_SZ); + ASSERT_NE(fh, NULL); + memset(fh, 0, sizeof(struct file_handle) + MAX_HANDLE_SZ); + fh->handle_bytes = MAX_HANDLE_SZ; + + ASSERT_EQ(name_to_handle_at(self->child_pidfd2, "", fh, &mnt_id, AT_EMPTY_PATH | AT_HANDLE_FID), 0); + + ASSERT_EQ(fstat(self->child_pidfd2, &st1), 0); + + pidfd = open_by_handle_at(self->pidfd, fh, 0); + ASSERT_GE(pidfd, 0); + + ASSERT_EQ(fstat(pidfd, &st2), 0); + ASSERT_TRUE(st1.st_dev == st2.st_dev && st1.st_ino == st2.st_ino); + + ASSERT_EQ(close(pidfd), 0); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/pidfd/pidfd_setns_test.c b/tools/testing/selftests/pidfd/pidfd_setns_test.c index 7c2a4349170a..222f8131283b 100644 --- a/tools/testing/selftests/pidfd/pidfd_setns_test.c +++ b/tools/testing/selftests/pidfd/pidfd_setns_test.c @@ -19,7 +19,6 @@ #include #include "pidfd.h" -#include "../clone3/clone3_selftests.h" #include "../kselftest_harness.h" #ifndef PIDFS_IOCTL_MAGIC @@ -118,22 +117,6 @@ FIXTURE(current_nsset) int child_pidfd_derived_nsfds2[PIDFD_NS_MAX]; }; -static int sys_waitid(int which, pid_t pid, int options) -{ - return syscall(__NR_waitid, which, pid, NULL, options, NULL); -} - -pid_t create_child(int *pidfd, unsigned flags) -{ - struct __clone_args args = { - .flags = CLONE_PIDFD | flags, - .exit_signal = SIGCHLD, - .pidfd = ptr_to_u64(pidfd), - }; - - return sys_clone3(&args, sizeof(struct clone_args)); -} - static bool switch_timens(void) { int fd, ret; @@ -150,28 +133,6 @@ static bool switch_timens(void) return ret == 0; } -static ssize_t read_nointr(int fd, void *buf, size_t count) -{ - ssize_t ret; - - do { - ret = read(fd, buf, count); - } while (ret < 0 && errno == EINTR); - - return ret; -} - -static ssize_t write_nointr(int fd, const void *buf, size_t count) -{ - ssize_t ret; - - do { - ret = write(fd, buf, count); - } while (ret < 0 && errno == EINTR); - - return ret; -} - FIXTURE_SETUP(current_nsset) { int i, proc_fd, ret; @@ -229,7 +190,7 @@ FIXTURE_SETUP(current_nsset) _exit(EXIT_SUCCESS); } - ASSERT_EQ(sys_waitid(P_PID, self->child_pid_exited, WEXITED | WNOWAIT), 0); + ASSERT_EQ(sys_waitid(P_PID, self->child_pid_exited, NULL, WEXITED | WNOWAIT), 0); self->pidfd = sys_pidfd_open(self->pid, 0); EXPECT_GE(self->pidfd, 0) { @@ -432,9 +393,9 @@ FIXTURE_TEARDOWN(current_nsset) EXPECT_EQ(0, close(self->child_pidfd1)); if (self->child_pidfd2 >= 0) EXPECT_EQ(0, close(self->child_pidfd2)); - ASSERT_EQ(sys_waitid(P_PID, self->child_pid_exited, WEXITED), 0); - ASSERT_EQ(sys_waitid(P_PID, self->child_pid1, WEXITED), 0); - ASSERT_EQ(sys_waitid(P_PID, self->child_pid2, WEXITED), 0); + ASSERT_EQ(sys_waitid(P_PID, self->child_pid_exited, NULL, WEXITED), 0); + ASSERT_EQ(sys_waitid(P_PID, self->child_pid1, NULL, WEXITED), 0); + ASSERT_EQ(sys_waitid(P_PID, self->child_pid2, NULL, WEXITED), 0); } static int preserve_ns(const int pid, const char *ns) diff --git a/tools/testing/selftests/pidfd/pidfd_wait.c b/tools/testing/selftests/pidfd/pidfd_wait.c index 0dcb8365ddc3..1e2d49751cde 100644 --- a/tools/testing/selftests/pidfd/pidfd_wait.c +++ b/tools/testing/selftests/pidfd/pidfd_wait.c @@ -26,22 +26,11 @@ #define SKIP(s, ...) 
XFAIL(s, ##__VA_ARGS__) #endif -static pid_t sys_clone3(struct clone_args *args) -{ - return syscall(__NR_clone3, args, sizeof(struct clone_args)); -} - -static int sys_waitid(int which, pid_t pid, siginfo_t *info, int options, - struct rusage *ru) -{ - return syscall(__NR_waitid, which, pid, info, options, ru); -} - TEST(wait_simple) { int pidfd = -1; pid_t parent_tid = -1; - struct clone_args args = { + struct __clone_args args = { .parent_tid = ptr_to_u64(&parent_tid), .pidfd = ptr_to_u64(&pidfd), .flags = CLONE_PIDFD | CLONE_PARENT_SETTID, @@ -55,7 +44,7 @@ TEST(wait_simple) pidfd = open("/proc/self", O_DIRECTORY | O_RDONLY | O_CLOEXEC); ASSERT_GE(pidfd, 0); - pid = sys_waitid(P_PIDFD, pidfd, &info, WEXITED, NULL); + pid = sys_waitid(P_PIDFD, pidfd, &info, WEXITED); ASSERT_NE(pid, 0); EXPECT_EQ(close(pidfd), 0); pidfd = -1; @@ -63,18 +52,18 @@ TEST(wait_simple) pidfd = open("/dev/null", O_RDONLY | O_CLOEXEC); ASSERT_GE(pidfd, 0); - pid = sys_waitid(P_PIDFD, pidfd, &info, WEXITED, NULL); + pid = sys_waitid(P_PIDFD, pidfd, &info, WEXITED); ASSERT_NE(pid, 0); EXPECT_EQ(close(pidfd), 0); pidfd = -1; - pid = sys_clone3(&args); + pid = sys_clone3(&args, sizeof(args)); ASSERT_GE(pid, 0); if (pid == 0) exit(EXIT_SUCCESS); - pid = sys_waitid(P_PIDFD, pidfd, &info, WEXITED, NULL); + pid = sys_waitid(P_PIDFD, pidfd, &info, WEXITED); ASSERT_GE(pid, 0); ASSERT_EQ(WIFEXITED(info.si_status), true); ASSERT_EQ(WEXITSTATUS(info.si_status), 0); @@ -89,7 +78,7 @@ TEST(wait_states) { int pidfd = -1; pid_t parent_tid = -1; - struct clone_args args = { + struct __clone_args args = { .parent_tid = ptr_to_u64(&parent_tid), .pidfd = ptr_to_u64(&pidfd), .flags = CLONE_PIDFD | CLONE_PARENT_SETTID, @@ -102,7 +91,7 @@ TEST(wait_states) }; ASSERT_EQ(pipe(pfd), 0); - pid = sys_clone3(&args); + pid = sys_clone3(&args, sizeof(args)); ASSERT_GE(pid, 0); if (pid == 0) { @@ -117,28 +106,28 @@ TEST(wait_states) } close(pfd[0]); - ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WSTOPPED, NULL), 0); + ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WSTOPPED), 0); ASSERT_EQ(info.si_signo, SIGCHLD); ASSERT_EQ(info.si_code, CLD_STOPPED); ASSERT_EQ(info.si_pid, parent_tid); ASSERT_EQ(sys_pidfd_send_signal(pidfd, SIGCONT, NULL, 0), 0); - ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WCONTINUED, NULL), 0); + ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WCONTINUED), 0); ASSERT_EQ(write(pfd[1], "C", 1), 1); close(pfd[1]); ASSERT_EQ(info.si_signo, SIGCHLD); ASSERT_EQ(info.si_code, CLD_CONTINUED); ASSERT_EQ(info.si_pid, parent_tid); - ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WUNTRACED, NULL), 0); + ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WUNTRACED), 0); ASSERT_EQ(info.si_signo, SIGCHLD); ASSERT_EQ(info.si_code, CLD_STOPPED); ASSERT_EQ(info.si_pid, parent_tid); ASSERT_EQ(sys_pidfd_send_signal(pidfd, SIGKILL, NULL, 0), 0); - ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WEXITED, NULL), 0); + ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WEXITED), 0); ASSERT_EQ(info.si_signo, SIGCHLD); ASSERT_EQ(info.si_code, CLD_KILLED); ASSERT_EQ(info.si_pid, parent_tid); @@ -151,7 +140,7 @@ TEST(wait_nonblock) int pidfd; unsigned int flags = 0; pid_t parent_tid = -1; - struct clone_args args = { + struct __clone_args args = { .parent_tid = ptr_to_u64(&parent_tid), .flags = CLONE_PARENT_SETTID, .exit_signal = SIGCHLD, @@ -173,12 +162,12 @@ TEST(wait_nonblock) SKIP(return, "Skipping PIDFD_NONBLOCK test"); } - ret = sys_waitid(P_PIDFD, pidfd, &info, WEXITED, NULL); + ret = sys_waitid(P_PIDFD, pidfd, &info, WEXITED); ASSERT_LT(ret, 0); ASSERT_EQ(errno, ECHILD); 
EXPECT_EQ(close(pidfd), 0); - pid = sys_clone3(&args); + pid = sys_clone3(&args, sizeof(args)); ASSERT_GE(pid, 0); if (pid == 0) { @@ -201,7 +190,7 @@ TEST(wait_nonblock) * Callers need to see EAGAIN/EWOULDBLOCK with non-blocking pidfd when * child processes exist but none have exited. */ - ret = sys_waitid(P_PIDFD, pidfd, &info, WEXITED, NULL); + ret = sys_waitid(P_PIDFD, pidfd, &info, WEXITED); ASSERT_LT(ret, 0); ASSERT_EQ(errno, EAGAIN); @@ -210,19 +199,19 @@ TEST(wait_nonblock) * WNOHANG raised explicitly when child processes exist but none have * exited. */ - ret = sys_waitid(P_PIDFD, pidfd, &info, WEXITED | WNOHANG, NULL); + ret = sys_waitid(P_PIDFD, pidfd, &info, WEXITED | WNOHANG); ASSERT_EQ(ret, 0); ASSERT_EQ(fcntl(pidfd, F_SETFL, (flags & ~O_NONBLOCK)), 0); - ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WSTOPPED, NULL), 0); + ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WSTOPPED), 0); ASSERT_EQ(info.si_signo, SIGCHLD); ASSERT_EQ(info.si_code, CLD_STOPPED); ASSERT_EQ(info.si_pid, parent_tid); ASSERT_EQ(sys_pidfd_send_signal(pidfd, SIGCONT, NULL, 0), 0); - ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WEXITED, NULL), 0); + ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WEXITED), 0); ASSERT_EQ(info.si_signo, SIGCHLD); ASSERT_EQ(info.si_code, CLD_EXITED); ASSERT_EQ(info.si_pid, parent_tid); -- cgit v1.2.3 From 212fbabe1dfecdda35bf5aaa900f745a3bab5ac4 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 16 Dec 2024 19:28:24 +0000 Subject: KVM: arm64: Fix set_id_regs selftest for ASIDBITS becoming unwritable In commit 03c7527e97f7 ("KVM: arm64: Do not allow ID_AA64MMFR0_EL1.ASIDbits to be overridden") we made that bitfield in the ID registers unwritable however the change neglected to make the corresponding update to set_id_regs resulting in it failing: ok 56 ID_AA64MMFR0_EL1_BIGEND ==== Test Assertion Failure ==== aarch64/set_id_regs.c:434: masks[idx] & ftr_bits[j].mask == ftr_bits[j].mask pid=5566 tid=5566 errno=22 - Invalid argument 1 0x00000000004034a7: test_vm_ftr_id_regs at set_id_regs.c:434 2 0x0000000000401b53: main at set_id_regs.c:684 3 0x0000ffff8e6b7543: ?? ??:0 4 0x0000ffff8e6b7617: ?? ??:0 5 0x0000000000401e6f: _start at ??:? not ok 8 selftests: kvm: set_id_regs # exit=254 Remove ID_AA64MMFR1_EL1.ASIDBITS from the set of bitfields we test for writeability. 
Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20241216-kvm-arm64-fix-set-id-asidbits-v1-1-8b105b888fc3@kernel.org Acked-by: Marc Zyngier Signed-off-by: Oliver Upton --- tools/testing/selftests/kvm/aarch64/set_id_regs.c | 1 - 1 file changed, 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/kvm/aarch64/set_id_regs.c b/tools/testing/selftests/kvm/aarch64/set_id_regs.c index a79b7f18452d..3a97c160b5fe 100644 --- a/tools/testing/selftests/kvm/aarch64/set_id_regs.c +++ b/tools/testing/selftests/kvm/aarch64/set_id_regs.c @@ -152,7 +152,6 @@ static const struct reg_ftr_bits ftr_id_aa64mmfr0_el1[] = { REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, BIGENDEL0, 0), REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, SNSMEM, 0), REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, BIGEND, 0), - REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, ASIDBITS, 0), REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, PARANGE, 0), REG_FTR_END, }; -- cgit v1.2.3 From 498d5b14db8c9118be139f668720c67bea2dc344 Mon Sep 17 00:00:00 2001 From: Charlie Jenkins Date: Wed, 11 Dec 2024 23:01:43 -0800 Subject: riscv: selftests: Fix warnings pointer masking test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When compiling the pointer masking tests with -Wall this warning is present: pointer_masking.c: In function ‘test_tagged_addr_abi_sysctl’: pointer_masking.c:203:9: warning: ignoring return value of ‘pwrite’ declared with attribute ‘warn_unused_result’ [-Wunused-result] 203 | pwrite(fd, &value, 1, 0); | ^~~~~~~~~~~~~~~~~~~~~~~~ pointer_masking.c:208:9: warning: ignoring return value of ‘pwrite’ declared with attribute ‘warn_unused_result’ [-Wunused-result] 208 | pwrite(fd, &value, 1, 0); I came across this on riscv64-linux-gnu-gcc (Ubuntu 11.4.0-1ubuntu1~22.04). Fix this by checking that the number of bytes written equal the expected number of bytes written. 
Fixes: 7470b5afd150 ("riscv: selftests: Add a pointer masking test") Signed-off-by: Charlie Jenkins Reviewed-by: Andrew Jones Link: https://lore.kernel.org/r/20241211-fix_warnings_pointer_masking_tests-v6-1-c7ae708fbd2f@rivosinc.com Signed-off-by: Palmer Dabbelt --- .../testing/selftests/riscv/abi/pointer_masking.c | 28 +++++++++++++++++----- 1 file changed, 22 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/riscv/abi/pointer_masking.c b/tools/testing/selftests/riscv/abi/pointer_masking.c index dee41b7ee3e3..059d2e87eb1f 100644 --- a/tools/testing/selftests/riscv/abi/pointer_masking.c +++ b/tools/testing/selftests/riscv/abi/pointer_masking.c @@ -185,8 +185,20 @@ static void test_fork_exec(void) } } +static bool pwrite_wrapper(int fd, void *buf, size_t count, const char *msg) +{ + int ret = pwrite(fd, buf, count, 0); + + if (ret != count) { + ksft_perror(msg); + return false; + } + return true; +} + static void test_tagged_addr_abi_sysctl(void) { + char *err_pwrite_msg = "failed to write to /proc/sys/abi/tagged_addr_disabled\n"; char value; int fd; @@ -200,14 +212,18 @@ static void test_tagged_addr_abi_sysctl(void) } value = '1'; - pwrite(fd, &value, 1, 0); - ksft_test_result(set_tagged_addr_ctrl(min_pmlen, true) == -EINVAL, - "sysctl disabled\n"); + if (!pwrite_wrapper(fd, &value, 1, "write '1'")) + ksft_test_result_fail(err_pwrite_msg); + else + ksft_test_result(set_tagged_addr_ctrl(min_pmlen, true) == -EINVAL, + "sysctl disabled\n"); value = '0'; - pwrite(fd, &value, 1, 0); - ksft_test_result(set_tagged_addr_ctrl(min_pmlen, true) == 0, - "sysctl enabled\n"); + if (!pwrite_wrapper(fd, &value, 1, "write '0'")) + ksft_test_result_fail(err_pwrite_msg); + else + ksft_test_result(set_tagged_addr_ctrl(min_pmlen, true) == 0, + "sysctl enabled\n"); set_tagged_addr_ctrl(0, false); -- cgit v1.2.3 From a7c205120d339b6ad2557fe3f33fdf20394f1a0f Mon Sep 17 00:00:00 2001 From: Mykyta Yatsenko Date: Tue, 17 Dec 2024 18:11:13 +0000 Subject: veristat: Fix top source line stat collection Fix comparator implementation to return most popular source code lines instead of least. Introduce min/max macro for building veristat outside of Linux repository. Signed-off-by: Mykyta Yatsenko Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20241217181113.364651-1-mykyta.yatsenko5@gmail.com --- tools/testing/selftests/bpf/veristat.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/veristat.c b/tools/testing/selftests/bpf/veristat.c index 162fe27d06f8..9d17b4dfc170 100644 --- a/tools/testing/selftests/bpf/veristat.c +++ b/tools/testing/selftests/bpf/veristat.c @@ -26,6 +26,14 @@ #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) #endif +#ifndef max +#define max(a, b) ((a) > (b) ? (a) : (b)) +#endif + +#ifndef min +#define min(a, b) ((a) < (b) ? (a) : (b)) +#endif + enum stat_id { VERDICT, DURATION, @@ -904,7 +912,7 @@ static int line_cnt_cmp(const void *a, const void *b) const struct line_cnt *b_cnt = (const struct line_cnt *)b; if (a_cnt->cnt != b_cnt->cnt) - return a_cnt->cnt < b_cnt->cnt ? -1 : 1; + return a_cnt->cnt > b_cnt->cnt ? -1 : 1; return strcmp(a_cnt->line, b_cnt->line); } -- cgit v1.2.3 From 026ac4dda8f666f737b375731e30ef8f5698b215 Mon Sep 17 00:00:00 2001 From: Madhavan Srinivasan Date: Mon, 16 Dec 2024 21:32:55 +0530 Subject: selftest/powerpc/ptrace/core-pkey: Remove duplicate macros ./powerpc/ptrace/Makefile includes flags.mk. 
In flags.mk, -I$(selfdir)/powerpc/include is always included as part of CFLAGS. So it will pick up the "pkeys.h" defined in powerpc/include. core-pkey.c test has couple of macros defined which are part of "pkeys.h" header file. Remove those duplicates and include "pkeys.h" Reviewed-by: Ritesh Harjani (IBM) Signed-off-by: Madhavan Srinivasan Link: https://patch.msgid.link/20241216160257.87252-1-maddy@linux.ibm.com --- tools/testing/selftests/powerpc/ptrace/core-pkey.c | 19 +------------------ 1 file changed, 1 insertion(+), 18 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/powerpc/ptrace/core-pkey.c b/tools/testing/selftests/powerpc/ptrace/core-pkey.c index f6da4cb30cd6..31c9bf6d95db 100644 --- a/tools/testing/selftests/powerpc/ptrace/core-pkey.c +++ b/tools/testing/selftests/powerpc/ptrace/core-pkey.c @@ -16,14 +16,7 @@ #include #include "ptrace.h" #include "child.h" - -#ifndef __NR_pkey_alloc -#define __NR_pkey_alloc 384 -#endif - -#ifndef __NR_pkey_free -#define __NR_pkey_free 385 -#endif +#include "pkeys.h" #ifndef NT_PPC_PKEY #define NT_PPC_PKEY 0x110 @@ -61,16 +54,6 @@ struct shared_info { time_t core_time; }; -static int sys_pkey_alloc(unsigned long flags, unsigned long init_access_rights) -{ - return syscall(__NR_pkey_alloc, flags, init_access_rights); -} - -static int sys_pkey_free(int pkey) -{ - return syscall(__NR_pkey_free, pkey); -} - static int increase_core_file_limit(void) { struct rlimit rlim; -- cgit v1.2.3 From b0e1b95b1597ad3d87ff91d52f6b67cc9423c31e Mon Sep 17 00:00:00 2001 From: Madhavan Srinivasan Date: Mon, 16 Dec 2024 21:32:56 +0530 Subject: selftest/powerpc/ptrace/ptrace-pkey: Remove duplicate macros ./powerpc/ptrace/Makefile includes flags.mk. In flags.mk, -I$(selfdir)/powerpc/include is always included as part of CFLAGS. So it will pick up the "pkeys.h" defined in powerpc/include. ptrace-pkey.c test has macros defined which are part of "pkeys.h" header file. Remove those duplicates and include "pkeys.h" Reviewed-by: Ritesh Harjani (IBM) Signed-off-by: Madhavan Srinivasan Link: https://patch.msgid.link/20241216160257.87252-2-maddy@linux.ibm.com --- tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c b/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c index d89474377f11..6893ed096457 100644 --- a/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c @@ -7,14 +7,7 @@ */ #include "ptrace.h" #include "child.h" - -#ifndef __NR_pkey_alloc -#define __NR_pkey_alloc 384 -#endif - -#ifndef __NR_pkey_free -#define __NR_pkey_free 385 -#endif +#include "pkeys.h" #ifndef NT_PPC_PKEY #define NT_PPC_PKEY 0x110 @@ -61,11 +54,6 @@ struct shared_info { unsigned long invalid_uamor; }; -static int sys_pkey_alloc(unsigned long flags, unsigned long init_access_rights) -{ - return syscall(__NR_pkey_alloc, flags, init_access_rights); -} - static int child(struct shared_info *info) { unsigned long reg; -- cgit v1.2.3 From 65f5038352e8f635fb827f7482f1d08fae4d16bf Mon Sep 17 00:00:00 2001 From: Madhavan Srinivasan Date: Mon, 16 Dec 2024 21:32:57 +0530 Subject: selftest/powerpc/ptrace: Cleanup duplicate macro definitions Both core-pkey.c and ptrace-pkey.c tests have similar macro definitions, move them to "pkeys.h" and remove the macro definitions from the C file. 
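For reference (not in the patch itself), the macros now shared via pkeys.h are typically combined to address the two AMR bits that belong to a given key. A standalone sketch, with the definitions copied from the powerpc/include/pkeys.h hunk below (u64 spelled as uint64_t) and the helper name made up for illustration:

#include <stdint.h>

#define PKEY_BITS_PER_PKEY	2
#define PKEY_BITS_MASK		((1UL << PKEY_BITS_PER_PKEY) - 1)
#define AMR_BITS_PER_PKEY	2
#define PKEY_REG_BITS		(sizeof(uint64_t) * 8)
#define pkeyshift(pkey)		(PKEY_REG_BITS - ((pkey + 1) * AMR_BITS_PER_PKEY))

/* Hypothetical helper: mask covering the AMR bits owned by one pkey. */
static inline uint64_t pkey_amr_mask(int pkey)
{
	return PKEY_BITS_MASK << pkeyshift(pkey);
}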
Reviewed-by: Ritesh Harjani (IBM) Signed-off-by: Madhavan Srinivasan Link: https://patch.msgid.link/20241216160257.87252-3-maddy@linux.ibm.com --- tools/testing/selftests/powerpc/include/pkeys.h | 8 ++++++++ tools/testing/selftests/powerpc/ptrace/core-pkey.c | 12 ------------ tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c | 12 ------------ 3 files changed, 8 insertions(+), 24 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/powerpc/include/pkeys.h b/tools/testing/selftests/powerpc/include/pkeys.h index 51729d9a7111..3a0129467de6 100644 --- a/tools/testing/selftests/powerpc/include/pkeys.h +++ b/tools/testing/selftests/powerpc/include/pkeys.h @@ -35,10 +35,18 @@ #define __NR_pkey_alloc 384 #define __NR_pkey_free 385 +#ifndef NT_PPC_PKEY +#define NT_PPC_PKEY 0x110 +#endif + #define PKEY_BITS_PER_PKEY 2 #define NR_PKEYS 32 #define PKEY_BITS_MASK ((1UL << PKEY_BITS_PER_PKEY) - 1) +#define AMR_BITS_PER_PKEY 2 +#define PKEY_REG_BITS (sizeof(u64) * 8) +#define pkeyshift(pkey) (PKEY_REG_BITS - ((pkey + 1) * AMR_BITS_PER_PKEY)) + inline unsigned long pkeyreg_get(void) { return mfspr(SPRN_AMR); diff --git a/tools/testing/selftests/powerpc/ptrace/core-pkey.c b/tools/testing/selftests/powerpc/ptrace/core-pkey.c index 31c9bf6d95db..f061434af452 100644 --- a/tools/testing/selftests/powerpc/ptrace/core-pkey.c +++ b/tools/testing/selftests/powerpc/ptrace/core-pkey.c @@ -18,18 +18,6 @@ #include "child.h" #include "pkeys.h" -#ifndef NT_PPC_PKEY -#define NT_PPC_PKEY 0x110 -#endif - -#ifndef PKEY_DISABLE_EXECUTE -#define PKEY_DISABLE_EXECUTE 0x4 -#endif - -#define AMR_BITS_PER_PKEY 2 -#define PKEY_REG_BITS (sizeof(u64) * 8) -#define pkeyshift(pkey) (PKEY_REG_BITS - ((pkey + 1) * AMR_BITS_PER_PKEY)) - #define CORE_FILE_LIMIT (5 * 1024 * 1024) /* 5 MB should be enough */ static const char core_pattern_file[] = "/proc/sys/kernel/core_pattern"; diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c b/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c index 6893ed096457..fc633014424f 100644 --- a/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c @@ -9,18 +9,6 @@ #include "child.h" #include "pkeys.h" -#ifndef NT_PPC_PKEY -#define NT_PPC_PKEY 0x110 -#endif - -#ifndef PKEY_DISABLE_EXECUTE -#define PKEY_DISABLE_EXECUTE 0x4 -#endif - -#define AMR_BITS_PER_PKEY 2 -#define PKEY_REG_BITS (sizeof(u64) * 8) -#define pkeyshift(pkey) (PKEY_REG_BITS - ((pkey + 1) * AMR_BITS_PER_PKEY)) - static const char user_read[] = "[User Read (Running)]"; static const char user_write[] = "[User Write (Running)]"; static const char ptrace_read_running[] = "[Ptrace Read (Running)]"; -- cgit v1.2.3 From 88395c071f08d9ea2314045230206cc5a3f82ef0 Mon Sep 17 00:00:00 2001 From: Soham Chakradeo Date: Tue, 17 Dec 2024 18:51:58 +0000 Subject: selftests/net: packetdrill: import tcp/ecn, tcp/close, tcp/sack, tcp/tcp_info Same as initial tests, import verbatim from github.com/google/packetdrill, aside from: - update `source ./defaults.sh` path to adjust for flat dir - add SPDX headers - remove author statements if any - drop blank lines at EOF Same test process as previous tests. Both with and without debug mode. Recording the steps once: make mrproper vng --build \ --config tools/testing/selftests/net/packetdrill/config \ --config kernel/configs/debug.config vng -v --run . 
--user root --cpus 4 -- \ make -C tools/testing/selftests TARGETS=net/packetdrill run_tests Signed-off-by: Willem de Bruijn Signed-off-by: Soham Chakradeo Link: https://patch.msgid.link/20241217185203.297935-2-sohamch.kernel@gmail.com Signed-off-by: Jakub Kicinski --- ...tcp_close_close-local-close-then-remote-fin.pkt | 23 ++++++++ .../packetdrill/tcp_close_close-on-syn-sent.pkt | 21 +++++++ .../tcp_close_close-remote-fin-then-close.pkt | 36 ++++++++++++ .../net/packetdrill/tcp_ecn_ecn-uses-ect0.pkt | 21 +++++++ .../tcp_sack_sack-route-refresh-ip-tos.pkt | 37 ++++++++++++ ...tcp_sack_sack-shift-sacked-2-6-8-3-9-nofack.pkt | 64 +++++++++++++++++++++ .../tcp_sack_sack-shift-sacked-7-3-4-8-9-fack.pkt | 66 ++++++++++++++++++++++ .../tcp_sack_sack-shift-sacked-7-5-6-8-9-fack.pkt | 62 ++++++++++++++++++++ .../tcp_tcp_info_tcp-info-last_data_recv.pkt | 20 +++++++ .../tcp_tcp_info_tcp-info-rwnd-limited.pkt | 54 ++++++++++++++++++ .../tcp_tcp_info_tcp-info-sndbuf-limited.pkt | 38 +++++++++++++ 11 files changed, 442 insertions(+) create mode 100644 tools/testing/selftests/net/packetdrill/tcp_close_close-local-close-then-remote-fin.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_close_close-on-syn-sent.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_close_close-remote-fin-then-close.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_ecn_ecn-uses-ect0.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_sack_sack-route-refresh-ip-tos.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_sack_sack-shift-sacked-2-6-8-3-9-nofack.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_sack_sack-shift-sacked-7-3-4-8-9-fack.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_sack_sack-shift-sacked-7-5-6-8-9-fack.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_tcp_info_tcp-info-last_data_recv.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_tcp_info_tcp-info-rwnd-limited.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_tcp_info_tcp-info-sndbuf-limited.pkt (limited to 'tools') diff --git a/tools/testing/selftests/net/packetdrill/tcp_close_close-local-close-then-remote-fin.pkt b/tools/testing/selftests/net/packetdrill/tcp_close_close-local-close-then-remote-fin.pkt new file mode 100644 index 000000000000..8514d6bdbb6d --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_close_close-local-close-then-remote-fin.pkt @@ -0,0 +1,23 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test basic connection teardown where local process closes first: +// the local process calls close() first, so we send a FIN, and receive an ACK. +// Then we receive a FIN and ACK it. + +`./defaults.sh` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +.01...0.011 connect(3, ..., ...) = 0 + +0 > S 0:0(0) <...> + +0 < S. 0:0(0) ack 1 win 32768 + +0 > . 1:1(0) ack 1 + + +0 write(3, ..., 1000) = 1000 + +0 > P. 1:1001(1000) ack 1 + +0 < . 1:1(0) ack 1001 win 257 + + +0 close(3) = 0 + +0 > F. 1001:1001(0) ack 1 + +0 < . 1:1(0) ack 1002 win 257 + + +0 < F. 1:1(0) ack 1002 win 257 + +0 > . 
1002:1002(0) ack 2 diff --git a/tools/testing/selftests/net/packetdrill/tcp_close_close-on-syn-sent.pkt b/tools/testing/selftests/net/packetdrill/tcp_close_close-on-syn-sent.pkt new file mode 100644 index 000000000000..04103134bd99 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_close_close-on-syn-sent.pkt @@ -0,0 +1,21 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test to make sure no RST is being sent when close() +// is called on a socket with SYN_SENT state. + +`./defaults.sh` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0 + + +0 connect(3, ..., ...) = -1 EINPROGRESS (Operation now in progress) + +0 > S 0:0(0) <...> + +// Application decideds to close the socket in SYN_SENT state +// Make sure no RST is sent after close(). + +0 close(3) = 0 + +// Receive syn-ack to trigger the send side packet examination: +// If a RESET were sent right after close(), it would have failed with +// a mismatched timestamp. + +.1 < S. 0:0(0) ack 1 win 32000 + +0 > R 1:1(0) diff --git a/tools/testing/selftests/net/packetdrill/tcp_close_close-remote-fin-then-close.pkt b/tools/testing/selftests/net/packetdrill/tcp_close_close-remote-fin-then-close.pkt new file mode 100644 index 000000000000..5f3a2914213a --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_close_close-remote-fin-then-close.pkt @@ -0,0 +1,36 @@ +// SPDX-License-Identifier: GPL-2.0 +// Verify behavior for the sequence: remote side sends FIN, then we close(). +// Since the remote side (client) closes first, we test our LAST_ACK code path. + +`./defaults.sh` + +// Initialize a server socket. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 + +0 > S. 0:0(0) ack 1 + +0 < . 1:1(0) ack 1 win 257 + + +0 accept(3, ..., ...) = 4 + +// Client closes first. + +.01 < F. 1:1(0) ack 1 win 257 + +0 > . 1:1(0) ack 2 + +// App notices that client closed. + +0 read(4, ..., 1000) = 0 + +// Then we close. + +.01 close(4) = 0 + +0 > F. 1:1(0) ack 2 + +// Client ACKs our FIN. + +.01 < . 2:2(0) ack 2 win 257 + +// Verify that we send RST in response to any incoming segments +// (because the kernel no longer has any record of this socket). + +.01 < . 2:2(0) ack 2 win 257 + +0 > R 2:2(0) diff --git a/tools/testing/selftests/net/packetdrill/tcp_ecn_ecn-uses-ect0.pkt b/tools/testing/selftests/net/packetdrill/tcp_ecn_ecn-uses-ect0.pkt new file mode 100644 index 000000000000..643baf3267cf --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_ecn_ecn-uses-ect0.pkt @@ -0,0 +1,21 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test ECN: verify that Linux TCP ECN sending code uses ECT0 (not ECT1). +// +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=1 # fully enabled +` + +// Initialize connection + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4 + +// ECN handshake: send EW flags in SYN packet, E flag in SYN-ACK response ++.002 ... 0.004 connect(4, ..., ...) = 0 + + +0 > SEW 0:0(0) ++.002 < SE. 0:0(0) ack 1 win 32767 + +0 > . 1:1(0) ack 1 + +// Write 1 MSS. ++.002 write(4, ..., 1000) = 1000 +// Send 1 MSS with ect0. + +0 > [ect0] P. 
1:1001(1000) ack 1 diff --git a/tools/testing/selftests/net/packetdrill/tcp_sack_sack-route-refresh-ip-tos.pkt b/tools/testing/selftests/net/packetdrill/tcp_sack_sack-route-refresh-ip-tos.pkt new file mode 100644 index 000000000000..310ef31518da --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_sack_sack-route-refresh-ip-tos.pkt @@ -0,0 +1,37 @@ +// SPDX-License-Identifier: GPL-2.0 +// Verify that setsockopt calls that force a route refresh do not +// cause problems matching SACKs with packets in the write queue. +// This variant tests IP_TOS. + +`./defaults.sh` + +// Establish a connection. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_IP, IP_MTU_DISCOVER, [IP_PMTUDISC_DONT], 1) = 0 + +0...0.010 connect(3, ..., ...) = 0 + + +0 > S 0:0(0) + +.01 < S. 0:0(0) ack 1 win 65535 + +0 > . 1:1(0) ack 1 + + +.01 write(3, ..., 5840) = 5840 + +0 > P. 1:5841(5840) ack 1 + +.01 < . 1:1(0) ack 5841 win 65535 + + +.01 write(3, ..., 5840) = 5840 + +0 > P. 5841:11681(5840) ack 1 + +.01 < . 1:1(0) ack 11681 win 65535 + + +.01 write(3, ..., 14600) = 14600 + +0 > P. 11681:26281(14600) ack 1 + +// Try the socket option that we know can force a route refresh. + +0 setsockopt(3, SOL_IP, IP_TOS, [4], 1) = 0 +// Then revert to avoid routing/mangling/etc implications of that setting. + +0 setsockopt(3, SOL_IP, IP_TOS, [0], 1) = 0 + +// Verify that we do not retransmit the SACKed segments. + +.01 < . 1:1(0) ack 13141 win 65535 + +0 > . 13141:16061(2920) ack 1 + +0 > P. 17521:20441(2920) ack 1 + +.01 < . 1:1(0) ack 26281 win 65535 diff --git a/tools/testing/selftests/net/packetdrill/tcp_sack_sack-shift-sacked-2-6-8-3-9-nofack.pkt b/tools/testing/selftests/net/packetdrill/tcp_sack_sack-shift-sacked-2-6-8-3-9-nofack.pkt new file mode 100644 index 000000000000..f185e1ac57ea --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_sack_sack-shift-sacked-2-6-8-3-9-nofack.pkt @@ -0,0 +1,64 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test shifting of newly-SACKed ranges onto the previous already-SACKed skb. +// This variant tests non-FACK SACK with SACKs coming in the order +// 2 6 8 3 9, to test what happens when we get a new SACKed range +// (for packet 3) that is on the right of an existing SACKed range +// (for packet 2). + +`./defaults.sh` + +// Establish a connection and send 10 MSS. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 + +0 > S. 0:0(0) ack 1 + +.1 < . 1:1(0) ack 1 win 1024 + +0 accept(3, ..., ...) = 4 + + +0 write(4, ..., 10000) = 10000 + +0 > P. 1:10001(10000) ack 1 + + +.1 < . 1:1(0) ack 1 win 257 ++.001 < . 1:1(0) ack 1 win 257 ++.001 < . 1:1(0) ack 1 win 257 + +// 3 SACKed packets, so we enter Fast Recovery. + +0 > . 1:1001(1000) ack 1 + +0 %{ assert tcpi_ca_state == TCP_CA_Recovery, tcpi_ca_state }% + +0 %{ assert tcpi_lost == 6, tcpi_lost }% + +// SACK for 3001:4001. +// This SACK for an adjacent range causes the sender to +// shift the newly-SACKed range onto the previous skb. ++.007 < . 1:1(0) ack 1 win 257 + +0 > . 1001:2001(1000) ack 1 + +0 %{ assert tcpi_lost == 5, tcpi_lost }% + +0 %{ assert tcpi_reordering == 6, tcpi_reordering }% // 8001:9001 -> 3001:4001 is 6 + +// SACK for 9001:10001. + +.01 < . 1:1(0) ack 1 win 257 + +0 %{ assert tcpi_lost == 5, tcpi_lost }% + +// ACK for 1:1001 as packets from t=0.303 arrive. ++.083 < . 
1:1(0) ack 1001 win 257 + +0 %{ assert tcpi_lost == 4,tcpi_lost }% + +// ACK for 1:4001 as packets from t=0.310 arrive. ++.017 < . 1:1(0) ack 4001 win 257 + +0 %{ assert tcpi_lost == 3,tcpi_lost }% + +// ACK for 1:7001 as packets from t=0.320 arrive. + +.01 < . 1:1(0) ack 7001 win 257 + +// ACK for all data as packets from t=0.403 arrive. + +.1 < . 1:1(0) ack 10001 win 257 + +0 %{ +assert tcpi_ca_state == TCP_CA_Open, tcpi_ca_state +assert tcpi_unacked == 0, tcpi_unacked +assert tcpi_sacked == 0, tcpi_sacked +assert tcpi_lost == 0, tcpi_lost +assert tcpi_retrans == 0, tcpi_retrans +}% diff --git a/tools/testing/selftests/net/packetdrill/tcp_sack_sack-shift-sacked-7-3-4-8-9-fack.pkt b/tools/testing/selftests/net/packetdrill/tcp_sack_sack-shift-sacked-7-3-4-8-9-fack.pkt new file mode 100644 index 000000000000..0093b4973934 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_sack_sack-shift-sacked-7-3-4-8-9-fack.pkt @@ -0,0 +1,66 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test shifting of newly-SACKed ranges onto the previous already-SACKed skb. +// This variant tests the case where we mark packets 0-4 lost, then +// get a SACK for 3, and then a SACK for 4. + +`./defaults.sh` + +// Establish a connection and send 10 MSS. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 + +0 > S. 0:0(0) ack 1 + +.1 < . 1:1(0) ack 1 win 1024 + +0 accept(3, ..., ...) = 4 + + +0 write(4, ..., 10000) = 10000 + +0 > P. 1:10001(10000) ack 1 + +// SACK for 7001:8001. Using RACK we delay the fast retransmit. + +.1 < . 1:1(0) ack 1 win 257 +// RACK reordering timer ++.027 > . 1:1001(1000) ack 1 + +0 %{ +assert tcpi_ca_state == TCP_CA_Recovery, tcpi_ca_state +assert tcpi_lost == 7, tcpi_lost # RACK thinks 1:7001 are lost +assert tcpi_reordering == 3, tcpi_reordering +}% + +// SACK for 3001:4001. ++.002 < . 1:1(0) ack 1 win 257 + +0 > . 1001:2001(1000) ack 1 + +0 %{ +assert tcpi_lost == 6, tcpi_lost # since 3001:4001 is no longer lost +assert tcpi_reordering == 5, tcpi_reordering # 7001:8001 -> 3001:4001 +}% + +// SACK for 4001:5001. +// This SACK for an adjacent range causes the sender to +// shift the newly-SACKed range onto the previous skb. +// It uses the RFC3517 algorithm to mark 1:3001 lost +// because >=3 higher-sequence packets are SACKed. ++.002 < . 1:1(0) ack 1 win 257 + +0 > . 2001:3001(1000) ack 1 + +0 %{ +assert tcpi_lost == 5,tcpi_lost # SACK/RFC3517 thinks 1:3001 are lost +}% + +// SACK for 8001:9001. ++.002 < . 1:1(0) ack 1 win 257 + +// SACK for 9001:10001. ++.002 < . 1:1(0) ack 1 win 257 + +0 > . 5001:6001(1000) ack 1 + +// To simplify clean-up, say we get an ACK for all data. + +.1 < . 1:1(0) ack 10001 win 257 + +0 %{ +assert tcpi_ca_state == TCP_CA_Open, tcpi_ca_state +assert tcpi_unacked == 0, tcpi_unacked +assert tcpi_sacked == 0, tcpi_sacked +assert tcpi_lost == 0, tcpi_lost +assert tcpi_retrans == 0, tcpi_retrans +}% diff --git a/tools/testing/selftests/net/packetdrill/tcp_sack_sack-shift-sacked-7-5-6-8-9-fack.pkt b/tools/testing/selftests/net/packetdrill/tcp_sack_sack-shift-sacked-7-5-6-8-9-fack.pkt new file mode 100644 index 000000000000..980a832dc81c --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_sack_sack-shift-sacked-7-5-6-8-9-fack.pkt @@ -0,0 +1,62 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test shifting of newly-SACKed ranges onto the previous already-SACKed skb. 
+// This variant tests the case where we mark packets 0-4 lost, then +// get a SACK for 5, and then a SACK for 6. + +`./defaults.sh` + +// Establish a connection and send 10 MSS. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 + +0 > S. 0:0(0) ack 1 + +.1 < . 1:1(0) ack 1 win 1024 + +0 accept(3, ..., ...) = 4 + + +0 write(4, ..., 10000) = 10000 + +0 > P. 1:10001(10000) ack 1 + +// SACK for 7001:8001. Using RACK we delay a fast retransmit. + +.1 < . 1:1(0) ack 1 win 257 ++.027 > . 1:1001(1000) ack 1 + +0 %{ +assert tcpi_ca_state == TCP_CA_Recovery, tcpi_ca_state +assert tcpi_lost == 7,tcpi_lost # RACK thinks 1:7001 are lost +assert tcpi_reordering == 3, tcpi_reordering +}% + +// SACK for 5001:6001. + +0 < . 1:1(0) ack 1 win 257 + +0 > . 1001:2001(1000) ack 1 + +0 %{ +assert tcpi_lost == 6, tcpi_lost +assert tcpi_reordering == 3, tcpi_reordering # 7001:8001 -> 5001:6001 is 3 +}% + +// SACK for 6001:7001. +// This SACK for an adjacent range causes the sender to +// shift the newly-SACKed range onto the previous skb. + +0 < . 1:1(0) ack 1 win 257 + +0 > . 2001:3001(1000) ack 1 + +0 %{ assert tcpi_lost == 5, tcpi_lost }% + +// SACK for 8001:9001. + +0 < . 1:1(0) ack 1 win 257 + +0 > . 3001:4001(1000) ack 1 + +// SACK for 9001:10001. + +0 < . 1:1(0) ack 1 win 257 + +0 > . 4001:5001(1000) ack 1 + +// To simplify clean-up, say we get an ACK for all data. + +.1 < . 1:1(0) ack 10001 win 257 + +0 %{ +assert tcpi_ca_state == TCP_CA_Open, tcpi_ca_state +assert tcpi_unacked == 0, tcpi_unacked +assert tcpi_sacked == 0, tcpi_sacked +assert tcpi_lost == 0, tcpi_lost +assert tcpi_retrans == 0, tcpi_retrans +}% diff --git a/tools/testing/selftests/net/packetdrill/tcp_tcp_info_tcp-info-last_data_recv.pkt b/tools/testing/selftests/net/packetdrill/tcp_tcp_info_tcp-info-last_data_recv.pkt new file mode 100644 index 000000000000..d7fdb43a8e89 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_tcp_info_tcp-info-last_data_recv.pkt @@ -0,0 +1,20 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test tcpi_last_data_recv for active session +`./defaults.sh` + +// Create a socket and set it to non-blocking. ++0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 ++0 fcntl(3, F_GETFL) = 0x2 (flags O_RDWR) ++0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0 + ++0 connect(3, ..., ...) = -1 EINPROGRESS (Operation now in progress) ++0 > S 0:0(0) ++.030 < S. 0:0(0) ack 1 win 10000 ++0 > . 1:1(0) ack 1 + ++1 %{ assert 990 <= tcpi_last_data_recv <= 1010, tcpi_last_data_recv }% + ++0 < . 1:1001(1000) ack 1 win 300 ++0 > . 1:1(0) ack 1001 + ++0 %{ assert tcpi_last_data_recv <= 10, tcpi_last_data_recv }% diff --git a/tools/testing/selftests/net/packetdrill/tcp_tcp_info_tcp-info-rwnd-limited.pkt b/tools/testing/selftests/net/packetdrill/tcp_tcp_info_tcp-info-rwnd-limited.pkt new file mode 100644 index 000000000000..a9bcd46f6cb6 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_tcp_info_tcp-info-rwnd-limited.pkt @@ -0,0 +1,54 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test rwnd limited time in tcp_info for client side. + +`./defaults.sh` + +// Create a socket and set it to non-blocking. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 fcntl(3, F_GETFL) = 0x2 (flags O_RDWR) + +0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0 + + +0 connect(3, ..., ...) = -1 EINPROGRESS (Operation now in progress) + +0 > S 0:0(0) + +// Server advertises 0 receive window. + +.01 < S. 0:0(0) ack 1 win 0 + + +0 > . 
1:1(0) ack 1 + +0 getsockopt(3, SOL_SOCKET, SO_ERROR, [0], [4]) = 0 + +0 fcntl(3, F_SETFL, O_RDWR) = 0 // set back to blocking + +// Make sure that initial rwnd limited time is 0. + +0 %{ assert tcpi_rwnd_limited == 0, tcpi_rwnd_limited }% + +// Receive window limited time starts here. + +0 write(3, ..., 1000) = 1000 + +// Check that rwnd limited time in tcp_info is around 0.1s. + +.1 %{ assert 98000 <= tcpi_rwnd_limited <= 110000, tcpi_rwnd_limited }% + +// Server opens the receive window. + +.1 < . 1:1(0) ack 1 win 2000 + +// Check that rwnd limited time in tcp_info is around 0.2s. + +0 %{ assert 198000 <= tcpi_rwnd_limited <= 210000, tcpi_rwnd_limited }% + + +0 > P. 1:1001(1000) ack 1 + +// Server advertises a very small receive window. + +.03 < . 1:1(0) ack 1001 win 10 + +// Receive window limited time starts again. + +0 write(3, ..., 1000) = 1000 + +// Server opens the receive window again. + +.1 < . 1:1(0) ack 1001 win 2000 +// Check that rwnd limited time in tcp_info is around 0.3s +// and busy time is 0.3 + 0.03 (server opened small window temporarily). + +0 %{ assert 298000 <= tcpi_rwnd_limited <= 310000, tcpi_rwnd_limited;\ + assert 328000 <= tcpi_busy_time <= 340000, tcpi_busy_time;\ +}% + + +0 > P. 1001:2001(1000) ack 1 + +.02 < . 1:1(0) ack 2001 win 2000 + +0 %{ assert 348000 <= tcpi_busy_time <= 360000, tcpi_busy_time }% diff --git a/tools/testing/selftests/net/packetdrill/tcp_tcp_info_tcp-info-sndbuf-limited.pkt b/tools/testing/selftests/net/packetdrill/tcp_tcp_info_tcp-info-sndbuf-limited.pkt new file mode 100644 index 000000000000..f0de2acd0f8e --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_tcp_info_tcp-info-sndbuf-limited.pkt @@ -0,0 +1,38 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test send-buffer-limited time in tcp_info for client side. +`./defaults.sh` + +// Create a socket and set it to non-blocking. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 fcntl(3, F_GETFL) = 0x2 (flags O_RDWR) + +0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0 + + +0 connect(3, ..., ...) = -1 EINPROGRESS (Operation now in progress) + +0 > S 0:0(0) + +.01 < S. 0:0(0) ack 1 win 10000 + +0 > . 1:1(0) ack 1 + +0 getsockopt(3, SOL_SOCKET, SO_ERROR, [0], [4]) = 0 + +0 fcntl(3, F_SETFL, O_RDWR) = 0 // set back to blocking + +0 setsockopt(3, SOL_SOCKET, SO_SNDBUF, [10000], 4) = 0 + +0 getsockopt(3, SOL_SOCKET, SO_SNDBUF, [20000], [4]) = 0 + + +.09...0.14 write(3, ..., 150000) = 150000 + + +.01 < . 1:1(0) ack 10001 win 10000 + + +.01 < . 1:1(0) ack 30001 win 10000 + +// cwnd goes from 40(60KB) to 80(120KB), and that we hit the tiny sndbuf limit 10KB + +.01 < . 1:1(0) ack 70001 win 10000 + + +.02 < . 1:1(0) ack 95001 win 10000 + +0 %{ assert 19000 <= tcpi_sndbuf_limited <= 21000, tcpi_sndbuf_limited; \ + assert 49000 <= tcpi_busy_time <= 52000, tcpi_busy_time; \ + assert 0 == tcpi_rwnd_limited, tcpi_rwnd_limited }% + +// This ack frees up enough buffer so we are no longer +// buffer limited (socket flag SOCK_NOSPACE is cleared) + +.02 < . 
1:1(0) ack 150001 win 10000 + +0 %{ assert 19000 <= tcpi_sndbuf_limited <= 21000, tcpi_sndbuf_limited;\ + assert 69000 <= tcpi_busy_time <= 73000, tcpi_busy_time;\ + assert 0 == tcpi_rwnd_limited, tcpi_rwnd_limited }% -- cgit v1.2.3 From eab35989cc37e168550b7bfa690905ea2d1ae603 Mon Sep 17 00:00:00 2001 From: Soham Chakradeo Date: Tue, 17 Dec 2024 18:51:59 +0000 Subject: selftests/net: packetdrill: import tcp/fast_recovery, tcp/nagle, tcp/timestamping Use the standard import and testing method, as described in the import of tcp/ecn , tcp/close , tcp/sack , tcp/tcp_info. Signed-off-by: Willem de Bruijn Signed-off-by: Soham Chakradeo Link: https://patch.msgid.link/20241217185203.297935-3-sohamch.kernel@gmail.com Signed-off-by: Jakub Kicinski --- .../tcp_fast_recovery_prr-ss-10pkt-lost-1.pkt | 72 ++++++++++ ...p_fast_recovery_prr-ss-30pkt-lost-1_4-11_16.pkt | 50 +++++++ .../tcp_fast_recovery_prr-ss-30pkt-lost1_4.pkt | 43 ++++++ ...ast_recovery_prr-ss-ack-below-snd_una-cubic.pkt | 41 ++++++ .../net/packetdrill/tcp_nagle_https_client.pkt | 40 ++++++ .../net/packetdrill/tcp_nagle_sendmsg_msg_more.pkt | 66 ++++++++++ .../packetdrill/tcp_nagle_sockopt_cork_nodelay.pkt | 43 ++++++ .../tcp_timestamping_client-only-last-byte.pkt | 92 +++++++++++++ .../net/packetdrill/tcp_timestamping_partial.pkt | 91 +++++++++++++ .../net/packetdrill/tcp_timestamping_server.pkt | 145 +++++++++++++++++++++ 10 files changed, 683 insertions(+) create mode 100644 tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-10pkt-lost-1.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-30pkt-lost-1_4-11_16.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-30pkt-lost1_4.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-ack-below-snd_una-cubic.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_nagle_https_client.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_nagle_sendmsg_msg_more.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_nagle_sockopt_cork_nodelay.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_timestamping_client-only-last-byte.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_timestamping_partial.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_timestamping_server.pkt (limited to 'tools') diff --git a/tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-10pkt-lost-1.pkt b/tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-10pkt-lost-1.pkt new file mode 100644 index 000000000000..0d3c8077e830 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-10pkt-lost-1.pkt @@ -0,0 +1,72 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test PRR-slowstart implementation. +// In this variant we test a simple case where in-flight == ssthresh +// all the way through recovery, so during fast recovery we send one segment +// for each segment SACKed/ACKed. + +// Set up config. +`./defaults.sh` + +// Establish a connection. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +.1 < S 0:0(0) win 32792 + +0 > S. 0:0(0) ack 1 +// RTT 100ms + +.1 < . 1:1(0) ack 1 win 320 + +0 accept(3, ..., ...) = 4 + +// Send 10 data segments. + +0 write(4, ..., 10000) = 10000 + +0 > P. 1:10001(10000) ack 1 + +// Lost packet 1:1001. + +.11 < . 1:1(0) ack 1 win 320 + +.01 < . 
1:1(0) ack 1 win 320 + +.01 < . 1:1(0) ack 1 win 320 +// Enter fast recovery. + +0 > . 1:1001(1000) ack 1 + +.01 %{ +assert tcpi_ca_state == TCP_CA_Recovery, tcpi_ca_state +assert tcpi_snd_cwnd == 7, tcpi_snd_cwnd +assert tcpi_snd_ssthresh == 7, tcpi_snd_ssthresh +}% + +// Write some more, which we will send 1 MSS at a time, +// as in-flight segments are SACKed or ACKed. + +.01 write(4, ..., 7000) = 7000 + + +.01 < . 1:1(0) ack 1 win 320 + +0 > . 10001:11001(1000) ack 1 + + +.01 < . 1:1(0) ack 1 win 320 + +0 > . 11001:12001(1000) ack 1 + + +.01 < . 1:1(0) ack 1 win 320 + +0 > . 12001:13001(1000) ack 1 + + +.01 < . 1:1(0) ack 1 win 320 + +0 > . 13001:14001(1000) ack 1 + + +.01 < . 1:1(0) ack 1 win 320 + +0 > . 14001:15001(1000) ack 1 + + +.01 < . 1:1(0) ack 1 win 320 + +0 > . 15001:16001(1000) ack 1 + + +.02 < . 1:1(0) ack 10001 win 320 + +0 > P. 16001:17001(1000) ack 1 +// Leave fast recovery. + +.01 %{ +assert tcpi_ca_state == TCP_CA_Open, tcpi_ca_state +assert tcpi_snd_cwnd == 7, tcpi_snd_cwnd +assert tcpi_snd_ssthresh == 7, tcpi_snd_ssthresh +}% + + +.03 < . 1:1(0) ack 12001 win 320 + +.02 < . 1:1(0) ack 14001 win 320 + +.02 < . 1:1(0) ack 16001 win 320 + +.02 < . 1:1(0) ack 17001 win 320 diff --git a/tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-30pkt-lost-1_4-11_16.pkt b/tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-30pkt-lost-1_4-11_16.pkt new file mode 100644 index 000000000000..7842a10b6967 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-30pkt-lost-1_4-11_16.pkt @@ -0,0 +1,50 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test PRR-slowstart implementation. The sender sends 20 packets. Packet +// 1 to 4, and 11 to 16 are dropped. +`./defaults.sh` + +// Establish a connection. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 + +0 > S. 0:0(0) ack 1 + + +.01 < . 1:1(0) ack 1 win 320 + +0 accept(3, ..., ...) = 4 + +// Write 20 data segments. + +0 write(4, ..., 20000) = 20000 + +0 > P. 1:10001(10000) ack 1 + +// Receive first DUPACK, entering PRR part + +.01 < . 1:1(0) ack 1 win 320 + +0 > . 10001:11001(1000) ack 1 ++.002 < . 1:1(0) ack 1 win 320 + +0 > . 11001:12001(1000) ack 1 ++.002 < . 1:1(0) ack 1 win 320 + +0 > . 1:1001(1000) ack 1 ++.002 < . 1:1(0) ack 1 win 320 + +0 > . 1001:2001(1000) ack 1 ++.002 < . 1:1(0) ack 1 win 320 + +0 > . 2001:3001(1000) ack 1 ++.002 < . 1:1(0) ack 1 win 320 + +0 > . 3001:4001(1000) ack 1 +// Enter PRR CRB ++.002 < . 1:1(0) ack 1 win 320 + +0 > . 12001:13001(1000) ack 1 ++.002 < . 1:1(0) ack 1 win 320 + +0 > . 13001:14001(1000) ack 1 +// Enter PRR slow start + +.01 < . 1:1(0) ack 1001 win 320 + +0 > P. 14001:16001(2000) ack 1 ++.002 < . 1:1(0) ack 1001 win 320 + +0 > . 1001:2001(1000) ack 1 + +0 > . 16001:17001(1000) ack 1 +// inflight reaches ssthresh, goes into packet conservation mode ++.002 < . 1:1(0) ack 1001 win 320 + +0 > . 17001:18001(1000) ack 1 ++.002 < . 1:1(0) ack 1001 win 320 + +0 > . 18001:19001(1000) ack 1 diff --git a/tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-30pkt-lost1_4.pkt b/tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-30pkt-lost1_4.pkt new file mode 100644 index 000000000000..b66d7644c3b6 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-30pkt-lost1_4.pkt @@ -0,0 +1,43 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test PRR-slowstart implementation. 
The sender sends 20 packets. Packet +// 1 to 4 are lost. The sender writes another 10 packets. +`./defaults.sh` + +// Establish a connection. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 + +0 > S. 0:0(0) ack 1 + + +.01 < . 1:1(0) ack 1 win 320 + +0 accept(3, ..., ...) = 4 + +// Send 20 data segments. + +0 write(4, ..., 10000) = 10000 + +0 > P. 1:10001(10000) ack 1 + +// Lost packet 1,2,3,4 + +.01 < . 1:1(0) ack 1 win 320 ++.002 < . 1:1(0) ack 1 win 320 + +0 < . 1:1(0) ack 1 win 320 + +0 > . 1:1001(1000) ack 1 + +0 < . 1:1(0) ack 1 win 320 + +0 > . 1001:2001(1000) ack 1 + +0 < . 1:1(0) ack 1 win 320 + +0 > . 2001:3001(1000) ack 1 + +0 < . 1:1(0) ack 1 win 320 + +0 > . 3001:4001(1000) ack 1 + +// Receiver ACKs all data. + +.01 < . 1:1(0) ack 1001 win 320 + +0 < . 1:1(0) ack 2001 win 320 + +0 < . 1:1(0) ack 3001 win 320 + +0 < . 1:1(0) ack 10001 win 320 + +// Writes another 10 packets, which the ssthresh*mss amount +// should be sent right away + +.01 write(4, ..., 10000) = 10000 + +0 > . 10001:17001(7000) ack 1 diff --git a/tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-ack-below-snd_una-cubic.pkt b/tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-ack-below-snd_una-cubic.pkt new file mode 100644 index 000000000000..8e87bfecabb5 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-ack-below-snd_una-cubic.pkt @@ -0,0 +1,41 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test PRR-slowstart implementation. +// In this variant we verify that the sender uses SACK info on an ACK +// below snd_una. + +// Set up config. +`./defaults.sh` + +// Establish a connection. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 + +0 > S. 0:0(0) ack 1 +// RTT 10ms + +.01 < . 1:1(0) ack 1 win 320 + +0 accept(3, ..., ...) = 4 + +// Send 10 data segments. + +0 write(4, ..., 10000) = 10000 + +0 > P. 1:10001(10000) ack 1 + +// Lost packet 1:1001,4001:5001,7001:8001. + +.01 < . 1:1(0) ack 1 win 320 + +0 < . 1:1(0) ack 1 win 320 + +0 < . 1:1(0) ack 1 win 320 + +0 > . 1:1001(1000) ack 1 + ++.012 < . 1:1(0) ack 4001 win 320 + +0 > . 4001:7001(3000) ack 1 + + +0 write(4, ..., 10000) = 10000 + +// The following ACK was reordered - delayed so that it arrives with +// an ACK field below snd_una. Here we check that the newly-SACKed +// 2MSS at 5001:7001 cause us to send out 2 more MSS. ++.002 < . 1:1(0) ack 3001 win 320 + +0 > . 7001:8001(1000) ack 1 + +0 > . 10001:11001(1000) ack 1 diff --git a/tools/testing/selftests/net/packetdrill/tcp_nagle_https_client.pkt b/tools/testing/selftests/net/packetdrill/tcp_nagle_https_client.pkt new file mode 100644 index 000000000000..7adae7a9ef4a --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_nagle_https_client.pkt @@ -0,0 +1,40 @@ +// SPDX-License-Identifier: GPL-2.0 +// This is a test inspired by an Android client app using SSL. This +// test verifies using TCP_NODELAY would save application latency +// (Perhaps even better with TCP_NAGLE). +// +`./defaults.sh +ethtool -K tun0 tso off gso off +./set_sysctls.py /proc/sys/net/ipv4/tcp_timestamps=0` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4 + +0 fcntl(4, F_SETFL, O_RDWR|O_NONBLOCK) = 0 + +0 setsockopt(4, SOL_TCP, TCP_NODELAY, [1], 4) = 0 + + +0 connect(4, ..., ...) 
= -1 EINPROGRESS (Operation now in progress) + +0 > S 0:0(0) + +.1 < S. 0:0(0) ack 1 win 5792 + +0 > . 1:1(0) ack 1 + +// SSL handshake (resumed session) + +0 write(4, ..., 517) = 517 + +0 > P. 1:518(517) ack 1 + +.1 < . 1:1(0) ack 518 win 229 + + +0 < P. 1:144(143) ack 1 win 229 + +0 > . 518:518(0) ack 144 + +0 read(4, ..., 1000) = 143 + +// Application POST header (51B) and body (2002B) + +0 write(4, ..., 51) = 51 + +0 > P. 518:569(51) ack 144 + +.03 write(4, ..., 2002) = 2002 + +0 > . 569:1543(974) ack 144 + +0 > P. 1543:2517(974) ack 144 +// Without disabling Nagle, this packet will not happen until the remote ACK. + +0 > P. 2517:2571(54) ack 144 + + +.1 < . 1:1(0) ack 2571 win 229 + +// Reset sysctls +`/tmp/sysctl_restore_${PPID}.sh` diff --git a/tools/testing/selftests/net/packetdrill/tcp_nagle_sendmsg_msg_more.pkt b/tools/testing/selftests/net/packetdrill/tcp_nagle_sendmsg_msg_more.pkt new file mode 100644 index 000000000000..fa9c01813996 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_nagle_sendmsg_msg_more.pkt @@ -0,0 +1,66 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test the MSG_MORE flag will correctly corks the tiny writes +`./defaults.sh` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 + +0 > S. 0:0(0) ack 1 + +.01 < . 1:1(0) ack 1 win 257 + +0 accept(3, ..., ...) = 4 +// Disable Nagle by default on this socket. + +0 setsockopt(4, SOL_TCP, TCP_NODELAY, [1], 4) = 0 + +// Test the basic case: MSG_MORE overwrites TCP_NODELAY and enables Nagle. + +0 sendmsg(4, {msg_name(...)=..., + msg_iov(1)=[{..., 40}], msg_flags=0}, MSG_MORE) = 40 + +.21~+.215 > P. 1:41(40) ack 1 + +.01 < . 1:1(0) ack 41 win 257 + +// Test unsetting MSG_MORE releases the packet + +0 sendmsg(4, {msg_name(...)=..., + msg_iov(1)=[{..., 100}], msg_flags=0}, MSG_MORE) = 100 ++.005 sendmsg(4, {msg_name(...)=..., + msg_iov(1)=[{..., 160}], msg_flags=0}, MSG_MORE) = 160 + +.01 sendmsg(4, {msg_name(...)=..., + msg_iov(3)=[{..., 100}, {..., 200}, {..., 195}], + msg_flags=0}, MSG_MORE) = 495 ++.008 sendmsg(4, {msg_name(...)=..., + msg_iov(1)=[{..., 5}], msg_flags=0}, 0) = 5 + +0 > P. 41:801(760) ack 1 + +.02 < . 1:1(0) ack 801 win 257 + + +// Test >MSS write will unleash MSS packets but hold on the remaining data. + +.1 sendmsg(4, {msg_name(...)=..., + msg_iov(1)=[{..., 3100}], msg_flags=0}, MSG_MORE) = 3100 + +0 > . 801:3801(3000) ack 1 ++.003 sendmsg(4, {msg_name(...)=..., + msg_iov(1)=[{..., 50}], msg_flags=0}, MSG_MORE) = 50 + + +.01 < . 1:1(0) ack 2801 win 257 +// Err... we relase the remaining right after the ACK? note that PUSH is reset + +0 > . 3801:3951(150) ack 1 + +// Test we'll hold on the subsequent writes when inflight (3801:3951) > 0 ++.001 sendmsg(4, {msg_name(...)=..., + msg_iov(1)=[{..., 1}], msg_flags=0}, MSG_MORE) = 1 ++.002 sendmsg(4, {msg_name(...)=..., + msg_iov(1)=[{..., 2}], msg_flags=0}, MSG_MORE) = 2 ++.003 sendmsg(4, {msg_name(...)=..., + msg_iov(1)=[{..., 3}], msg_flags=0}, MSG_MORE) = 3 ++.004 sendmsg(4, {msg_name(...)=..., + msg_iov(1)=[{..., 4}], msg_flags=0}, MSG_MORE) = 4 + +.02 < . 1:1(0) ack 3951 win 257 + +0 > . 3951:3961(10) ack 1 + +.02 < . 1:1(0) ack 3961 win 257 + + +// Test the case a MSG_MORE send followed by a write flushes the data + +0 sendmsg(4, {msg_name(...)=..., + msg_iov(1)=[{..., 20}], msg_flags=0}, MSG_MORE) = 20 + +.05 write(4, ..., 20) = 20 + +0 > P. 
3961:4001(40) ack 1 diff --git a/tools/testing/selftests/net/packetdrill/tcp_nagle_sockopt_cork_nodelay.pkt b/tools/testing/selftests/net/packetdrill/tcp_nagle_sockopt_cork_nodelay.pkt new file mode 100644 index 000000000000..0ddec5f7dc1a --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_nagle_sockopt_cork_nodelay.pkt @@ -0,0 +1,43 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test TCP_CORK and TCP_NODELAY sockopt behavior +`./defaults.sh` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 + +0 > S. 0:0(0) ack 1 + +.01 < . 1:1(0) ack 1 win 257 + +0 accept(3, ..., ...) = 4 +// Set TCP_CORK sockopt to hold small packets + +0 setsockopt(4, SOL_TCP, TCP_CORK, [1], 4) = 0 + + +0 write(4, ..., 40) = 40 + +.05 write(4, ..., 40) = 40 + +// Unset TCP_CORK should push pending bytes out + +.01 setsockopt(4, SOL_TCP, TCP_CORK, [0], 4) = 0 + +0 > P. 1:81(80) ack 1 + +.01 < . 1:1(0) ack 81 win 257 + +// Set TCP_CORK sockopt to hold small packets + +0 setsockopt(4, SOL_TCP, TCP_CORK, [1], 4) = 0 + + +0 write(4, ..., 40) = 40 + +.05 write(4, ..., 40) = 40 + +// Set TCP_NODELAY sockopt should push pending bytes out + +0 setsockopt(4, SOL_TCP, TCP_NODELAY, [1], 4) = 0 + +0 > P. 81:161(80) ack 1 + +.01 < . 1:1(0) ack 161 win 257 + +// Set MSG_MORE to hold small packets + +0 send(4, ..., 40, MSG_MORE) = 40 + +.05 send(4, ..., 40, MSG_MORE) = 40 + +// Set TCP_NODELAY sockopt should push pending bytes out + +.01 setsockopt(4, SOL_TCP, TCP_NODELAY, [1], 4) = 0 + +0 > . 161:241(80) ack 1 + +.01 < . 1:1(0) ack 241 win 257 diff --git a/tools/testing/selftests/net/packetdrill/tcp_timestamping_client-only-last-byte.pkt b/tools/testing/selftests/net/packetdrill/tcp_timestamping_client-only-last-byte.pkt new file mode 100644 index 000000000000..2087ec0c746a --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_timestamping_client-only-last-byte.pkt @@ -0,0 +1,92 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test that tx timestamping sends timestamps only for +// the last byte of each sendmsg. +`./defaults.sh +` + +// Create a socket and set it to non-blocking. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 fcntl(3, F_GETFL) = 0x2 (flags O_RDWR) + +0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0 + +// Establish connection and verify that there was no error. + +0 connect(3, ..., ...) = -1 EINPROGRESS (Operation now in progress) + +0 > S 0:0(0) + +.01 < S. 0:0(0) ack 1 win 20000 + +0 > . 1:1(0) ack 1 + +0 getsockopt(3, SOL_SOCKET, SO_ERROR, [0], [4]) = 0 + +0 fcntl(3, F_SETFL, O_RDWR) = 0 // set back to blocking + + +0 setsockopt(3, SOL_SOCKET, SO_TIMESTAMPING, + [SOF_TIMESTAMPING_TX_SCHED | SOF_TIMESTAMPING_TX_SOFTWARE | + SOF_TIMESTAMPING_TX_ACK | SOF_TIMESTAMPING_SOFTWARE | + SOF_TIMESTAMPING_OPT_ID], 4) = 0 + + +0 write(3, ..., 11000) = 11000 + +0 > P. 1:10001(10000) ack 1 + +.01 < . 1:1(0) ack 10001 win 4000 + +0 > P. 10001:11001(1000) ack 1 + +.01 < . 1:1(0) ack 11001 win 4000 + +// Make sure that internal TCP timestamps are not overwritten and we have sane +// RTT measurement. + +0 %{ +assert 5000 <= tcpi_rtt <= 20000, 'srtt=%d us' % tcpi_rtt +}% + +// SCM_TSTAMP_SCHED for the last byte should be received almost immediately +// once 10001 is acked at t=20ms. +// setsockopt(..., [SOF_TIMESTAMPING_SOFTWARE | SOF_TIMESTAMPING_OPT_ID], ...) +// is called after when SYN is acked. 
So, we expect the last byte of the first +// chunk to have a timestamp key of 10999 (i.e., 11000 - 1). + +0 recvmsg(3, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE|MSG_TRUNC, + msg_control=[ + {cmsg_level=SOL_SOCKET, + cmsg_type=SCM_TIMESTAMPING, + cmsg_data={scm_sec=0,scm_nsec=20000000}}, + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=ENOMSG, + ee_origin=SO_EE_ORIGIN_TIMESTAMPING, + ee_type=0, + ee_code=0, + ee_info=SCM_TSTAMP_SCHED, + ee_data=10999}} + ]}, MSG_ERRQUEUE) = 0 +// SCM_TSTAMP_SND for the last byte should be received almost immediately +// once 10001 is acked at t=20ms. + +0 recvmsg(3, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE|MSG_TRUNC, + msg_control=[ + {cmsg_level=SOL_SOCKET, + cmsg_type=SCM_TIMESTAMPING, + cmsg_data={scm_sec=0,scm_nsec=20000000}}, + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=ENOMSG, + ee_origin=SO_EE_ORIGIN_TIMESTAMPING, + ee_type=0, + ee_code=0, + ee_info=SCM_TSTAMP_SND, + ee_data=10999}} + ]}, MSG_ERRQUEUE) = 0 +// SCM_TSTAMP_ACK for the last byte should be received at t=30ms. + +0 recvmsg(3, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE|MSG_TRUNC, + msg_control=[ + {cmsg_level=SOL_SOCKET, + cmsg_type=SCM_TIMESTAMPING, + cmsg_data={scm_sec=0,scm_nsec=30000000}}, + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=ENOMSG, + ee_origin=SO_EE_ORIGIN_TIMESTAMPING, + ee_type=0, + ee_code=0, + ee_info=SCM_TSTAMP_ACK, + ee_data=10999}} + ]}, MSG_ERRQUEUE) = 0 diff --git a/tools/testing/selftests/net/packetdrill/tcp_timestamping_partial.pkt b/tools/testing/selftests/net/packetdrill/tcp_timestamping_partial.pkt new file mode 100644 index 000000000000..876024a31110 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_timestamping_partial.pkt @@ -0,0 +1,91 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test tx timestamping for partial writes (IPv4). +`./defaults.sh +` + +// Create a socket and set it to non-blocking. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 fcntl(3, F_GETFL) = 0x2 (flags O_RDWR) + +0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0 + +// Establish connection and verify that there was no error. + +0 connect(3, ..., ...) = -1 EINPROGRESS (Operation now in progress) + +0 > S 0:0(0) + +.01 < S. 0:0(0) ack 1 win 2000 + +0 > . 1:1(0) ack 1 + +0 getsockopt(3, SOL_SOCKET, SO_ERROR, [0], [4]) = 0 + + +0 setsockopt(3, SOL_SOCKET, SO_SNDBUF, [1000], 4) = 0 + +0 setsockopt(3, SOL_SOCKET, SO_TIMESTAMPING, + [SOF_TIMESTAMPING_TX_SCHED | SOF_TIMESTAMPING_TX_SOFTWARE | + SOF_TIMESTAMPING_TX_ACK | SOF_TIMESTAMPING_SOFTWARE | + SOF_TIMESTAMPING_OPT_ID], 4) = 0 + +// We have a partial write. + +0 write(3, ..., 10000) = 2964 + +0 > . 1:989(988) ack 1 + +0 > P. 989:1977(988) ack 1 + +.01 < . 1:1(0) ack 1977 win 92 + +0 > P. 1977:2965(988) ack 1 + +.01 < . 1:1(0) ack 2965 win 92 + +// Make sure that internal TCP timestamps are not overwritten and we have sane +// RTT measurement. + +0 %{ +assert 5000 <= tcpi_rtt <= 20000, 'srtt=%d us' % tcpi_rtt +}% + +// SCM_TSTAMP_SCHED for the first chunk should be received almost immediately +// after the first ack at t=20ms. 
+ +0 recvmsg(3, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE|MSG_TRUNC, + msg_control=[ + {cmsg_level=SOL_SOCKET, + cmsg_type=SCM_TIMESTAMPING, + cmsg_data={scm_sec=0,scm_nsec=20000000}}, + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=ENOMSG, + ee_origin=SO_EE_ORIGIN_TIMESTAMPING, + ee_type=0, + ee_code=0, + ee_info=SCM_TSTAMP_SCHED, + ee_data=2963}} + ]}, MSG_ERRQUEUE) = 0 +// SCM_TSTAMP_SND for the first chunk should be received almost immediately +// after the first ack at t=20ms. + +0 recvmsg(3, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE|MSG_TRUNC, + msg_control=[ + {cmsg_level=SOL_SOCKET, + cmsg_type=SCM_TIMESTAMPING, + cmsg_data={scm_sec=0,scm_nsec=20000000}}, + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=ENOMSG, + ee_origin=SO_EE_ORIGIN_TIMESTAMPING, + ee_type=0, + ee_code=0, + ee_info=SCM_TSTAMP_SND, + ee_data=2963}} + ]}, MSG_ERRQUEUE) = 0 +// SCM_TSTAMP_ACK for the first chunk should be received after the last ack at +// t=30ms. + +0 recvmsg(3, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE|MSG_TRUNC, + msg_control=[ + {cmsg_level=SOL_SOCKET, + cmsg_type=SCM_TIMESTAMPING, + cmsg_data={scm_sec=0,scm_nsec=30000000}}, + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=ENOMSG, + ee_origin=SO_EE_ORIGIN_TIMESTAMPING, + ee_type=0, + ee_code=0, + ee_info=SCM_TSTAMP_ACK, + ee_data=2963}} + ]}, MSG_ERRQUEUE) = 0 diff --git a/tools/testing/selftests/net/packetdrill/tcp_timestamping_server.pkt b/tools/testing/selftests/net/packetdrill/tcp_timestamping_server.pkt new file mode 100644 index 000000000000..84d94780e6be --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_timestamping_server.pkt @@ -0,0 +1,145 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test tx timestamping for server-side (IPv4). +`./defaults.sh +` + +// Initialize connection + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 + +0 > S. 0:0(0) ack 1 + +.01 < . 1:1(0) ack 1 win 514 + + +0 accept(3, ..., ...) = 4 + +0 setsockopt(4, SOL_SOCKET, SO_TIMESTAMPING, + [SOF_TIMESTAMPING_TX_SCHED | SOF_TIMESTAMPING_TX_SOFTWARE | + SOF_TIMESTAMPING_TX_ACK | SOF_TIMESTAMPING_SOFTWARE | + SOF_TIMESTAMPING_OPT_ID], 4) = 0 + +// Write two 2KB chunks. +// setsockopt(..., [SOF_TIMESTAMPING_SOFTWARE | SOF_TIMESTAMPING_OPT_ID], ...) +// is called after when SYN is acked. So, we expect the last byte of the first +// and the second chunks to have timestamp keys of 1999 (i.e., 2000 - 1) and +// 3999 (i.e., 4000 - 1) respectively. + +0 write(4, ..., 2000) = 2000 + +0 write(4, ..., 2000) = 2000 + +0 > P. 1:2001(2000) ack 1 + +0 > P. 2001:4001(2000) ack 1 + +.01 < . 1:1(0) ack 2001 win 514 + +.01 < . 1:1(0) ack 4001 win 514 + +// Make sure that internal TCP timestamps are not overwritten and we have sane +// RTT measurement. + +0 %{ +assert 5000 <= tcpi_rtt <= 20000, 'srtt=%d us' % tcpi_rtt +}% + +// SCM_TSTAMP_SCHED for the first chunk should be received almost immediately +// after write at t=10ms. 
+ +0 recvmsg(4, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE|MSG_TRUNC, + msg_control=[ + {cmsg_level=SOL_SOCKET, + cmsg_type=SCM_TIMESTAMPING, + cmsg_data={scm_sec=0,scm_nsec=10000000}}, + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=ENOMSG, + ee_origin=SO_EE_ORIGIN_TIMESTAMPING, + ee_type=0, + ee_code=0, + ee_info=SCM_TSTAMP_SCHED, + ee_data=1999}} + ]}, MSG_ERRQUEUE) = 0 +// SCM_TSTAMP_SND for the first chunk should be received almost immediately +// after write at t=10ms. + +0 recvmsg(4, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE|MSG_TRUNC, + msg_control=[ + {cmsg_level=SOL_SOCKET, + cmsg_type=SCM_TIMESTAMPING, + cmsg_data={scm_sec=0,scm_nsec=10000000}}, + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=ENOMSG, + ee_origin=SO_EE_ORIGIN_TIMESTAMPING, + ee_type=0, + ee_code=0, + ee_info=SCM_TSTAMP_SND, + ee_data=1999}} + ]}, MSG_ERRQUEUE) = 0 +// SCM_TSTAMP_SCHED for the second chunk should be received almost immediately +// after that at t=10ms. + +0 recvmsg(4, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE|MSG_TRUNC, + msg_control=[ + {cmsg_level=SOL_SOCKET, + cmsg_type=SCM_TIMESTAMPING, + cmsg_data={scm_sec=0,scm_nsec=10000000}}, + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=ENOMSG, + ee_origin=SO_EE_ORIGIN_TIMESTAMPING, + ee_type=0, + ee_code=0, + ee_info=SCM_TSTAMP_SCHED, + ee_data=3999}} + ]}, MSG_ERRQUEUE) = 0 +// SCM_TSTAMP_SND for the second chunk should be received almost immediately +// after that at t=10ms. + +0 recvmsg(4, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE|MSG_TRUNC, + msg_control=[ + {cmsg_level=SOL_SOCKET, + cmsg_type=SCM_TIMESTAMPING, + cmsg_data={scm_sec=0,scm_nsec=10000000}}, + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=ENOMSG, + ee_origin=SO_EE_ORIGIN_TIMESTAMPING, + ee_type=0, + ee_code=0, + ee_info=SCM_TSTAMP_SND, + ee_data=3999}} + ]}, MSG_ERRQUEUE) = 0 +// SCM_TSTAMP_ACK for the first chunk should be received at t=20ms. + +0 recvmsg(4, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE|MSG_TRUNC, + msg_control=[ + {cmsg_level=SOL_SOCKET, + cmsg_type=SCM_TIMESTAMPING, + cmsg_data={scm_sec=0,scm_nsec=20000000}}, + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=ENOMSG, + ee_origin=SO_EE_ORIGIN_TIMESTAMPING, + ee_type=0, + ee_code=0, + ee_info=SCM_TSTAMP_ACK, + ee_data=1999}} + ]}, MSG_ERRQUEUE) = 0 +// SCM_TSTAMP_ACK for the second chunk should be received at t=30ms. + +0 recvmsg(4, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE|MSG_TRUNC, + msg_control=[ + {cmsg_level=SOL_SOCKET, + cmsg_type=SCM_TIMESTAMPING, + cmsg_data={scm_sec=0,scm_nsec=30000000}}, + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=ENOMSG, + ee_origin=SO_EE_ORIGIN_TIMESTAMPING, + ee_type=0, + ee_code=0, + ee_info=SCM_TSTAMP_ACK, + ee_data=3999}} + ]}, MSG_ERRQUEUE) = 0 -- cgit v1.2.3 From 6f6692053939038f48c2f9f404fe414038a44431 Mon Sep 17 00:00:00 2001 From: Soham Chakradeo Date: Tue, 17 Dec 2024 18:52:00 +0000 Subject: selftests/net: packetdrill: import tcp/eor, tcp/splice, tcp/ts_recent, tcp/blocking Use the standard import and testing method, as described in the import of tcp/ecn and tcp/close , tcp/sack , tcp/tcp_info. 
Signed-off-by: Willem de Bruijn Signed-off-by: Soham Chakradeo Link: https://patch.msgid.link/20241217185203.297935-4-sohamch.kernel@gmail.com Signed-off-by: Jakub Kicinski --- .../packetdrill/tcp_blocking_blocking-accept.pkt | 18 ++++++ .../packetdrill/tcp_blocking_blocking-connect.pkt | 13 ++++ .../net/packetdrill/tcp_blocking_blocking-read.pkt | 29 +++++++++ .../packetdrill/tcp_blocking_blocking-write.pkt | 35 +++++++++++ .../net/packetdrill/tcp_eor_no-coalesce-large.pkt | 38 ++++++++++++ .../packetdrill/tcp_eor_no-coalesce-retrans.pkt | 72 ++++++++++++++++++++++ .../net/packetdrill/tcp_eor_no-coalesce-small.pkt | 36 +++++++++++ .../packetdrill/tcp_eor_no-coalesce-subsequent.pkt | 66 ++++++++++++++++++++ .../tcp_splice_tcp_splice_loop_test.pkt | 20 ++++++ .../net/packetdrill/tcp_ts_recent_fin_tsval.pkt | 23 +++++++ .../net/packetdrill/tcp_ts_recent_invalid_ack.pkt | 25 ++++++++ .../net/packetdrill/tcp_ts_recent_reset_tsval.pkt | 25 ++++++++ 12 files changed, 400 insertions(+) create mode 100644 tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-accept.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-connect.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-read.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-write.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-large.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-retrans.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-small.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-subsequent.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_splice_tcp_splice_loop_test.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_ts_recent_fin_tsval.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_ts_recent_invalid_ack.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_ts_recent_reset_tsval.pkt (limited to 'tools') diff --git a/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-accept.pkt b/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-accept.pkt new file mode 100644 index 000000000000..38535701656e --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-accept.pkt @@ -0,0 +1,18 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test for blocking accept. + +`./defaults.sh` + +// Establish a connection. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +0...0.200 accept(3, ..., ...) = 4 + + +.1 < S 0:0(0) win 32792 + +0 > S. 0:0(0) ack 1 + +.1 < . 1:1(0) ack 1 win 257 + + +.1 write(4, ..., 2000) = 2000 + +0 > P. 1:2001(2000) ack 1 diff --git a/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-connect.pkt b/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-connect.pkt new file mode 100644 index 000000000000..3692ef102381 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-connect.pkt @@ -0,0 +1,13 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test for blocking connect. + +`./defaults.sh` + +// Establish a connection. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + + +.1...0.200 connect(3, ..., ...) = 0 + + +0 > S 0:0(0) + +.1 < S. 0:0(0) ack 1 win 5792 + +0 > . 
1:1(0) ack 1 diff --git a/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-read.pkt b/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-read.pkt new file mode 100644 index 000000000000..914eabab367a --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-read.pkt @@ -0,0 +1,29 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test for blocking read. +--tolerance_usecs=10000 + +`./defaults.sh` + +// Establish a connection. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +.1 < S 0:0(0) win 32792 + +0 > S. 0:0(0) ack 1 + +.1 < . 1:1(0) ack 1 win 257 + +0 accept(3, ..., ...) = 4 + + +0...0.100 read(4, ..., 2000) = 2000 + +.1 < P. 1:2001(2000) ack 1 win 257 + +0 > . 1:1(0) ack 2001 + + +.1...0.200 read(4, ..., 2000) = 2000 + +.1 < P. 2001:4001(2000) ack 1 win 257 + +0 > . 1:1(0) ack 4001 + + +.1 < P. 4001:6001(2000) ack 1 win 257 + +0 > . 1:1(0) ack 6001 + +0...0.000 read(4, ..., 1000) = 1000 + +0...0.000 read(4, ..., 1000) = 1000 diff --git a/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-write.pkt b/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-write.pkt new file mode 100644 index 000000000000..cec5a0725d95 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-write.pkt @@ -0,0 +1,35 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test for blocking write. +--tolerance_usecs=10000 + +`./defaults.sh +./set_sysctls.py /proc/sys/net/ipv4/tcp_min_tso_segs=10 +` + +// Establish a connection. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +.1 < S 0:0(0) win 50000 + +0 > S. 0:0(0) ack 1 + +.1 < . 1:1(0) ack 1 win 50000 + +0 accept(3, ..., ...) = 4 + +// Kernel doubles our value -> sk->sk_sndbuf is set to 42000 + +0 setsockopt(4, SOL_SOCKET, SO_SNDBUF, [21000], 4) = 0 + +0 getsockopt(4, SOL_SOCKET, SO_SNDBUF, [42000], [4]) = 0 + +// A write of 60000 does not block. + +0...0.300 write(4, ..., 61000) = 61000 // this write() blocks + + +.1 < . 1:1(0) ack 10001 win 50000 + + +.1 < . 1:1(0) ack 30001 win 50000 + +// This ACK should wakeup the write(). An ACK of 35001 does not. + +.1 < . 1:1(0) ack 36001 win 50000 + +// Reset to sysctls defaults. +`/tmp/sysctl_restore_${PPID}.sh` diff --git a/tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-large.pkt b/tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-large.pkt new file mode 100644 index 000000000000..f95b9b3c9fa1 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-large.pkt @@ -0,0 +1,38 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test TCP does not append any data from consequent writes to the tail +// skb created for the chunk. The large chunk itself should be packetized as +// usual. +`./defaults.sh +` + +// Initialize connection + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 + +0 > S. 0:0(0) ack 1 + +.01 < . 1:1(0) ack 1 win 514 + + +0 accept(3, ..., ...) = 4 + +// Write a 10400B chunk to fill the ICW, and have a 400 byte skb sitting on +// the tail. + +0 write(4, ..., 10400) = 10400 + +// Write another 10040B chunk with no coalescing options. + +0 send(4, ..., 10400, MSG_EOR) = 10400 + +// Write a 2KB chunk. 
This chunk should not be appended to the packets created +// the previous chunk. + +0 write(4, ..., 2000) = 2000 + + +0 > P. 1:10001(10000) ack 1 ++.001 < . 1:1(0) ack 10001 win 514 +// Now we have enough room to send out the 2 x 400B packets out. + +0 > P. 10001:20801(10800) ack 1 ++.001 < . 1:1(0) ack 20801 win 514 +// This 2KB packet should be sent alone. + +0 > P. 20801:22801(2000) ack 1 ++.001 < . 1:1(0) ack 22801 win 514 diff --git a/tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-retrans.pkt b/tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-retrans.pkt new file mode 100644 index 000000000000..2ff66075288e --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-retrans.pkt @@ -0,0 +1,72 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test TCP does not append any data from consequent writes to the tail +// skb created for the chunk. Also, when packets are retransmitted, they +// will not be coalesce into the same skb. +`./defaults.sh +` + +// Initialize connection + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 + +0 > S. 0:0(0) ack 1 + +.01 < . 1:1(0) ack 1 win 514 + + +0 accept(3, ..., ...) = 4 + +// Write a 10400B chunk to fill the ICW, and have a 400 byte skb sitting on +// the tail. + +0 write(4, ..., 10400) = 10400 + +// Write 10 400B chunks with no coalescing options. + +0 send(4, ..., 400, MSG_EOR) = 400 + +0 send(4, ..., 400, MSG_EOR) = 400 + +0 send(4, ..., 400, MSG_EOR) = 400 + +0 send(4, ..., 400, MSG_EOR) = 400 + +0 send(4, ..., 400, MSG_EOR) = 400 + +0 send(4, ..., 400, MSG_EOR) = 400 + +0 send(4, ..., 400, MSG_EOR) = 400 + +0 send(4, ..., 400, MSG_EOR) = 400 + +0 send(4, ..., 400, MSG_EOR) = 400 + +0 send(4, ..., 400, MSG_EOR) = 400 +// This chunk should not be appended to the skbs created for the previous chunk. + +0 write(4, ..., 10000) = 10000 + + +0 > P. 1:10001(10000) ack 1 ++.001 < . 1:1(0) ack 10001 win 514 +// Now we have enough room to send out the 2 x 400B packets out. + +0 > P. 10001:10801(800) ack 1 +// The 9 remaining 400B chunks should be sent as individual packets. + +0 > P. 10801:11201(400) ack 1 + +0 > P. 11201:11601(400) ack 1 + +0 > P. 11601:12001(400) ack 1 + +0 > P. 12001:12401(400) ack 1 + +0 > P. 12401:12801(400) ack 1 + +0 > P. 12801:13201(400) ack 1 + +0 > P. 13201:13601(400) ack 1 + +0 > P. 13601:14001(400) ack 1 + +0 > P. 14001:14401(400) ack 1 +// The last 10KB chunk should be sent separately. + +0 > P. 14401:24401(10000) ack 1 + ++.001 < . 1:1(0) ack 10401 win 514 ++.001 < . 1:1(0) ack 10801 win 514 ++.001 < . 1:1(0) ack 11201 win 514 ++.001 < . 1:1(0) ack 11601 win 514 ++.001 < . 1:1(0) ack 12001 win 514 +// TCP should fill the hole but no coalescing should happen, and all +// retransmissions should be sent out as individual packets. + +// Note : This is timeout based retransmit. +// Do not put +0 here or flakes will come back. ++.004~+.008 > P. 12001:12401(400) ack 1 + ++.001 < . 1:1(0) ack 12401 win 514 + +0 > P. 12401:12801(400) ack 1 + +0 > P. 12801:13201(400) ack 1 ++.001 < . 1:1(0) ack 12801 win 514 ++.001 < . 1:1(0) ack 14401 win 514 ++.001 < . 
1:1(0) ack 24401 win 514 diff --git a/tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-small.pkt b/tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-small.pkt new file mode 100644 index 000000000000..77039c5aac39 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-small.pkt @@ -0,0 +1,36 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test TCP does not append any data from consequent writes to the tail +// skb created for the chunk. +`./defaults.sh +` + +// Initialize connection + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 + +0 > S. 0:0(0) ack 1 + +.01 < . 1:1(0) ack 1 win 514 + + +0 accept(3, ..., ...) = 4 + +// Write a 10400B chunk to fill the ICW, and have a 400 byte skb sitting on +// the tail. + +0 write(4, ..., 10400) = 10400 + +// Write a 400B chunk with no coalescing options. + +0 send(4, ..., 400, MSG_EOR) = 400 + +// This chunk should not be appended to the skbs created for the previous chunk. + +0 write(4, ..., 10000) = 10000 + + +0 > P. 1:10001(10000) ack 1 ++.001 < . 1:1(0) ack 10001 win 514 +// Now we have enough room to send out the 2 x 400B packets out. + +0 > P. 10001:10801(800) ack 1 + +0 > P. 10801:20801(10000) ack 1 ++.001 < . 1:1(0) ack 10401 win 514 ++.001 < . 1:1(0) ack 10801 win 514 ++.001 < . 1:1(0) ack 20801 win 514 diff --git a/tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-subsequent.pkt b/tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-subsequent.pkt new file mode 100644 index 000000000000..dd5a06250595 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-subsequent.pkt @@ -0,0 +1,66 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test TCP does not append any data from consequent writes to the tail +// skb created for the chunk even though we have 10 back-to-back small +// writes. +`./defaults.sh +` + +// Initialize connection + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 + +0 > S. 0:0(0) ack 1 + +.01 < . 1:1(0) ack 1 win 514 + + +0 accept(3, ..., ...) = 4 + +// Write a 10400B chunk to fill the ICW, and have a 400 byte skb sitting on +// the tail. + +0 write(4, ..., 10400) = 10400 + +// Write 10 400B chunks with no coalescing options. + +0 send(4, ..., 400, MSG_EOR) = 400 + +0 send(4, ..., 400, MSG_EOR) = 400 + +0 send(4, ..., 400, MSG_EOR) = 400 + +0 send(4, ..., 400, MSG_EOR) = 400 + +0 send(4, ..., 400, MSG_EOR) = 400 + +0 send(4, ..., 400, MSG_EOR) = 400 + +0 send(4, ..., 400, MSG_EOR) = 400 + +0 send(4, ..., 400, MSG_EOR) = 400 + +0 send(4, ..., 400, MSG_EOR) = 400 + +0 send(4, ..., 400, MSG_EOR) = 400 +// This chunk should not be appended to the skbs created for the previous chunk. + +0 write(4, ..., 10000) = 10000 + + +0 > P. 1:10001(10000) ack 1 ++.001 < . 1:1(0) ack 10001 win 514 +// Now we have enough room to send out the 2 x 400B packets out. + +0 > P. 10001:10801(800) ack 1 +// The 9 remaining 400B chunks should be sent as individual packets. + +0 > P. 10801:11201(400) ack 1 + +0 > P. 11201:11601(400) ack 1 + +0 > P. 11601:12001(400) ack 1 + +0 > P. 12001:12401(400) ack 1 + +0 > P. 12401:12801(400) ack 1 + +0 > P. 12801:13201(400) ack 1 + +0 > P. 13201:13601(400) ack 1 + +0 > P. 13601:14001(400) ack 1 + +0 > P. 14001:14401(400) ack 1 +// The last 10KB chunk should be sent separately. + +0 > P. 
14401:24401(10000) ack 1 + ++.001 < . 1:1(0) ack 10401 win 514 ++.001 < . 1:1(0) ack 10801 win 514 ++.001 < . 1:1(0) ack 11201 win 514 ++.001 < . 1:1(0) ack 11601 win 514 ++.001 < . 1:1(0) ack 12001 win 514 ++.001 < . 1:1(0) ack 12401 win 514 ++.001 < . 1:1(0) ack 12801 win 514 ++.001 < . 1:1(0) ack 13201 win 514 ++.001 < . 1:1(0) ack 13601 win 514 ++.001 < . 1:1(0) ack 14001 win 514 ++.001 < . 1:1(0) ack 14401 win 514 ++.001 < . 1:1(0) ack 24401 win 514 diff --git a/tools/testing/selftests/net/packetdrill/tcp_splice_tcp_splice_loop_test.pkt b/tools/testing/selftests/net/packetdrill/tcp_splice_tcp_splice_loop_test.pkt new file mode 100644 index 000000000000..0cbd43253236 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_splice_tcp_splice_loop_test.pkt @@ -0,0 +1,20 @@ +// SPDX-License-Identifier: GPL-2.0 +`./defaults.sh` + +// Initialize a server socket + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 setsockopt(3, SOL_IP, IP_FREEBIND, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +// Connection should get accepted + +0 < S 0:0(0) win 32972 + +0 > S. 0:0(0) ack 1 <...> + +0 < . 1:1(0) ack 1 win 257 + +0 accept(3, ..., ...) = 4 + + +0 pipe([5, 6]) = 0 + +0 < U. 1:101(100) ack 1 win 257 urg 100 + +0 splice(4, NULL, 6, NULL, 99, 0) = 99 + +0 splice(4, NULL, 6, NULL, 1, 0) = 0 diff --git a/tools/testing/selftests/net/packetdrill/tcp_ts_recent_fin_tsval.pkt b/tools/testing/selftests/net/packetdrill/tcp_ts_recent_fin_tsval.pkt new file mode 100644 index 000000000000..e61424a7bd0a --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_ts_recent_fin_tsval.pkt @@ -0,0 +1,23 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test that we send FIN packet with correct TSval +--tcp_ts_tick_usecs=1000 +--tolerance_usecs=7000 + +`./defaults.sh` + +// Create a socket. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +// Establish a connection. + +0 < S 0:0(0) win 20000 + +0 > S. 0:0(0) ack 1 + +.1 < . 1:1(0) ack 1 win 20000 + +0 accept(3, ..., ...) = 4 + + +1 close(4) = 0 +// Check that FIN TSval is updated properly, one second has passed since last sent packet. + +0 > F. 1:1(0) ack 1 diff --git a/tools/testing/selftests/net/packetdrill/tcp_ts_recent_invalid_ack.pkt b/tools/testing/selftests/net/packetdrill/tcp_ts_recent_invalid_ack.pkt new file mode 100644 index 000000000000..174ce9a1bfc0 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_ts_recent_invalid_ack.pkt @@ -0,0 +1,25 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test that we reject TS val updates on a packet with invalid ACK sequence + +`./defaults.sh +` + +// Create a socket. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +// Establish a connection. + +.1 < S 0:0(0) win 20000 + +0 > S. 0:0(0) ack 1 + +.1 < . 1:1(0) ack 1 win 20000 + +0 accept(3, ..., ...) = 4 + +// bad packet with high tsval (its ACK sequence is above our sndnxt) + +0 < F. 1:1(0) ack 9999 win 20000 + + + +0 < . 1:1001(1000) ack 1 win 20000 + +0 > . 
1:1(0) ack 1001 diff --git a/tools/testing/selftests/net/packetdrill/tcp_ts_recent_reset_tsval.pkt b/tools/testing/selftests/net/packetdrill/tcp_ts_recent_reset_tsval.pkt new file mode 100644 index 000000000000..2e3b3bb7493a --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_ts_recent_reset_tsval.pkt @@ -0,0 +1,25 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test that we send RST packet with correct TSval +--tcp_ts_tick_usecs=1000 + +`./defaults.sh` + +// Create a socket. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +// Establish a connection. + +0 < S 0:0(0) win 20000 + +0 > S. 0:0(0) ack 1 + +.1 < . 1:1(0) ack 1 win 20000 + +0 accept(3, ..., ...) = 4 + + +0 < . 1:1001(1000) ack 1 win 20000 + +0 > . 1:1(0) ack 1001 + + +1 close(4) = 0 +// Check that RST TSval is updated properly, one second has passed since last sent packet. + +0 > R. 1:1(0) ack 1001 -- cgit v1.2.3 From 5d4cadef52f29eea779a0b44e09f59657c1b46d8 Mon Sep 17 00:00:00 2001 From: Soham Chakradeo Date: Tue, 17 Dec 2024 18:52:01 +0000 Subject: selftests/net: packetdrill: import tcp/user_timeout, tcp/validate, tcp/sendfile, tcp/limited-transmit, tcp/syscall_bad_arg Use the standard import and testing method, as described in the import of tcp/ecn and tcp/close , tcp/sack , tcp/tcp_info. Signed-off-by: Willem de Bruijn Signed-off-by: Soham Chakradeo Link: https://patch.msgid.link/20241217185203.297935-5-sohamch.kernel@gmail.com Signed-off-by: Jakub Kicinski --- ...p_limited_transmit_limited-transmit-no-sack.pkt | 53 ++++++++++++++++++++++ .../tcp_limited_transmit_limited-transmit-sack.pkt | 50 ++++++++++++++++++++ .../packetdrill/tcp_sendfile_sendfile-simple.pkt | 26 +++++++++++ ...cp_syscall_bad_arg_fastopen-invalid-buf-ptr.pkt | 42 +++++++++++++++++ .../tcp_syscall_bad_arg_sendmsg-empty-iov.pkt | 30 ++++++++++++ ...tcp_syscall_bad_arg_syscall-invalid-buf-ptr.pkt | 25 ++++++++++ .../tcp_user_timeout_user-timeout-probe.pkt | 37 +++++++++++++++ .../packetdrill/tcp_user_timeout_user_timeout.pkt | 32 +++++++++++++ .../tcp_validate_validate-established-no-flags.pkt | 24 ++++++++++ 9 files changed, 319 insertions(+) create mode 100644 tools/testing/selftests/net/packetdrill/tcp_limited_transmit_limited-transmit-no-sack.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_limited_transmit_limited-transmit-sack.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_sendfile_sendfile-simple.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_syscall_bad_arg_fastopen-invalid-buf-ptr.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_syscall_bad_arg_sendmsg-empty-iov.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_syscall_bad_arg_syscall-invalid-buf-ptr.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_user_timeout_user-timeout-probe.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_user_timeout_user_timeout.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_validate_validate-established-no-flags.pkt (limited to 'tools') diff --git a/tools/testing/selftests/net/packetdrill/tcp_limited_transmit_limited-transmit-no-sack.pkt b/tools/testing/selftests/net/packetdrill/tcp_limited_transmit_limited-transmit-no-sack.pkt new file mode 100644 index 000000000000..96b01eb5b7a4 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_limited_transmit_limited-transmit-no-sack.pkt @@ -0,0 +1,53 @@ +// 
SPDX-License-Identifier: GPL-2.0 +// Test RFC 3042 "Limited Transmit": "sending a new data segment in +// response to each of the first two duplicate acknowledgments that +// arrive at the sender". +// This variation tests a receiver that doesn't support SACK. + +`./defaults.sh` + +// Establish a connection. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +.1 < S 0:0(0) win 32792 + +0 > S. 0:0(0) ack 1 + +.1 < . 1:1(0) ack 1 win 320 + +0 accept(3, ..., ...) = 4 + +// Write some data, and send the initial congestion window. + +0 write(4, ..., 15000) = 15000 + +0 > P. 1:10001(10000) ack 1 + +// Limited transmit: on first dupack, send a new data segment. + +.11 < . 1:1(0) ack 1 win 320 + +0 > . 10001:11001(1000) ack 1 + +// Limited transmit: on second dupack, send a new data segment. + +.01 < . 1:1(0) ack 1 win 320 + +0 > . 11001:12001(1000) ack 1 + +// It turned out to be reordering, not loss. +// We have one packet newly acked (1001:3001 were DUP-ACK'd) +// So we revert state back to Open. Slow start cwnd from 10 to 11 +// and send 11 - 9 = 2 packets + +.01 < . 1:1(0) ack 3001 win 320 + +0 > P. 12001:14001(2000) ack 1 + + +.02 < . 1:1(0) ack 5001 win 320 + +0 > P. 14001:15001(1000) ack 1 + +// Client gradually ACKs all data. + +.02 < . 1:1(0) ack 7001 win 320 + +.02 < . 1:1(0) ack 9001 win 320 + +.02 < . 1:1(0) ack 11001 win 320 + +.02 < . 1:1(0) ack 13001 win 320 + +.02 < . 1:1(0) ack 15001 win 320 + +// Clean up. + +.17 close(4) = 0 + +0 > F. 15001:15001(0) ack 1 + +.1 < F. 1:1(0) ack 15002 win 257 + +0 > . 15002:15002(0) ack 2 diff --git a/tools/testing/selftests/net/packetdrill/tcp_limited_transmit_limited-transmit-sack.pkt b/tools/testing/selftests/net/packetdrill/tcp_limited_transmit_limited-transmit-sack.pkt new file mode 100644 index 000000000000..642da51ec3a4 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_limited_transmit_limited-transmit-sack.pkt @@ -0,0 +1,50 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test RFC 3042 "Limited Transmit": "sending a new data segment in +// response to each of the first two duplicate acknowledgments that +// arrive at the sender". +// This variation tests a receiver that supports SACK. + +`./defaults.sh` + +// Establish a connection. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +.1 < S 0:0(0) win 32792 + +0 > S. 0:0(0) ack 1 + +.1 < . 1:1(0) ack 1 win 320 + +0 accept(3, ..., ...) = 4 + +// Write some data, and send the initial congestion window. + +0 write(4, ..., 15000) = 15000 + +0 > P. 1:10001(10000) ack 1 + +// Limited transmit: on first dupack, send a new data segment. + +.11 < . 1:1(0) ack 1 win 320 + +0 > . 10001:11001(1000) ack 1 + +// Limited transmit: on second dupack, send a new data segment. + +.01 < . 1:1(0) ack 1 win 320 + +0 > . 11001:12001(1000) ack 1 + +// It turned out to be reordering, not loss. + +.01 < . 1:1(0) ack 3001 win 320 + +0 > P. 12001:14001(2000) ack 1 + + +.02 < . 1:1(0) ack 5001 win 320 + +0 > P. 14001:15001(1000) ack 1 + +// Client gradually ACKs all data. + +.02 < . 1:1(0) ack 7001 win 320 + +.02 < . 1:1(0) ack 9001 win 320 + +.02 < . 1:1(0) ack 11001 win 320 + +.02 < . 1:1(0) ack 13001 win 320 + +.02 < . 1:1(0) ack 15001 win 320 + +// Clean up. + +.17 close(4) = 0 + +0 > F. 15001:15001(0) ack 1 + +.1 < F. 1:1(0) ack 15002 win 257 + +0 > . 
15002:15002(0) ack 2 diff --git a/tools/testing/selftests/net/packetdrill/tcp_sendfile_sendfile-simple.pkt b/tools/testing/selftests/net/packetdrill/tcp_sendfile_sendfile-simple.pkt new file mode 100644 index 000000000000..6740859a1360 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_sendfile_sendfile-simple.pkt @@ -0,0 +1,26 @@ +// SPDX-License-Identifier: GPL-2.0 +// Simplest possible test of open() and then sendfile(). +// We write some zeroes into a file (since packetdrill expects payloads +// to be all zeroes) and then open() the file, then use sendfile() +// and verify that the correct number of zeroes goes out. + +`./defaults.sh +/bin/rm -f /tmp/testfile +/bin/dd bs=1 count=5 if=/dev/zero of=/tmp/testfile status=none +` + +// Initialize connection + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 + +0 > S. 0:0(0) ack 1 + +0 < . 1:1(0) ack 1 win 514 + + +0 accept(3, ..., ...) = 4 + + +0 open("/tmp/testfile", O_RDONLY) = 5 + +0 sendfile(4, 5, [0], 5) = 5 + +0 > P. 1:6(5) ack 1 diff --git a/tools/testing/selftests/net/packetdrill/tcp_syscall_bad_arg_fastopen-invalid-buf-ptr.pkt b/tools/testing/selftests/net/packetdrill/tcp_syscall_bad_arg_fastopen-invalid-buf-ptr.pkt new file mode 100644 index 000000000000..8940726a3ec2 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_syscall_bad_arg_fastopen-invalid-buf-ptr.pkt @@ -0,0 +1,42 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test TCP fastopen behavior with NULL as buffer pointer, but a non-zero +// buffer length. +`./defaults.sh +./set_sysctls.py /proc/sys/net/ipv4/tcp_timestamps=0` + +// Cache warmup: send a Fast Open cookie request + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 ++0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0 ++0 setsockopt(3, SOL_TCP, TCP_FASTOPEN_CONNECT, [1], 4) = 0 ++0 connect(3, ..., ...) = -1 EINPROGRESS (Operation is now in progress) ++0 > S 0:0(0) ++0 < S. 123:123(0) ack 1 win 14600 ++0 > . 1:1(0) ack 1 ++0 close(3) = 0 ++0 > F. 1:1(0) ack 1 ++0 < F. 1:1(0) ack 2 win 92 ++0 > . 2:2(0) ack 2 + +// Test with MSG_FASTOPEN without TCP_FASTOPEN_CONNECT. ++0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4 ++0 fcntl(4, F_SETFL, O_RDWR|O_NONBLOCK) = 0 ++0 sendto(4, NULL, 1, MSG_FASTOPEN, ..., ...) = -1 ++0 close(4) = 0 + +// Test with TCP_FASTOPEN_CONNECT without MSG_FASTOPEN. ++0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 5 ++0 fcntl(5, F_SETFL, O_RDWR|O_NONBLOCK) = 0 ++0 setsockopt(5, SOL_TCP, TCP_FASTOPEN_CONNECT, [1], 4) = 0 ++0 connect(5, ..., ...) = 0 ++0 sendto(5, NULL, 1, 0, ..., ...) = -1 ++0 close(5) = 0 + +// Test with both TCP_FASTOPEN_CONNECT and MSG_FASTOPEN. ++0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 6 ++0 fcntl(6, F_SETFL, O_RDWR|O_NONBLOCK) = 0 ++0 setsockopt(6, SOL_TCP, TCP_FASTOPEN_CONNECT, [1], 4) = 0 ++0 connect(6, ..., ...) = 0 ++0 sendto(6, NULL, 1, MSG_FASTOPEN, ..., ...) = -1 ++0 close(6) = 0 + +`/tmp/sysctl_restore_${PPID}.sh` diff --git a/tools/testing/selftests/net/packetdrill/tcp_syscall_bad_arg_sendmsg-empty-iov.pkt b/tools/testing/selftests/net/packetdrill/tcp_syscall_bad_arg_sendmsg-empty-iov.pkt new file mode 100644 index 000000000000..b2b2cdf27e20 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_syscall_bad_arg_sendmsg-empty-iov.pkt @@ -0,0 +1,30 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test that we correctly skip zero-length IOVs. 
+`./defaults.sh` + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_ZEROCOPY, [1], 4) = 0 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 + +0 > S. 0:0(0) ack 1 + +.01 < . 1:1(0) ack 1 win 257 + +0 accept(3, ..., ...) = 4 + +0 setsockopt(4, SOL_TCP, TCP_NODELAY, [1], 4) = 0 + + +0 sendmsg(4, {msg_name(...)=..., + msg_iov(4)=[{..., 0}, {..., 40}, {..., 0}, {..., 20}], + msg_flags=0}, 0) = 60 + +0 > P. 1:61(60) ack 1 + +.01 < . 1:1(0) ack 61 win 257 + + +0 sendmsg(4, {msg_name(...)=..., + msg_iov(4)=[{..., 0}, {..., 0}, {..., 0}, {..., 0}], + msg_flags=0}, MSG_ZEROCOPY) = 0 + + +0 sendmsg(4, {msg_name(...)=..., + msg_iov(4)=[{..., 0}, {..., 10}, {..., 0}, {..., 50}], + msg_flags=0}, MSG_ZEROCOPY) = 60 + +0 > P. 61:121(60) ack 1 + +.01 < . 1:1(0) ack 121 win 257 diff --git a/tools/testing/selftests/net/packetdrill/tcp_syscall_bad_arg_syscall-invalid-buf-ptr.pkt b/tools/testing/selftests/net/packetdrill/tcp_syscall_bad_arg_syscall-invalid-buf-ptr.pkt new file mode 100644 index 000000000000..59f5903f285c --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_syscall_bad_arg_syscall-invalid-buf-ptr.pkt @@ -0,0 +1,25 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test kernel behavior with NULL as buffer pointer + +`./defaults.sh` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 + +0 > S. 0:0(0) ack 1 + +.2 < . 1:1(0) ack 1 win 514 + + +0 accept(3, ..., ...) = 4 + + +0 write(4, NULL, 1000) = -1 EFAULT (Bad address) + +0 send(4, NULL, 1000, 0) = -1 EFAULT (Bad address) + +0 sendto(4, NULL, 1000, 0, ..., ...) = -1 EFAULT (Bad address) + + +0 < . 1:1001(1000) ack 1 win 200 + +0 read(4, NULL, 1000) = -1 EFAULT (Bad address) + +0 recv(4, NULL, 1000, 0) = -1 EFAULT (Bad address) + +0 recvfrom(4, NULL, 1000, 0, ..., ...) = -1 EFAULT (Bad address) diff --git a/tools/testing/selftests/net/packetdrill/tcp_user_timeout_user-timeout-probe.pkt b/tools/testing/selftests/net/packetdrill/tcp_user_timeout_user-timeout-probe.pkt new file mode 100644 index 000000000000..183051ba0cae --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_user_timeout_user-timeout-probe.pkt @@ -0,0 +1,37 @@ +// SPDX-License-Identifier: GPL-2.0 + +`./defaults.sh` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + + +0 < S 0:0(0) win 0 + +0 > S. 0:0(0) ack 1 + + +.1 < . 1:1(0) ack 1 win 65530 + +0 accept(3, ..., ...) = 4 + + +0 setsockopt(4, SOL_TCP, TCP_USER_TIMEOUT, [3000], 4) = 0 + +0 write(4, ..., 24) = 24 + +0 > P. 1:25(24) ack 1 + +.1 < . 1:1(0) ack 25 win 65530 + +0 %{ assert tcpi_probes == 0, tcpi_probes; \ + assert tcpi_backoff == 0, tcpi_backoff }% + +// install a qdisc dropping all packets + +0 `tc qdisc delete dev tun0 root 2>/dev/null ; tc qdisc add dev tun0 root pfifo limit 0` + +0 write(4, ..., 24) = 24 + // When qdisc is congested we retry every 500ms + // (TCP_RESOURCE_PROBE_INTERVAL) and therefore + // we retry 6 times before hitting 3s timeout. 
+ // First verify that the connection is alive: ++3.250 write(4, ..., 24) = 24 + // Now verify that shortly after that the socket is dead: + +.100 write(4, ..., 24) = -1 ETIMEDOUT (Connection timed out) + + +0 %{ assert tcpi_probes == 6, tcpi_probes; \ + assert tcpi_backoff == 0, tcpi_backoff }% + +0 close(4) = 0 diff --git a/tools/testing/selftests/net/packetdrill/tcp_user_timeout_user_timeout.pkt b/tools/testing/selftests/net/packetdrill/tcp_user_timeout_user_timeout.pkt new file mode 100644 index 000000000000..2efe02bfba9c --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_user_timeout_user_timeout.pkt @@ -0,0 +1,32 @@ +// SPDX-License-Identifier: GPL-2.0 +`./defaults.sh` + +// Initialize connection + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 + +0 > S. 0:0(0) ack 1 + +.1 < . 1:1(0) ack 1 win 32792 + + + +0 accept(3, ..., ...) = 4 + +// Okay, we received nothing, and decide to close this idle socket. +// We set TCP_USER_TIMEOUT to 3 seconds because really it is not worth +// trying hard to cleanly close this flow, at the price of keeping +// a TCP structure in kernel for about 1 minute ! + +2 setsockopt(4, SOL_TCP, TCP_USER_TIMEOUT, [3000], 4) = 0 + +0 close(4) = 0 + + +0 > F. 1:1(0) ack 1 + +.3~+.400 > F. 1:1(0) ack 1 + +.3~+.400 > F. 1:1(0) ack 1 + +.6~+.800 > F. 1:1(0) ack 1 + +// We finally receive something from the peer, but it is way too late +// Our socket vanished because TCP_USER_TIMEOUT was really small + +0 < . 1:2(1) ack 1 win 32792 + +0 > R 1:1(0) diff --git a/tools/testing/selftests/net/packetdrill/tcp_validate_validate-established-no-flags.pkt b/tools/testing/selftests/net/packetdrill/tcp_validate_validate-established-no-flags.pkt new file mode 100644 index 000000000000..8bd60226ccfc --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_validate_validate-established-no-flags.pkt @@ -0,0 +1,24 @@ +// SPDX-License-Identifier: GPL-2.0 +// Verify that established connections drop a segment without the ACK flag set. + +`./defaults.sh` + +// Create a socket. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +// Establish a connection. + +0 < S 0:0(0) win 20000 + +0 > S. 0:0(0) ack 1 + +.01 < . 1:1(0) ack 1 win 20000 + +0 accept(3, ..., ...) = 4 + +// Receive a segment with no flags set, verify that it's not enqueued. + +.01 < - 1:1001(1000) win 20000 + +0 ioctl(4, SIOCINQ, [0]) = 0 + +// Receive a segment with ACK flag set, verify that it is enqueued. + +.01 < . 1:1001(1000) ack 1 win 20000 + +0 ioctl(4, SIOCINQ, [1000]) = 0 -- cgit v1.2.3 From a4e17a8f239a545c463f8ec27db4ed6e74b31841 Mon Sep 17 00:00:00 2001 From: "Ricardo B. Marliere" Date: Thu, 5 Dec 2024 17:50:35 -0300 Subject: ktest.pl: Check kernelrelease return in get_version In the case of a test that uses the special option ${KERNEL_VERSION} in one of its settings but has no configuration available in ${OUTPUT_DIR}, for example if it's a new empty directory, then the `make kernelrelease` call will fail and the subroutine will chomp an empty string, silently. Fix that by adding an empty configuration and retrying. Cc: stable@vger.kernel.org Cc: John Hawley Fixes: 5f9b6ced04a4e ("ktest: Bisecting, install modules, add logging") Link: https://lore.kernel.org/20241205-ktest_kver_fallback-v2-1-869dae4c7777@suse.com Signed-off-by: Ricardo B. 
Marliere Signed-off-by: Steven Rostedt --- tools/testing/ktest/ktest.pl | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'tools') diff --git a/tools/testing/ktest/ktest.pl b/tools/testing/ktest/ktest.pl index dacad94e2be4..171262915636 100755 --- a/tools/testing/ktest/ktest.pl +++ b/tools/testing/ktest/ktest.pl @@ -2419,6 +2419,11 @@ sub get_version { return if ($have_version); doprint "$make kernelrelease ... "; $version = `$make -s kernelrelease | tail -1`; + if (!length($version)) { + run_command "$make allnoconfig" or return 0; + doprint "$make kernelrelease ... "; + $version = `$make -s kernelrelease | tail -1`; + } chomp($version); doprint "$version\n"; $have_version = 1; -- cgit v1.2.3 From 776735b954f49f85fd19e1198efa421fae2ad77c Mon Sep 17 00:00:00 2001 From: Ba Jing Date: Mon, 2 Sep 2024 21:07:35 +0800 Subject: ktest.pl: Remove unused declarations in run_bisect_test function Since $output and $ret are not used in the subsequent code, the declarations should be removed. Fixes: a75fececff3c ("ktest: Added sample.conf, new %default option format") Link: https://lore.kernel.org/20240902130735.6034-1-bajing@cmss.chinamobile.com Signed-off-by: Ba Jing Signed-off-by: Steven Rostedt --- tools/testing/ktest/ktest.pl | 2 -- 1 file changed, 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/ktest/ktest.pl b/tools/testing/ktest/ktest.pl index 171262915636..c76ad0be54e2 100755 --- a/tools/testing/ktest/ktest.pl +++ b/tools/testing/ktest/ktest.pl @@ -2965,8 +2965,6 @@ sub run_bisect_test { my $failed = 0; my $result; - my $output; - my $ret; $in_bisect = 1; -- cgit v1.2.3 From 770221a36932a65c5a8b7711b5477430a1dbf5e8 Mon Sep 17 00:00:00 2001 From: Ba Jing Date: Mon, 2 Sep 2024 20:46:45 +0800 Subject: ktest.pl: Fix typo in comment "on of these" should be "one of these". Fixes: 77d942ceacbad ("ktest: Create variables for the ktest config files") Link: https://lore.kernel.org/20240902124645.5674-1-bajing@cmss.chinamobile.com Signed-off-by: Ba Jing Signed-off-by: Steven Rostedt --- tools/testing/ktest/ktest.pl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/ktest/ktest.pl b/tools/testing/ktest/ktest.pl index c76ad0be54e2..8c8da966c641 100755 --- a/tools/testing/ktest/ktest.pl +++ b/tools/testing/ktest/ktest.pl @@ -1245,7 +1245,7 @@ sub __read_config { # Config variables are only active while reading the # config and can be defined anywhere. They also ignore # TEST_START and DEFAULTS, but are skipped if they are in - # on of these sections that have SKIP defined. + # one of these sections that have SKIP defined. # The save variable can be # defined multiple times and the new one simply overrides # the previous one. -- cgit v1.2.3 From f3a30016e4b557495d49df7851f18ad97b6d5a23 Mon Sep 17 00:00:00 2001 From: WangYuli Date: Wed, 18 Dec 2024 22:04:37 +0800 Subject: ktest.pl: Fix typo "accesing" There is a spelling mistake of 'accesing' in comments which should be 'accessing'. 
Fixes: 6d76f469c8ac9 ("ktest: Add useful example configs") Link: https://lore.kernel.org/8714AE3735C0EA0B+20241218140437.194906-1-wangyuli@uniontech.com Reviewed-by: SeongJae Park Signed-off-by: WangYuli Signed-off-by: Steven Rostedt --- tools/testing/ktest/examples/include/defaults.conf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/ktest/examples/include/defaults.conf b/tools/testing/ktest/examples/include/defaults.conf index 63a1a83f4f0b..f6d8517a471e 100644 --- a/tools/testing/ktest/examples/include/defaults.conf +++ b/tools/testing/ktest/examples/include/defaults.conf @@ -46,7 +46,7 @@ CLEAR_LOG = 1 SSH_USER = root -# For accesing the machine, we will ssh to root@machine. +# For accessing the machine, we will ssh to root@machine. SSH := ssh ${SSH_USER}@${MACHINE} # Update this. The default here is ktest will ssh to the target box -- cgit v1.2.3 From 09bb926d290789ff35e7fa53045811a8c57356a9 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 27 Nov 2024 16:55:33 -0800 Subject: KVM: selftests: Return a value from vcpu_get_reg() instead of using an out-param Return a uint64_t from vcpu_get_reg() instead of having the caller provide a pointer to storage, as none of the vcpu_get_reg() usage in KVM selftests accesses a register larger than 64 bits, and vcpu_set_reg() only accepts a 64-bit value. If a use case comes along that needs to get a register that is larger than 64 bits, then a utility can be added to assert success and take a void pointer, but until then, forcing an out param yields ugly code and prevents feeding the output of vcpu_get_reg() into vcpu_set_reg(). Reviewed-by: Andrew Jones Acked-by: Claudio Imbrenda Link: https://lore.kernel.org/r/20241128005547.4077116-3-seanjc@google.com Signed-off-by: Sean Christopherson --- .../selftests/kvm/aarch64/aarch32_id_regs.c | 10 ++-- .../selftests/kvm/aarch64/debug-exceptions.c | 4 +- tools/testing/selftests/kvm/aarch64/hypercalls.c | 6 +- tools/testing/selftests/kvm/aarch64/no-vgic-v3.c | 2 +- tools/testing/selftests/kvm/aarch64/psci_test.c | 8 +-- tools/testing/selftests/kvm/aarch64/set_id_regs.c | 22 ++++---- .../selftests/kvm/aarch64/vpmu_counter_access.c | 19 +++---- tools/testing/selftests/kvm/include/kvm_util.h | 6 +- .../testing/selftests/kvm/lib/aarch64/processor.c | 8 +-- tools/testing/selftests/kvm/lib/riscv/processor.c | 66 +++++++++++----------- tools/testing/selftests/kvm/riscv/arch_timer.c | 2 +- tools/testing/selftests/kvm/riscv/ebreak_test.c | 2 +- tools/testing/selftests/kvm/riscv/sbi_pmu_test.c | 2 +- tools/testing/selftests/kvm/s390x/resets.c | 2 +- tools/testing/selftests/kvm/steal_time.c | 3 +- 15 files changed, 81 insertions(+), 81 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/kvm/aarch64/aarch32_id_regs.c b/tools/testing/selftests/kvm/aarch64/aarch32_id_regs.c index 8e5bd07a3727..447d61cae4db 100644 --- a/tools/testing/selftests/kvm/aarch64/aarch32_id_regs.c +++ b/tools/testing/selftests/kvm/aarch64/aarch32_id_regs.c @@ -97,7 +97,7 @@ static void test_user_raz_wi(struct kvm_vcpu *vcpu) uint64_t reg_id = raz_wi_reg_ids[i]; uint64_t val; - vcpu_get_reg(vcpu, reg_id, &val); + val = vcpu_get_reg(vcpu, reg_id); TEST_ASSERT_EQ(val, 0); /* @@ -106,7 +106,7 @@ static void test_user_raz_wi(struct kvm_vcpu *vcpu) */ vcpu_set_reg(vcpu, reg_id, BAD_ID_REG_VAL); - vcpu_get_reg(vcpu, reg_id, &val); + val = vcpu_get_reg(vcpu, reg_id); TEST_ASSERT_EQ(val, 0); } } @@ -126,14 +126,14 @@ static void test_user_raz_invariant(struct kvm_vcpu *vcpu) 
uint64_t reg_id = raz_invariant_reg_ids[i]; uint64_t val; - vcpu_get_reg(vcpu, reg_id, &val); + val = vcpu_get_reg(vcpu, reg_id); TEST_ASSERT_EQ(val, 0); r = __vcpu_set_reg(vcpu, reg_id, BAD_ID_REG_VAL); TEST_ASSERT(r < 0 && errno == EINVAL, "unexpected KVM_SET_ONE_REG error: r=%d, errno=%d", r, errno); - vcpu_get_reg(vcpu, reg_id, &val); + val = vcpu_get_reg(vcpu, reg_id); TEST_ASSERT_EQ(val, 0); } } @@ -144,7 +144,7 @@ static bool vcpu_aarch64_only(struct kvm_vcpu *vcpu) { uint64_t val, el0; - vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1), &val); + val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1)); el0 = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL0), val); return el0 == ID_AA64PFR0_EL1_ELx_64BIT_ONLY; diff --git a/tools/testing/selftests/kvm/aarch64/debug-exceptions.c b/tools/testing/selftests/kvm/aarch64/debug-exceptions.c index ff7a949fc96a..c7fb55c9135b 100644 --- a/tools/testing/selftests/kvm/aarch64/debug-exceptions.c +++ b/tools/testing/selftests/kvm/aarch64/debug-exceptions.c @@ -501,7 +501,7 @@ void test_single_step_from_userspace(int test_cnt) TEST_ASSERT(ss_enable, "Unexpected KVM_EXIT_DEBUG"); /* Check if the current pc is expected. */ - vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.pc), &pc); + pc = vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.pc)); TEST_ASSERT(!test_pc || pc == test_pc, "Unexpected pc 0x%lx (expected 0x%lx)", pc, test_pc); @@ -583,7 +583,7 @@ int main(int argc, char *argv[]) uint64_t aa64dfr0; vm = vm_create_with_one_vcpu(&vcpu, guest_code); - vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64DFR0_EL1), &aa64dfr0); + aa64dfr0 = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64DFR0_EL1)); __TEST_REQUIRE(debug_version(aa64dfr0) >= 6, "Armv8 debug architecture not supported."); kvm_vm_free(vm); diff --git a/tools/testing/selftests/kvm/aarch64/hypercalls.c b/tools/testing/selftests/kvm/aarch64/hypercalls.c index 9d192ce0078d..ec54ec7726e9 100644 --- a/tools/testing/selftests/kvm/aarch64/hypercalls.c +++ b/tools/testing/selftests/kvm/aarch64/hypercalls.c @@ -173,7 +173,7 @@ static void test_fw_regs_before_vm_start(struct kvm_vcpu *vcpu) const struct kvm_fw_reg_info *reg_info = &fw_reg_info[i]; /* First 'read' should be an upper limit of the features supported */ - vcpu_get_reg(vcpu, reg_info->reg, &val); + val = vcpu_get_reg(vcpu, reg_info->reg); TEST_ASSERT(val == FW_REG_ULIMIT_VAL(reg_info->max_feat_bit), "Expected all the features to be set for reg: 0x%lx; expected: 0x%lx; read: 0x%lx", reg_info->reg, FW_REG_ULIMIT_VAL(reg_info->max_feat_bit), val); @@ -184,7 +184,7 @@ static void test_fw_regs_before_vm_start(struct kvm_vcpu *vcpu) "Failed to clear all the features of reg: 0x%lx; ret: %d", reg_info->reg, errno); - vcpu_get_reg(vcpu, reg_info->reg, &val); + val = vcpu_get_reg(vcpu, reg_info->reg); TEST_ASSERT(val == 0, "Expected all the features to be cleared for reg: 0x%lx", reg_info->reg); @@ -214,7 +214,7 @@ static void test_fw_regs_after_vm_start(struct kvm_vcpu *vcpu) * Before starting the VM, the test clears all the bits. * Check if that's still the case. 
*/ - vcpu_get_reg(vcpu, reg_info->reg, &val); + val = vcpu_get_reg(vcpu, reg_info->reg); TEST_ASSERT(val == 0, "Expected all the features to be cleared for reg: 0x%lx", reg_info->reg); diff --git a/tools/testing/selftests/kvm/aarch64/no-vgic-v3.c b/tools/testing/selftests/kvm/aarch64/no-vgic-v3.c index 58304bbc2036..ebd70430c89d 100644 --- a/tools/testing/selftests/kvm/aarch64/no-vgic-v3.c +++ b/tools/testing/selftests/kvm/aarch64/no-vgic-v3.c @@ -164,7 +164,7 @@ int main(int argc, char *argv[]) uint64_t pfr0; vm = vm_create_with_one_vcpu(&vcpu, NULL); - vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1), &pfr0); + pfr0 = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1)); __TEST_REQUIRE(FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_GIC), pfr0), "GICv3 not supported."); kvm_vm_free(vm); diff --git a/tools/testing/selftests/kvm/aarch64/psci_test.c b/tools/testing/selftests/kvm/aarch64/psci_test.c index eaa7655fefc1..ab491ee9e5f7 100644 --- a/tools/testing/selftests/kvm/aarch64/psci_test.c +++ b/tools/testing/selftests/kvm/aarch64/psci_test.c @@ -111,8 +111,8 @@ static void assert_vcpu_reset(struct kvm_vcpu *vcpu) { uint64_t obs_pc, obs_x0; - vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.pc), &obs_pc); - vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.regs[0]), &obs_x0); + obs_pc = vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.pc)); + obs_x0 = vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.regs[0])); TEST_ASSERT(obs_pc == CPU_ON_ENTRY_ADDR, "unexpected target cpu pc: %lx (expected: %lx)", @@ -152,7 +152,7 @@ static void host_test_cpu_on(void) */ vcpu_power_off(target); - vcpu_get_reg(target, KVM_ARM64_SYS_REG(SYS_MPIDR_EL1), &target_mpidr); + target_mpidr = vcpu_get_reg(target, KVM_ARM64_SYS_REG(SYS_MPIDR_EL1)); vcpu_args_set(source, 1, target_mpidr & MPIDR_HWID_BITMASK); enter_guest(source); @@ -244,7 +244,7 @@ static void host_test_system_off2(void) setup_vm(guest_test_system_off2, &source, &target); - vcpu_get_reg(target, KVM_REG_ARM_PSCI_VERSION, &psci_version); + psci_version = vcpu_get_reg(target, KVM_REG_ARM_PSCI_VERSION); TEST_ASSERT(psci_version >= PSCI_VERSION(1, 3), "Unexpected PSCI version %lu.%lu", diff --git a/tools/testing/selftests/kvm/aarch64/set_id_regs.c b/tools/testing/selftests/kvm/aarch64/set_id_regs.c index a79b7f18452d..bc6cf50e5135 100644 --- a/tools/testing/selftests/kvm/aarch64/set_id_regs.c +++ b/tools/testing/selftests/kvm/aarch64/set_id_regs.c @@ -346,7 +346,7 @@ static uint64_t test_reg_set_success(struct kvm_vcpu *vcpu, uint64_t reg, uint64_t mask = ftr_bits->mask; uint64_t val, new_val, ftr; - vcpu_get_reg(vcpu, reg, &val); + val = vcpu_get_reg(vcpu, reg); ftr = (val & mask) >> shift; ftr = get_safe_value(ftr_bits, ftr); @@ -356,7 +356,7 @@ static uint64_t test_reg_set_success(struct kvm_vcpu *vcpu, uint64_t reg, val |= ftr; vcpu_set_reg(vcpu, reg, val); - vcpu_get_reg(vcpu, reg, &new_val); + new_val = vcpu_get_reg(vcpu, reg); TEST_ASSERT_EQ(new_val, val); return new_val; @@ -370,7 +370,7 @@ static void test_reg_set_fail(struct kvm_vcpu *vcpu, uint64_t reg, uint64_t val, old_val, ftr; int r; - vcpu_get_reg(vcpu, reg, &val); + val = vcpu_get_reg(vcpu, reg); ftr = (val & mask) >> shift; ftr = get_invalid_value(ftr_bits, ftr); @@ -384,7 +384,7 @@ static void test_reg_set_fail(struct kvm_vcpu *vcpu, uint64_t reg, TEST_ASSERT(r < 0 && errno == EINVAL, "Unexpected KVM_SET_ONE_REG error: r=%d, errno=%d", r, errno); - vcpu_get_reg(vcpu, reg, &val); + val = vcpu_get_reg(vcpu, reg); TEST_ASSERT_EQ(val, old_val); } @@ -471,7 +471,7 @@ static void test_user_set_mpam_reg(struct kvm_vcpu 
*vcpu) } /* Get the id register value */ - vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1), &val); + val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1)); /* Try to set MPAM=0. This should always be possible. */ val &= ~ID_AA64PFR0_EL1_MPAM_MASK; @@ -508,7 +508,7 @@ static void test_user_set_mpam_reg(struct kvm_vcpu *vcpu) } /* Get the id register value */ - vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR1_EL1), &val); + val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR1_EL1)); /* Try to set MPAM_frac=0. This should always be possible. */ val &= ~ID_AA64PFR1_EL1_MPAM_frac_MASK; @@ -576,7 +576,7 @@ static void test_clidr(struct kvm_vcpu *vcpu) uint64_t clidr; int level; - vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_CLIDR_EL1), &clidr); + clidr = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_CLIDR_EL1)); /* find the first empty level in the cache hierarchy */ for (level = 1; level < 7; level++) { @@ -601,7 +601,7 @@ static void test_ctr(struct kvm_vcpu *vcpu) { u64 ctr; - vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_CTR_EL0), &ctr); + ctr = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_CTR_EL0)); ctr &= ~CTR_EL0_DIC_MASK; if (ctr & CTR_EL0_IminLine_MASK) ctr--; @@ -617,7 +617,7 @@ static void test_vcpu_ftr_id_regs(struct kvm_vcpu *vcpu) test_clidr(vcpu); test_ctr(vcpu); - vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_MPIDR_EL1), &val); + val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_MPIDR_EL1)); val++; vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_MPIDR_EL1), val); @@ -630,7 +630,7 @@ static void test_assert_id_reg_unchanged(struct kvm_vcpu *vcpu, uint32_t encodin size_t idx = encoding_to_range_idx(encoding); uint64_t observed; - vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(encoding), &observed); + observed = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(encoding)); TEST_ASSERT_EQ(test_reg_vals[idx], observed); } @@ -665,7 +665,7 @@ int main(void) vm = vm_create_with_one_vcpu(&vcpu, guest_code); /* Check for AARCH64 only system */ - vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1), &val); + val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1)); el0 = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL0), val); aarch64_only = (el0 == ID_AA64PFR0_EL1_ELx_64BIT_ONLY); diff --git a/tools/testing/selftests/kvm/aarch64/vpmu_counter_access.c b/tools/testing/selftests/kvm/aarch64/vpmu_counter_access.c index f9c0c86d7e85..f16b3b27e32e 100644 --- a/tools/testing/selftests/kvm/aarch64/vpmu_counter_access.c +++ b/tools/testing/selftests/kvm/aarch64/vpmu_counter_access.c @@ -440,8 +440,7 @@ static void create_vpmu_vm(void *guest_code) "Failed to create vgic-v3, skipping"); /* Make sure that PMUv3 support is indicated in the ID register */ - vcpu_get_reg(vpmu_vm.vcpu, - KVM_ARM64_SYS_REG(SYS_ID_AA64DFR0_EL1), &dfr0); + dfr0 = vcpu_get_reg(vpmu_vm.vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64DFR0_EL1)); pmuver = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMUVer), dfr0); TEST_ASSERT(pmuver != ID_AA64DFR0_EL1_PMUVer_IMP_DEF && pmuver >= ID_AA64DFR0_EL1_PMUVer_IMP, @@ -484,7 +483,7 @@ static void test_create_vpmu_vm_with_pmcr_n(uint64_t pmcr_n, bool expect_fail) create_vpmu_vm(guest_code); vcpu = vpmu_vm.vcpu; - vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_PMCR_EL0), &pmcr_orig); + pmcr_orig = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_PMCR_EL0)); pmcr = pmcr_orig; /* @@ -493,7 +492,7 @@ static void test_create_vpmu_vm_with_pmcr_n(uint64_t pmcr_n, bool expect_fail) */ set_pmcr_n(&pmcr, pmcr_n); vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_PMCR_EL0), pmcr); - vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_PMCR_EL0), &pmcr); + 
pmcr = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_PMCR_EL0)); if (expect_fail) TEST_ASSERT(pmcr_orig == pmcr, @@ -521,7 +520,7 @@ static void run_access_test(uint64_t pmcr_n) vcpu = vpmu_vm.vcpu; /* Save the initial sp to restore them later to run the guest again */ - vcpu_get_reg(vcpu, ARM64_CORE_REG(sp_el1), &sp); + sp = vcpu_get_reg(vcpu, ARM64_CORE_REG(sp_el1)); run_vcpu(vcpu, pmcr_n); @@ -572,12 +571,12 @@ static void run_pmregs_validity_test(uint64_t pmcr_n) * Test if the 'set' and 'clr' variants of the registers * are initialized based on the number of valid counters. */ - vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(set_reg_id), ®_val); + reg_val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(set_reg_id)); TEST_ASSERT((reg_val & (~valid_counters_mask)) == 0, "Initial read of set_reg: 0x%llx has unimplemented counters enabled: 0x%lx", KVM_ARM64_SYS_REG(set_reg_id), reg_val); - vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(clr_reg_id), ®_val); + reg_val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(clr_reg_id)); TEST_ASSERT((reg_val & (~valid_counters_mask)) == 0, "Initial read of clr_reg: 0x%llx has unimplemented counters enabled: 0x%lx", KVM_ARM64_SYS_REG(clr_reg_id), reg_val); @@ -589,12 +588,12 @@ static void run_pmregs_validity_test(uint64_t pmcr_n) */ vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(set_reg_id), max_counters_mask); - vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(set_reg_id), ®_val); + reg_val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(set_reg_id)); TEST_ASSERT((reg_val & (~valid_counters_mask)) == 0, "Read of set_reg: 0x%llx has unimplemented counters enabled: 0x%lx", KVM_ARM64_SYS_REG(set_reg_id), reg_val); - vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(clr_reg_id), ®_val); + reg_val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(clr_reg_id)); TEST_ASSERT((reg_val & (~valid_counters_mask)) == 0, "Read of clr_reg: 0x%llx has unimplemented counters enabled: 0x%lx", KVM_ARM64_SYS_REG(clr_reg_id), reg_val); @@ -625,7 +624,7 @@ static uint64_t get_pmcr_n_limit(void) uint64_t pmcr; create_vpmu_vm(guest_code); - vcpu_get_reg(vpmu_vm.vcpu, KVM_ARM64_SYS_REG(SYS_PMCR_EL0), &pmcr); + pmcr = vcpu_get_reg(vpmu_vm.vcpu, KVM_ARM64_SYS_REG(SYS_PMCR_EL0)); destroy_vpmu_vm(); return get_pmcr_n(pmcr); } diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index bc7c242480d6..287a3ec06df4 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -702,11 +702,13 @@ static inline int __vcpu_set_reg(struct kvm_vcpu *vcpu, uint64_t id, uint64_t va return __vcpu_ioctl(vcpu, KVM_SET_ONE_REG, ®); } -static inline void vcpu_get_reg(struct kvm_vcpu *vcpu, uint64_t id, void *addr) +static inline uint64_t vcpu_get_reg(struct kvm_vcpu *vcpu, uint64_t id) { - struct kvm_one_reg reg = { .id = id, .addr = (uint64_t)addr }; + uint64_t val; + struct kvm_one_reg reg = { .id = id, .addr = (uint64_t)&val }; vcpu_ioctl(vcpu, KVM_GET_ONE_REG, ®); + return val; } static inline void vcpu_set_reg(struct kvm_vcpu *vcpu, uint64_t id, uint64_t val) { diff --git a/tools/testing/selftests/kvm/lib/aarch64/processor.c b/tools/testing/selftests/kvm/lib/aarch64/processor.c index 698e34f39241..7ba3aa3755f3 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/processor.c +++ b/tools/testing/selftests/kvm/lib/aarch64/processor.c @@ -281,8 +281,8 @@ void aarch64_vcpu_setup(struct kvm_vcpu *vcpu, struct kvm_vcpu_init *init) */ vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_CPACR_EL1), 3 << 20); - vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_SCTLR_EL1), &sctlr_el1); - vcpu_get_reg(vcpu, 
KVM_ARM64_SYS_REG(SYS_TCR_EL1), &tcr_el1); + sctlr_el1 = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_SCTLR_EL1)); + tcr_el1 = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_TCR_EL1)); /* Configure base granule size */ switch (vm->mode) { @@ -360,8 +360,8 @@ void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, uint8_t indent) { uint64_t pstate, pc; - vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.pstate), &pstate); - vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.pc), &pc); + pstate = vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.pstate)); + pc = vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.pc)); fprintf(stream, "%*spstate: 0x%.16lx pc: 0x%.16lx\n", indent, "", pstate, pc); diff --git a/tools/testing/selftests/kvm/lib/riscv/processor.c b/tools/testing/selftests/kvm/lib/riscv/processor.c index 6ae47b3d6b25..dd663bcf0cc0 100644 --- a/tools/testing/selftests/kvm/lib/riscv/processor.c +++ b/tools/testing/selftests/kvm/lib/riscv/processor.c @@ -221,39 +221,39 @@ void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, uint8_t indent) { struct kvm_riscv_core core; - vcpu_get_reg(vcpu, RISCV_CORE_REG(mode), &core.mode); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.pc), &core.regs.pc); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.ra), &core.regs.ra); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.sp), &core.regs.sp); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.gp), &core.regs.gp); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.tp), &core.regs.tp); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t0), &core.regs.t0); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t1), &core.regs.t1); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t2), &core.regs.t2); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s0), &core.regs.s0); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s1), &core.regs.s1); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a0), &core.regs.a0); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a1), &core.regs.a1); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a2), &core.regs.a2); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a3), &core.regs.a3); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a4), &core.regs.a4); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a5), &core.regs.a5); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a6), &core.regs.a6); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a7), &core.regs.a7); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s2), &core.regs.s2); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s3), &core.regs.s3); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s4), &core.regs.s4); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s5), &core.regs.s5); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s6), &core.regs.s6); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s7), &core.regs.s7); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s8), &core.regs.s8); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s9), &core.regs.s9); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s10), &core.regs.s10); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s11), &core.regs.s11); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t3), &core.regs.t3); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t4), &core.regs.t4); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t5), &core.regs.t5); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t6), &core.regs.t6); + core.mode = vcpu_get_reg(vcpu, RISCV_CORE_REG(mode)); + core.regs.pc = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.pc)); + core.regs.ra = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.ra)); + core.regs.sp = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.sp)); + core.regs.gp = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.gp)); + core.regs.tp = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.tp)); + core.regs.t0 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t0)); + core.regs.t1 = vcpu_get_reg(vcpu, 
RISCV_CORE_REG(regs.t1)); + core.regs.t2 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t2)); + core.regs.s0 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s0)); + core.regs.s1 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s1)); + core.regs.a0 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a0)); + core.regs.a1 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a1)); + core.regs.a2 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a2)); + core.regs.a3 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a3)); + core.regs.a4 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a4)); + core.regs.a5 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a5)); + core.regs.a6 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a6)); + core.regs.a7 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a7)); + core.regs.s2 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s2)); + core.regs.s3 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s3)); + core.regs.s4 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s4)); + core.regs.s5 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s5)); + core.regs.s6 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s6)); + core.regs.s7 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s7)); + core.regs.s8 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s8)); + core.regs.s9 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s9)); + core.regs.s10 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s10)); + core.regs.s11 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s11)); + core.regs.t3 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t3)); + core.regs.t4 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t4)); + core.regs.t5 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t5)); + core.regs.t6 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t6)); fprintf(stream, " MODE: 0x%lx\n", core.mode); diff --git a/tools/testing/selftests/kvm/riscv/arch_timer.c b/tools/testing/selftests/kvm/riscv/arch_timer.c index 2c792228ac0b..9e370800a6a2 100644 --- a/tools/testing/selftests/kvm/riscv/arch_timer.c +++ b/tools/testing/selftests/kvm/riscv/arch_timer.c @@ -93,7 +93,7 @@ struct kvm_vm *test_vm_create(void) vcpu_init_vector_tables(vcpus[i]); /* Initialize guest timer frequency. */ - vcpu_get_reg(vcpus[0], RISCV_TIMER_REG(frequency), &timer_freq); + timer_freq = vcpu_get_reg(vcpus[0], RISCV_TIMER_REG(frequency)); sync_global_to_guest(vm, timer_freq); pr_debug("timer_freq: %lu\n", timer_freq); diff --git a/tools/testing/selftests/kvm/riscv/ebreak_test.c b/tools/testing/selftests/kvm/riscv/ebreak_test.c index 0e0712854953..cfed6c727bfc 100644 --- a/tools/testing/selftests/kvm/riscv/ebreak_test.c +++ b/tools/testing/selftests/kvm/riscv/ebreak_test.c @@ -60,7 +60,7 @@ int main(void) TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_DEBUG); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.pc), &pc); + pc = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.pc)); TEST_ASSERT_EQ(pc, LABEL_ADDRESS(sw_bp_1)); /* skip sw_bp_1 */ diff --git a/tools/testing/selftests/kvm/riscv/sbi_pmu_test.c b/tools/testing/selftests/kvm/riscv/sbi_pmu_test.c index f299cbfd23ca..f45c0ecc902d 100644 --- a/tools/testing/selftests/kvm/riscv/sbi_pmu_test.c +++ b/tools/testing/selftests/kvm/riscv/sbi_pmu_test.c @@ -608,7 +608,7 @@ static void test_vm_events_overflow(void *guest_code) vcpu_init_vector_tables(vcpu); /* Initialize guest timer frequency. 
*/ - vcpu_get_reg(vcpu, RISCV_TIMER_REG(frequency), &timer_freq); + timer_freq = vcpu_get_reg(vcpu, RISCV_TIMER_REG(frequency)); sync_global_to_guest(vm, timer_freq); run_vcpu(vcpu); diff --git a/tools/testing/selftests/kvm/s390x/resets.c b/tools/testing/selftests/kvm/s390x/resets.c index 357943f2bea8..b58f75b381e5 100644 --- a/tools/testing/selftests/kvm/s390x/resets.c +++ b/tools/testing/selftests/kvm/s390x/resets.c @@ -61,7 +61,7 @@ static void test_one_reg(struct kvm_vcpu *vcpu, uint64_t id, uint64_t value) { uint64_t eval_reg; - vcpu_get_reg(vcpu, id, &eval_reg); + eval_reg = vcpu_get_reg(vcpu, id); TEST_ASSERT(eval_reg == value, "value == 0x%lx", value); } diff --git a/tools/testing/selftests/kvm/steal_time.c b/tools/testing/selftests/kvm/steal_time.c index a8d3afa0b86b..cce2520af720 100644 --- a/tools/testing/selftests/kvm/steal_time.c +++ b/tools/testing/selftests/kvm/steal_time.c @@ -269,9 +269,8 @@ static void guest_code(int cpu) static bool is_steal_time_supported(struct kvm_vcpu *vcpu) { uint64_t id = RISCV_SBI_EXT_REG(KVM_RISCV_SBI_EXT_STA); - unsigned long enabled; + unsigned long enabled = vcpu_get_reg(vcpu, id); - vcpu_get_reg(vcpu, id, &enabled); TEST_ASSERT(enabled == 0 || enabled == 1, "Expected boolean result"); return enabled; -- cgit v1.2.3 From fe85ce31b2891611a2e4d788872be815cea85a4b Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 27 Nov 2024 16:55:34 -0800 Subject: KVM: selftests: Assert that vcpu_{g,s}et_reg() won't truncate Assert that the register being read/written by vcpu_{g,s}et_reg() is no larger than a uint64_t, i.e. that a selftest isn't unintentionally truncating the value being read/written. Ideally, the assert would be done at compile-time, but that would limit the checks to hardcoded accesses and/or require fancier compile-time assertion infrastructure to filter out dynamic usage. Reviewed-by: Andrew Jones Link: https://lore.kernel.org/r/20241128005547.4077116-4-seanjc@google.com Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/include/kvm_util.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index 287a3ec06df4..4c4e5a847f67 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -707,6 +707,8 @@ static inline uint64_t vcpu_get_reg(struct kvm_vcpu *vcpu, uint64_t id) uint64_t val; struct kvm_one_reg reg = { .id = id, .addr = (uint64_t)&val }; + TEST_ASSERT(KVM_REG_SIZE(id) <= sizeof(val), "Reg %lx too big", id); + vcpu_ioctl(vcpu, KVM_GET_ONE_REG, ®); return val; } @@ -714,6 +716,8 @@ static inline void vcpu_set_reg(struct kvm_vcpu *vcpu, uint64_t id, uint64_t val { struct kvm_one_reg reg = { .id = id, .addr = (uint64_t)&val }; + TEST_ASSERT(KVM_REG_SIZE(id) <= sizeof(val), "Reg %lx too big", id); + vcpu_ioctl(vcpu, KVM_SET_ONE_REG, ®); } -- cgit v1.2.3 From d6533c15133867cd3032bcab7f839ae5d53d0e70 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 27 Nov 2024 16:55:35 -0800 Subject: KVM: selftests: Check for a potential unhandled exception iff KVM_RUN succeeded Don't check for an unhandled exception if KVM_RUN failed, e.g. if it returned errno=EFAULT, as reporting unhandled exceptions is done via a ucall, i.e. requires KVM_RUN to exit cleanly. Theoretically, checking for a ucall on a failed KVM_RUN could get a false positive, e.g. if there were stale data in vcpu->run from a previous exit. 
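For illustration only (this snippet is not part of the patch), a caller that expects KVM_RUN itself to fail, e.g. with EFAULT, would use the bare __vcpu_run() and trust the ucall-based reporting only after a clean exit:

	int r = __vcpu_run(vcpu);

	if (r) {
		/* KVM_RUN failed; there is no fresh exit data or ucall to inspect. */
		TEST_ASSERT(errno == EFAULT, "Unexpected KVM_RUN error: %d", errno);
	} else {
		/* Clean exit: any pending ucall is valid, so the assert is safe. */
		assert_on_unhandled_exception(vcpu);
	}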
Reviewed-by: James Houghton Reviewed-by: Andrew Jones Link: https://lore.kernel.org/r/20241128005547.4077116-5-seanjc@google.com Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/lib/kvm_util.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index 480e3a40d197..33fefeb3ca44 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -1648,7 +1648,8 @@ int _vcpu_run(struct kvm_vcpu *vcpu) rc = __vcpu_run(vcpu); } while (rc == -1 && errno == EINTR); - assert_on_unhandled_exception(vcpu); + if (!rc) + assert_on_unhandled_exception(vcpu); return rc; } -- cgit v1.2.3 From b12391498d1e7ee49390cda34df4a5cc21700e9f Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 27 Nov 2024 16:55:36 -0800 Subject: KVM: selftests: Rename max_guest_memory_test to mmu_stress_test Rename max_guest_memory_test to mmu_stress_test so that the name isn't horribly misleading when future changes extend the test to verify things like mprotect() interactions, and because the test is useful even when its configured to populate far less than the maximum amount of guest memory. Reviewed-by: James Houghton Reviewed-by: Andrew Jones Link: https://lore.kernel.org/r/20241128005547.4077116-6-seanjc@google.com Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/Makefile | 2 +- .../testing/selftests/kvm/max_guest_memory_test.c | 289 --------------------- tools/testing/selftests/kvm/mmu_stress_test.c | 289 +++++++++++++++++++++ 3 files changed, 290 insertions(+), 290 deletions(-) delete mode 100644 tools/testing/selftests/kvm/max_guest_memory_test.c create mode 100644 tools/testing/selftests/kvm/mmu_stress_test.c (limited to 'tools') diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index 41593d2e7de9..4384e5f45c36 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -140,7 +140,7 @@ TEST_GEN_PROGS_x86_64 += guest_print_test TEST_GEN_PROGS_x86_64 += hardware_disable_test TEST_GEN_PROGS_x86_64 += kvm_create_max_vcpus TEST_GEN_PROGS_x86_64 += kvm_page_table_test -TEST_GEN_PROGS_x86_64 += max_guest_memory_test +TEST_GEN_PROGS_x86_64 += mmu_stress_test TEST_GEN_PROGS_x86_64 += memslot_modification_stress_test TEST_GEN_PROGS_x86_64 += memslot_perf_test TEST_GEN_PROGS_x86_64 += rseq_test diff --git a/tools/testing/selftests/kvm/max_guest_memory_test.c b/tools/testing/selftests/kvm/max_guest_memory_test.c deleted file mode 100644 index 0b9678858b6d..000000000000 --- a/tools/testing/selftests/kvm/max_guest_memory_test.c +++ /dev/null @@ -1,289 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "kvm_util.h" -#include "test_util.h" -#include "guest_modes.h" -#include "processor.h" - -static void guest_code(uint64_t start_gpa, uint64_t end_gpa, uint64_t stride) -{ - uint64_t gpa; - - for (;;) { - for (gpa = start_gpa; gpa < end_gpa; gpa += stride) - *((volatile uint64_t *)gpa) = gpa; - GUEST_SYNC(0); - } -} - -struct vcpu_info { - struct kvm_vcpu *vcpu; - uint64_t start_gpa; - uint64_t end_gpa; -}; - -static int nr_vcpus; -static atomic_t rendezvous; - -static void rendezvous_with_boss(void) -{ - int orig = atomic_read(&rendezvous); - - if (orig > 0) { - atomic_dec_and_test(&rendezvous); - while (atomic_read(&rendezvous) > 0) - 
cpu_relax(); - } else { - atomic_inc(&rendezvous); - while (atomic_read(&rendezvous) < 0) - cpu_relax(); - } -} - -static void run_vcpu(struct kvm_vcpu *vcpu) -{ - vcpu_run(vcpu); - TEST_ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_SYNC); -} - -static void *vcpu_worker(void *data) -{ - struct vcpu_info *info = data; - struct kvm_vcpu *vcpu = info->vcpu; - struct kvm_vm *vm = vcpu->vm; - struct kvm_sregs sregs; - - vcpu_args_set(vcpu, 3, info->start_gpa, info->end_gpa, vm->page_size); - - rendezvous_with_boss(); - - run_vcpu(vcpu); - rendezvous_with_boss(); - vcpu_sregs_get(vcpu, &sregs); -#ifdef __x86_64__ - /* Toggle CR0.WP to trigger a MMU context reset. */ - sregs.cr0 ^= X86_CR0_WP; -#endif - vcpu_sregs_set(vcpu, &sregs); - rendezvous_with_boss(); - - run_vcpu(vcpu); - rendezvous_with_boss(); - - return NULL; -} - -static pthread_t *spawn_workers(struct kvm_vm *vm, struct kvm_vcpu **vcpus, - uint64_t start_gpa, uint64_t end_gpa) -{ - struct vcpu_info *info; - uint64_t gpa, nr_bytes; - pthread_t *threads; - int i; - - threads = malloc(nr_vcpus * sizeof(*threads)); - TEST_ASSERT(threads, "Failed to allocate vCPU threads"); - - info = malloc(nr_vcpus * sizeof(*info)); - TEST_ASSERT(info, "Failed to allocate vCPU gpa ranges"); - - nr_bytes = ((end_gpa - start_gpa) / nr_vcpus) & - ~((uint64_t)vm->page_size - 1); - TEST_ASSERT(nr_bytes, "C'mon, no way you have %d CPUs", nr_vcpus); - - for (i = 0, gpa = start_gpa; i < nr_vcpus; i++, gpa += nr_bytes) { - info[i].vcpu = vcpus[i]; - info[i].start_gpa = gpa; - info[i].end_gpa = gpa + nr_bytes; - pthread_create(&threads[i], NULL, vcpu_worker, &info[i]); - } - return threads; -} - -static void rendezvous_with_vcpus(struct timespec *time, const char *name) -{ - int i, rendezvoused; - - pr_info("Waiting for vCPUs to finish %s...\n", name); - - rendezvoused = atomic_read(&rendezvous); - for (i = 0; abs(rendezvoused) != 1; i++) { - usleep(100); - if (!(i & 0x3f)) - pr_info("\r%d vCPUs haven't rendezvoused...", - abs(rendezvoused) - 1); - rendezvoused = atomic_read(&rendezvous); - } - - clock_gettime(CLOCK_MONOTONIC, time); - - /* Release the vCPUs after getting the time of the previous action. */ - pr_info("\rAll vCPUs finished %s, releasing...\n", name); - if (rendezvoused > 0) - atomic_set(&rendezvous, -nr_vcpus - 1); - else - atomic_set(&rendezvous, nr_vcpus + 1); -} - -static void calc_default_nr_vcpus(void) -{ - cpu_set_t possible_mask; - int r; - - r = sched_getaffinity(0, sizeof(possible_mask), &possible_mask); - TEST_ASSERT(!r, "sched_getaffinity failed, errno = %d (%s)", - errno, strerror(errno)); - - nr_vcpus = CPU_COUNT(&possible_mask) * 3/4; - TEST_ASSERT(nr_vcpus > 0, "Uh, no CPUs?"); -} - -int main(int argc, char *argv[]) -{ - /* - * Skip the first 4gb and slot0. slot0 maps <1gb and is used to back - * the guest's code, stack, and page tables. Because selftests creates - * an IRQCHIP, a.k.a. a local APIC, KVM creates an internal memslot - * just below the 4gb boundary. This test could create memory at - * 1gb-3gb,but it's simpler to skip straight to 4gb. 
- */ - const uint64_t start_gpa = SZ_4G; - const int first_slot = 1; - - struct timespec time_start, time_run1, time_reset, time_run2; - uint64_t max_gpa, gpa, slot_size, max_mem, i; - int max_slots, slot, opt, fd; - bool hugepages = false; - struct kvm_vcpu **vcpus; - pthread_t *threads; - struct kvm_vm *vm; - void *mem; - - /* - * Default to 2gb so that maxing out systems with MAXPHADDR=46, which - * are quite common for x86, requires changing only max_mem (KVM allows - * 32k memslots, 32k * 2gb == ~64tb of guest memory). - */ - slot_size = SZ_2G; - - max_slots = kvm_check_cap(KVM_CAP_NR_MEMSLOTS); - TEST_ASSERT(max_slots > first_slot, "KVM is broken"); - - /* All KVM MMUs should be able to survive a 128gb guest. */ - max_mem = 128ull * SZ_1G; - - calc_default_nr_vcpus(); - - while ((opt = getopt(argc, argv, "c:h:m:s:H")) != -1) { - switch (opt) { - case 'c': - nr_vcpus = atoi_positive("Number of vCPUs", optarg); - break; - case 'm': - max_mem = 1ull * atoi_positive("Memory size", optarg) * SZ_1G; - break; - case 's': - slot_size = 1ull * atoi_positive("Slot size", optarg) * SZ_1G; - break; - case 'H': - hugepages = true; - break; - case 'h': - default: - printf("usage: %s [-c nr_vcpus] [-m max_mem_in_gb] [-s slot_size_in_gb] [-H]\n", argv[0]); - exit(1); - } - } - - vcpus = malloc(nr_vcpus * sizeof(*vcpus)); - TEST_ASSERT(vcpus, "Failed to allocate vCPU array"); - - vm = vm_create_with_vcpus(nr_vcpus, guest_code, vcpus); - - max_gpa = vm->max_gfn << vm->page_shift; - TEST_ASSERT(max_gpa > (4 * slot_size), "MAXPHYADDR <4gb "); - - fd = kvm_memfd_alloc(slot_size, hugepages); - mem = mmap(NULL, slot_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); - TEST_ASSERT(mem != MAP_FAILED, "mmap() failed"); - - TEST_ASSERT(!madvise(mem, slot_size, MADV_NOHUGEPAGE), "madvise() failed"); - - /* Pre-fault the memory to avoid taking mmap_sem on guest page faults. */ - for (i = 0; i < slot_size; i += vm->page_size) - ((uint8_t *)mem)[i] = 0xaa; - - gpa = 0; - for (slot = first_slot; slot < max_slots; slot++) { - gpa = start_gpa + ((slot - first_slot) * slot_size); - if (gpa + slot_size > max_gpa) - break; - - if ((gpa - start_gpa) >= max_mem) - break; - - vm_set_user_memory_region(vm, slot, 0, gpa, slot_size, mem); - -#ifdef __x86_64__ - /* Identity map memory in the guest using 1gb pages. */ - for (i = 0; i < slot_size; i += SZ_1G) - __virt_pg_map(vm, gpa + i, gpa + i, PG_LEVEL_1G); -#else - for (i = 0; i < slot_size; i += vm->page_size) - virt_pg_map(vm, gpa + i, gpa + i); -#endif - } - - atomic_set(&rendezvous, nr_vcpus + 1); - threads = spawn_workers(vm, vcpus, start_gpa, gpa); - - free(vcpus); - vcpus = NULL; - - pr_info("Running with %lugb of guest memory and %u vCPUs\n", - (gpa - start_gpa) / SZ_1G, nr_vcpus); - - rendezvous_with_vcpus(&time_start, "spawning"); - rendezvous_with_vcpus(&time_run1, "run 1"); - rendezvous_with_vcpus(&time_reset, "reset"); - rendezvous_with_vcpus(&time_run2, "run 2"); - - time_run2 = timespec_sub(time_run2, time_reset); - time_reset = timespec_sub(time_reset, time_run1); - time_run1 = timespec_sub(time_run1, time_start); - - pr_info("run1 = %ld.%.9lds, reset = %ld.%.9lds, run2 = %ld.%.9lds\n", - time_run1.tv_sec, time_run1.tv_nsec, - time_reset.tv_sec, time_reset.tv_nsec, - time_run2.tv_sec, time_run2.tv_nsec); - - /* - * Delete even numbered slots (arbitrary) and unmap the first half of - * the backing (also arbitrary) to verify KVM correctly drops all - * references to the removed regions. 
- */ - for (slot = (slot - 1) & ~1ull; slot >= first_slot; slot -= 2) - vm_set_user_memory_region(vm, slot, 0, 0, 0, NULL); - - munmap(mem, slot_size / 2); - - /* Sanity check that the vCPUs actually ran. */ - for (i = 0; i < nr_vcpus; i++) - pthread_join(threads[i], NULL); - - /* - * Deliberately exit without deleting the remaining memslots or closing - * kvm_fd to test cleanup via mmu_notifier.release. - */ -} diff --git a/tools/testing/selftests/kvm/mmu_stress_test.c b/tools/testing/selftests/kvm/mmu_stress_test.c new file mode 100644 index 000000000000..0b9678858b6d --- /dev/null +++ b/tools/testing/selftests/kvm/mmu_stress_test.c @@ -0,0 +1,289 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "kvm_util.h" +#include "test_util.h" +#include "guest_modes.h" +#include "processor.h" + +static void guest_code(uint64_t start_gpa, uint64_t end_gpa, uint64_t stride) +{ + uint64_t gpa; + + for (;;) { + for (gpa = start_gpa; gpa < end_gpa; gpa += stride) + *((volatile uint64_t *)gpa) = gpa; + GUEST_SYNC(0); + } +} + +struct vcpu_info { + struct kvm_vcpu *vcpu; + uint64_t start_gpa; + uint64_t end_gpa; +}; + +static int nr_vcpus; +static atomic_t rendezvous; + +static void rendezvous_with_boss(void) +{ + int orig = atomic_read(&rendezvous); + + if (orig > 0) { + atomic_dec_and_test(&rendezvous); + while (atomic_read(&rendezvous) > 0) + cpu_relax(); + } else { + atomic_inc(&rendezvous); + while (atomic_read(&rendezvous) < 0) + cpu_relax(); + } +} + +static void run_vcpu(struct kvm_vcpu *vcpu) +{ + vcpu_run(vcpu); + TEST_ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_SYNC); +} + +static void *vcpu_worker(void *data) +{ + struct vcpu_info *info = data; + struct kvm_vcpu *vcpu = info->vcpu; + struct kvm_vm *vm = vcpu->vm; + struct kvm_sregs sregs; + + vcpu_args_set(vcpu, 3, info->start_gpa, info->end_gpa, vm->page_size); + + rendezvous_with_boss(); + + run_vcpu(vcpu); + rendezvous_with_boss(); + vcpu_sregs_get(vcpu, &sregs); +#ifdef __x86_64__ + /* Toggle CR0.WP to trigger a MMU context reset. 
*/ + sregs.cr0 ^= X86_CR0_WP; +#endif + vcpu_sregs_set(vcpu, &sregs); + rendezvous_with_boss(); + + run_vcpu(vcpu); + rendezvous_with_boss(); + + return NULL; +} + +static pthread_t *spawn_workers(struct kvm_vm *vm, struct kvm_vcpu **vcpus, + uint64_t start_gpa, uint64_t end_gpa) +{ + struct vcpu_info *info; + uint64_t gpa, nr_bytes; + pthread_t *threads; + int i; + + threads = malloc(nr_vcpus * sizeof(*threads)); + TEST_ASSERT(threads, "Failed to allocate vCPU threads"); + + info = malloc(nr_vcpus * sizeof(*info)); + TEST_ASSERT(info, "Failed to allocate vCPU gpa ranges"); + + nr_bytes = ((end_gpa - start_gpa) / nr_vcpus) & + ~((uint64_t)vm->page_size - 1); + TEST_ASSERT(nr_bytes, "C'mon, no way you have %d CPUs", nr_vcpus); + + for (i = 0, gpa = start_gpa; i < nr_vcpus; i++, gpa += nr_bytes) { + info[i].vcpu = vcpus[i]; + info[i].start_gpa = gpa; + info[i].end_gpa = gpa + nr_bytes; + pthread_create(&threads[i], NULL, vcpu_worker, &info[i]); + } + return threads; +} + +static void rendezvous_with_vcpus(struct timespec *time, const char *name) +{ + int i, rendezvoused; + + pr_info("Waiting for vCPUs to finish %s...\n", name); + + rendezvoused = atomic_read(&rendezvous); + for (i = 0; abs(rendezvoused) != 1; i++) { + usleep(100); + if (!(i & 0x3f)) + pr_info("\r%d vCPUs haven't rendezvoused...", + abs(rendezvoused) - 1); + rendezvoused = atomic_read(&rendezvous); + } + + clock_gettime(CLOCK_MONOTONIC, time); + + /* Release the vCPUs after getting the time of the previous action. */ + pr_info("\rAll vCPUs finished %s, releasing...\n", name); + if (rendezvoused > 0) + atomic_set(&rendezvous, -nr_vcpus - 1); + else + atomic_set(&rendezvous, nr_vcpus + 1); +} + +static void calc_default_nr_vcpus(void) +{ + cpu_set_t possible_mask; + int r; + + r = sched_getaffinity(0, sizeof(possible_mask), &possible_mask); + TEST_ASSERT(!r, "sched_getaffinity failed, errno = %d (%s)", + errno, strerror(errno)); + + nr_vcpus = CPU_COUNT(&possible_mask) * 3/4; + TEST_ASSERT(nr_vcpus > 0, "Uh, no CPUs?"); +} + +int main(int argc, char *argv[]) +{ + /* + * Skip the first 4gb and slot0. slot0 maps <1gb and is used to back + * the guest's code, stack, and page tables. Because selftests creates + * an IRQCHIP, a.k.a. a local APIC, KVM creates an internal memslot + * just below the 4gb boundary. This test could create memory at + * 1gb-3gb,but it's simpler to skip straight to 4gb. + */ + const uint64_t start_gpa = SZ_4G; + const int first_slot = 1; + + struct timespec time_start, time_run1, time_reset, time_run2; + uint64_t max_gpa, gpa, slot_size, max_mem, i; + int max_slots, slot, opt, fd; + bool hugepages = false; + struct kvm_vcpu **vcpus; + pthread_t *threads; + struct kvm_vm *vm; + void *mem; + + /* + * Default to 2gb so that maxing out systems with MAXPHADDR=46, which + * are quite common for x86, requires changing only max_mem (KVM allows + * 32k memslots, 32k * 2gb == ~64tb of guest memory). + */ + slot_size = SZ_2G; + + max_slots = kvm_check_cap(KVM_CAP_NR_MEMSLOTS); + TEST_ASSERT(max_slots > first_slot, "KVM is broken"); + + /* All KVM MMUs should be able to survive a 128gb guest. 
*/ + max_mem = 128ull * SZ_1G; + + calc_default_nr_vcpus(); + + while ((opt = getopt(argc, argv, "c:h:m:s:H")) != -1) { + switch (opt) { + case 'c': + nr_vcpus = atoi_positive("Number of vCPUs", optarg); + break; + case 'm': + max_mem = 1ull * atoi_positive("Memory size", optarg) * SZ_1G; + break; + case 's': + slot_size = 1ull * atoi_positive("Slot size", optarg) * SZ_1G; + break; + case 'H': + hugepages = true; + break; + case 'h': + default: + printf("usage: %s [-c nr_vcpus] [-m max_mem_in_gb] [-s slot_size_in_gb] [-H]\n", argv[0]); + exit(1); + } + } + + vcpus = malloc(nr_vcpus * sizeof(*vcpus)); + TEST_ASSERT(vcpus, "Failed to allocate vCPU array"); + + vm = vm_create_with_vcpus(nr_vcpus, guest_code, vcpus); + + max_gpa = vm->max_gfn << vm->page_shift; + TEST_ASSERT(max_gpa > (4 * slot_size), "MAXPHYADDR <4gb "); + + fd = kvm_memfd_alloc(slot_size, hugepages); + mem = mmap(NULL, slot_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + TEST_ASSERT(mem != MAP_FAILED, "mmap() failed"); + + TEST_ASSERT(!madvise(mem, slot_size, MADV_NOHUGEPAGE), "madvise() failed"); + + /* Pre-fault the memory to avoid taking mmap_sem on guest page faults. */ + for (i = 0; i < slot_size; i += vm->page_size) + ((uint8_t *)mem)[i] = 0xaa; + + gpa = 0; + for (slot = first_slot; slot < max_slots; slot++) { + gpa = start_gpa + ((slot - first_slot) * slot_size); + if (gpa + slot_size > max_gpa) + break; + + if ((gpa - start_gpa) >= max_mem) + break; + + vm_set_user_memory_region(vm, slot, 0, gpa, slot_size, mem); + +#ifdef __x86_64__ + /* Identity map memory in the guest using 1gb pages. */ + for (i = 0; i < slot_size; i += SZ_1G) + __virt_pg_map(vm, gpa + i, gpa + i, PG_LEVEL_1G); +#else + for (i = 0; i < slot_size; i += vm->page_size) + virt_pg_map(vm, gpa + i, gpa + i); +#endif + } + + atomic_set(&rendezvous, nr_vcpus + 1); + threads = spawn_workers(vm, vcpus, start_gpa, gpa); + + free(vcpus); + vcpus = NULL; + + pr_info("Running with %lugb of guest memory and %u vCPUs\n", + (gpa - start_gpa) / SZ_1G, nr_vcpus); + + rendezvous_with_vcpus(&time_start, "spawning"); + rendezvous_with_vcpus(&time_run1, "run 1"); + rendezvous_with_vcpus(&time_reset, "reset"); + rendezvous_with_vcpus(&time_run2, "run 2"); + + time_run2 = timespec_sub(time_run2, time_reset); + time_reset = timespec_sub(time_reset, time_run1); + time_run1 = timespec_sub(time_run1, time_start); + + pr_info("run1 = %ld.%.9lds, reset = %ld.%.9lds, run2 = %ld.%.9lds\n", + time_run1.tv_sec, time_run1.tv_nsec, + time_reset.tv_sec, time_reset.tv_nsec, + time_run2.tv_sec, time_run2.tv_nsec); + + /* + * Delete even numbered slots (arbitrary) and unmap the first half of + * the backing (also arbitrary) to verify KVM correctly drops all + * references to the removed regions. + */ + for (slot = (slot - 1) & ~1ull; slot >= first_slot; slot -= 2) + vm_set_user_memory_region(vm, slot, 0, 0, 0, NULL); + + munmap(mem, slot_size / 2); + + /* Sanity check that the vCPUs actually ran. */ + for (i = 0; i < nr_vcpus; i++) + pthread_join(threads[i], NULL); + + /* + * Deliberately exit without deleting the remaining memslots or closing + * kvm_fd to test cleanup via mmu_notifier.release. 
+ */ +} -- cgit v1.2.3 From 55e164df482a48e168b26994197c2a848aae5959 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 27 Nov 2024 16:55:37 -0800 Subject: KVM: selftests: Only muck with SREGS on x86 in mmu_stress_test Try to get/set SREGS in mmu_stress_test only when running on x86, as the ioctls are supported only by x86 and PPC, and the latter doesn't yet support KVM selftests. Reviewed-by: James Houghton Reviewed-by: Andrew Jones Link: https://lore.kernel.org/r/20241128005547.4077116-7-seanjc@google.com Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/mmu_stress_test.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/kvm/mmu_stress_test.c b/tools/testing/selftests/kvm/mmu_stress_test.c index 0b9678858b6d..847da23ec1b1 100644 --- a/tools/testing/selftests/kvm/mmu_stress_test.c +++ b/tools/testing/selftests/kvm/mmu_stress_test.c @@ -59,10 +59,10 @@ static void run_vcpu(struct kvm_vcpu *vcpu) static void *vcpu_worker(void *data) { + struct kvm_sregs __maybe_unused sregs; struct vcpu_info *info = data; struct kvm_vcpu *vcpu = info->vcpu; struct kvm_vm *vm = vcpu->vm; - struct kvm_sregs sregs; vcpu_args_set(vcpu, 3, info->start_gpa, info->end_gpa, vm->page_size); @@ -70,12 +70,12 @@ static void *vcpu_worker(void *data) run_vcpu(vcpu); rendezvous_with_boss(); - vcpu_sregs_get(vcpu, &sregs); #ifdef __x86_64__ + vcpu_sregs_get(vcpu, &sregs); /* Toggle CR0.WP to trigger a MMU context reset. */ sregs.cr0 ^= X86_CR0_WP; -#endif vcpu_sregs_set(vcpu, &sregs); +#endif rendezvous_with_boss(); run_vcpu(vcpu); -- cgit v1.2.3 From 1ddd3ea75ac3be79dbd800507dd7e08928bd454d Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 27 Nov 2024 16:55:38 -0800 Subject: KVM: selftests: Compute number of extra pages needed in mmu_stress_test Create mmu_stress_tests's VM with the correct number of extra pages needed to map all of memory in the guest. The bug hasn't been noticed before as the test currently runs only on x86, which maps guest memory with 1GiB pages, i.e. doesn't need much memory in the guest for page tables. 
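As a rough, illustrative sizing (not part of the patch; the selftests library derives the actual page-table reservation from this budget), the extra-pages budget works out to max_mem / SZ_1G on x86 and max_mem / page_size elsewhere:

	/* Illustrative arithmetic for the default 128GiB of test memory. */
	uint64_t max_mem = 128ull * SZ_1G;

	uint64_t nr_extra_x86   = max_mem / SZ_1G;	/* 128: one 1GiB mapping per slot of memory */
	uint64_t nr_extra_other = max_mem / 4096;	/* 33554432: one mapping per 4KiB base page */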
Reviewed-by: James Houghton Reviewed-by: Andrew Jones Link: https://lore.kernel.org/r/20241128005547.4077116-8-seanjc@google.com Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/mmu_stress_test.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/kvm/mmu_stress_test.c b/tools/testing/selftests/kvm/mmu_stress_test.c index 847da23ec1b1..5467b12f5903 100644 --- a/tools/testing/selftests/kvm/mmu_stress_test.c +++ b/tools/testing/selftests/kvm/mmu_stress_test.c @@ -209,7 +209,13 @@ int main(int argc, char *argv[]) vcpus = malloc(nr_vcpus * sizeof(*vcpus)); TEST_ASSERT(vcpus, "Failed to allocate vCPU array"); - vm = vm_create_with_vcpus(nr_vcpus, guest_code, vcpus); + vm = __vm_create_with_vcpus(VM_SHAPE_DEFAULT, nr_vcpus, +#ifdef __x86_64__ + max_mem / SZ_1G, +#else + max_mem / vm_guest_mode_params[VM_MODE_DEFAULT].page_size, +#endif + guest_code, vcpus); max_gpa = vm->max_gfn << vm->page_shift; TEST_ASSERT(max_gpa > (4 * slot_size), "MAXPHYADDR <4gb "); -- cgit v1.2.3 From c35d8f579e50328f539bc049cc057f2158bb8e60 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 27 Nov 2024 16:55:39 -0800 Subject: KVM: selftests: Explicitly include ucall_common.h in mmu_stress_test.c Explicitly include ucall_common.h in the MMU stress test, as unlike arm64 and x86-64, RISC-V doesn't include ucall_common.h in its processor.h, i.e. this will allow enabling the test on RISC-V. Reported-by: Andrew Jones Link: https://lore.kernel.org/r/20241128005547.4077116-9-seanjc@google.com Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/mmu_stress_test.c | 1 + 1 file changed, 1 insertion(+) (limited to 'tools') diff --git a/tools/testing/selftests/kvm/mmu_stress_test.c b/tools/testing/selftests/kvm/mmu_stress_test.c index 5467b12f5903..fbb693428a82 100644 --- a/tools/testing/selftests/kvm/mmu_stress_test.c +++ b/tools/testing/selftests/kvm/mmu_stress_test.c @@ -15,6 +15,7 @@ #include "test_util.h" #include "guest_modes.h" #include "processor.h" +#include "ucall_common.h" static void guest_code(uint64_t start_gpa, uint64_t end_gpa, uint64_t stride) { -- cgit v1.2.3 From 8abe7632a1eebdfac2c553a21b4f980db416c166 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 27 Nov 2024 16:55:40 -0800 Subject: KVM: selftests: Enable mmu_stress_test on arm64 Enable the mmu_stress_test on arm64. The intent was to enable the test across all architectures when it was first added, but a few goofs made it unrunnable on !x86. Now that those goofs are fixed, at least for arm64, enable the test.
Cc: Oliver Upton Cc: Marc Zyngier Reviewed-by: James Houghton Reviewed-by: Andrew Jones Link: https://lore.kernel.org/r/20241128005547.4077116-10-seanjc@google.com Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/Makefile | 1 + 1 file changed, 1 insertion(+) (limited to 'tools') diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index 4384e5f45c36..c59a337cd4da 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -180,6 +180,7 @@ TEST_GEN_PROGS_aarch64 += kvm_create_max_vcpus TEST_GEN_PROGS_aarch64 += kvm_page_table_test TEST_GEN_PROGS_aarch64 += memslot_modification_stress_test TEST_GEN_PROGS_aarch64 += memslot_perf_test +TEST_GEN_PROGS_aarch64 += mmu_stress_test TEST_GEN_PROGS_aarch64 += rseq_test TEST_GEN_PROGS_aarch64 += set_memory_region_test TEST_GEN_PROGS_aarch64 += steal_time -- cgit v1.2.3 From 3a042252640450fd288c56953bf21583d5198fba Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 27 Nov 2024 16:55:41 -0800 Subject: KVM: selftests: Use vcpu_arch_put_guest() in mmu_stress_test Use vcpu_arch_put_guest() to write memory from the guest in mmu_stress_test as an easy way to provide a bit of extra coverage. Reviewed-by: James Houghton Reviewed-by: Andrew Jones Link: https://lore.kernel.org/r/20241128005547.4077116-11-seanjc@google.com Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/mmu_stress_test.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/kvm/mmu_stress_test.c b/tools/testing/selftests/kvm/mmu_stress_test.c index fbb693428a82..656a837c7f49 100644 --- a/tools/testing/selftests/kvm/mmu_stress_test.c +++ b/tools/testing/selftests/kvm/mmu_stress_test.c @@ -23,7 +23,7 @@ static void guest_code(uint64_t start_gpa, uint64_t end_gpa, uint64_t stride) for (;;) { for (gpa = start_gpa; gpa < end_gpa; gpa += stride) - *((volatile uint64_t *)gpa) = gpa; + vcpu_arch_put_guest(*((volatile uint64_t *)gpa), gpa); GUEST_SYNC(0); } } -- cgit v1.2.3 From 82b542e1184884885fe2b4aabd47672540db02c7 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 27 Nov 2024 16:55:42 -0800 Subject: KVM: selftests: Precisely limit the number of guest loops in mmu_stress_test Run the exact number of guest loops required in mmu_stress_test instead of looping indefinitely in anticipation of adding more stages that run different code (e.g. reads instead of writes). 
Reviewed-by: James Houghton Reviewed-by: Andrew Jones Link: https://lore.kernel.org/r/20241128005547.4077116-12-seanjc@google.com Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/mmu_stress_test.c | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/kvm/mmu_stress_test.c b/tools/testing/selftests/kvm/mmu_stress_test.c index 656a837c7f49..c6bf18cb7c89 100644 --- a/tools/testing/selftests/kvm/mmu_stress_test.c +++ b/tools/testing/selftests/kvm/mmu_stress_test.c @@ -20,12 +20,15 @@ static void guest_code(uint64_t start_gpa, uint64_t end_gpa, uint64_t stride) { uint64_t gpa; + int i; - for (;;) { + for (i = 0; i < 2; i++) { for (gpa = start_gpa; gpa < end_gpa; gpa += stride) vcpu_arch_put_guest(*((volatile uint64_t *)gpa), gpa); - GUEST_SYNC(0); + GUEST_SYNC(i); } + + GUEST_ASSERT(0); } struct vcpu_info { @@ -52,10 +55,18 @@ static void rendezvous_with_boss(void) } } -static void run_vcpu(struct kvm_vcpu *vcpu) +static void assert_sync_stage(struct kvm_vcpu *vcpu, int stage) +{ + struct ucall uc; + + TEST_ASSERT_EQ(get_ucall(vcpu, &uc), UCALL_SYNC); + TEST_ASSERT_EQ(uc.args[1], stage); +} + +static void run_vcpu(struct kvm_vcpu *vcpu, int stage) { vcpu_run(vcpu); - TEST_ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_SYNC); + assert_sync_stage(vcpu, stage); } static void *vcpu_worker(void *data) @@ -69,7 +80,8 @@ static void *vcpu_worker(void *data) rendezvous_with_boss(); - run_vcpu(vcpu); + /* Stage 0, write all of guest memory. */ + run_vcpu(vcpu, 0); rendezvous_with_boss(); #ifdef __x86_64__ vcpu_sregs_get(vcpu, &sregs); @@ -79,7 +91,8 @@ static void *vcpu_worker(void *data) #endif rendezvous_with_boss(); - run_vcpu(vcpu); + /* Stage 1, re-write all of guest memory. */ + run_vcpu(vcpu, 1); rendezvous_with_boss(); return NULL; -- cgit v1.2.3 From 80b7859a3a43ff8bb924947a03b144626aeb1d0c Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 27 Nov 2024 16:55:43 -0800 Subject: KVM: selftests: Add a read-only mprotect() phase to mmu_stress_test Add a third phase of mmu_stress_test to verify that mprotect()ing guest memory to make it read-only doesn't cause explosions, e.g. to verify KVM correctly handles the resulting mmu_notifier invalidations. Reviewed-by: James Houghton Reviewed-by: Andrew Jones Link: https://lore.kernel.org/r/20241128005547.4077116-13-seanjc@google.com Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/mmu_stress_test.c | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/kvm/mmu_stress_test.c b/tools/testing/selftests/kvm/mmu_stress_test.c index c6bf18cb7c89..0918fade9267 100644 --- a/tools/testing/selftests/kvm/mmu_stress_test.c +++ b/tools/testing/selftests/kvm/mmu_stress_test.c @@ -28,6 +28,10 @@ static void guest_code(uint64_t start_gpa, uint64_t end_gpa, uint64_t stride) GUEST_SYNC(i); } + for (gpa = start_gpa; gpa < end_gpa; gpa += stride) + *((volatile uint64_t *)gpa); + GUEST_SYNC(2); + GUEST_ASSERT(0); } @@ -95,6 +99,10 @@ static void *vcpu_worker(void *data) run_vcpu(vcpu, 1); rendezvous_with_boss(); + /* Stage 2, read all of guest memory, which is now read-only. 
*/ + run_vcpu(vcpu, 2); + rendezvous_with_boss(); + return NULL; } @@ -175,7 +183,7 @@ int main(int argc, char *argv[]) const uint64_t start_gpa = SZ_4G; const int first_slot = 1; - struct timespec time_start, time_run1, time_reset, time_run2; + struct timespec time_start, time_run1, time_reset, time_run2, time_ro; uint64_t max_gpa, gpa, slot_size, max_mem, i; int max_slots, slot, opt, fd; bool hugepages = false; @@ -279,14 +287,20 @@ int main(int argc, char *argv[]) rendezvous_with_vcpus(&time_reset, "reset"); rendezvous_with_vcpus(&time_run2, "run 2"); + mprotect(mem, slot_size, PROT_READ); + rendezvous_with_vcpus(&time_ro, "mprotect RO"); + + time_ro = timespec_sub(time_ro, time_run2); time_run2 = timespec_sub(time_run2, time_reset); - time_reset = timespec_sub(time_reset, time_run1); + time_reset = timespec_sub(time_reset, time_run1); time_run1 = timespec_sub(time_run1, time_start); - pr_info("run1 = %ld.%.9lds, reset = %ld.%.9lds, run2 = %ld.%.9lds\n", + pr_info("run1 = %ld.%.9lds, reset = %ld.%.9lds, run2 = %ld.%.9lds, " + "ro = %ld.%.9lds\n", time_run1.tv_sec, time_run1.tv_nsec, time_reset.tv_sec, time_reset.tv_nsec, - time_run2.tv_sec, time_run2.tv_nsec); + time_run2.tv_sec, time_run2.tv_nsec, + time_ro.tv_sec, time_ro.tv_nsec); /* * Delete even numbered slots (arbitrary) and unmap the first half of -- cgit v1.2.3 From b6c304aec6483f6cd254df690eda35b225cd856c Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 27 Nov 2024 16:55:44 -0800 Subject: KVM: selftests: Verify KVM correctly handles mprotect(PROT_READ) Add two phases to mmu_stress_test to verify that KVM correctly handles guest memory that was writable, and then made read-only in the primary MMU, and then made writable again. Add bonus coverage for x86 and arm64 to verify that all of guest memory was marked read-only. Making forward progress (without making memory writable) requires arch specific code to skip over the faulting instruction, but the test can at least verify each vCPU's starting page was made read-only for other architectures. Link: https://lore.kernel.org/r/20241128005547.4077116-14-seanjc@google.com Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/mmu_stress_test.c | 104 +++++++++++++++++++++++++- 1 file changed, 101 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/kvm/mmu_stress_test.c b/tools/testing/selftests/kvm/mmu_stress_test.c index 0918fade9267..d9c76b4c0d88 100644 --- a/tools/testing/selftests/kvm/mmu_stress_test.c +++ b/tools/testing/selftests/kvm/mmu_stress_test.c @@ -17,6 +17,8 @@ #include "processor.h" #include "ucall_common.h" +static bool mprotect_ro_done; + static void guest_code(uint64_t start_gpa, uint64_t end_gpa, uint64_t stride) { uint64_t gpa; @@ -32,6 +34,42 @@ static void guest_code(uint64_t start_gpa, uint64_t end_gpa, uint64_t stride) *((volatile uint64_t *)gpa); GUEST_SYNC(2); + /* + * Write to the region while mprotect(PROT_READ) is underway. Keep + * looping until the memory is guaranteed to be read-only, otherwise + * vCPUs may complete their writes and advance to the next stage + * prematurely. + * + * For architectures that support skipping the faulting instruction, + * generate the store via inline assembly to ensure the exact length + * of the instruction is known and stable (vcpu_arch_put_guest() on + * fixed-length architectures should work, but the cost of paranoia + * is low in this case). For x86, hand-code the exact opcode so that + * there is no room for variability in the generated instruction. 
+ */ + do { + for (gpa = start_gpa; gpa < end_gpa; gpa += stride) +#ifdef __x86_64__ + asm volatile(".byte 0x48,0x89,0x00" :: "a"(gpa) : "memory"); /* mov %rax, (%rax) */ +#elif defined(__aarch64__) + asm volatile("str %0, [%0]" :: "r" (gpa) : "memory"); +#else + vcpu_arch_put_guest(*((volatile uint64_t *)gpa), gpa); +#endif + } while (!READ_ONCE(mprotect_ro_done)); + + /* + * Only architectures that write the entire range can explicitly sync, + * as other architectures will be stuck on the write fault. + */ +#if defined(__x86_64__) || defined(__aarch64__) + GUEST_SYNC(3); +#endif + + for (gpa = start_gpa; gpa < end_gpa; gpa += stride) + vcpu_arch_put_guest(*((volatile uint64_t *)gpa), gpa); + GUEST_SYNC(4); + GUEST_ASSERT(0); } @@ -79,6 +117,7 @@ static void *vcpu_worker(void *data) struct vcpu_info *info = data; struct kvm_vcpu *vcpu = info->vcpu; struct kvm_vm *vm = vcpu->vm; + int r; vcpu_args_set(vcpu, 3, info->start_gpa, info->end_gpa, vm->page_size); @@ -101,6 +140,57 @@ static void *vcpu_worker(void *data) /* Stage 2, read all of guest memory, which is now read-only. */ run_vcpu(vcpu, 2); + + /* + * Stage 3, write guest memory and verify KVM returns -EFAULT for once + * the mprotect(PROT_READ) lands. Only architectures that support + * validating *all* of guest memory sync for this stage, as vCPUs will + * be stuck on the faulting instruction for other architectures. Go to + * stage 3 without a rendezvous + */ + do { + r = _vcpu_run(vcpu); + } while (!r); + TEST_ASSERT(r == -1 && errno == EFAULT, + "Expected EFAULT on write to RO memory, got r = %d, errno = %d", r, errno); + +#if defined(__x86_64__) || defined(__aarch64__) + /* + * Verify *all* writes from the guest hit EFAULT due to the VMA now + * being read-only. x86 and arm64 only at this time as skipping the + * instruction that hits the EFAULT requires advancing the program + * counter, which is arch specific and relies on inline assembly. + */ +#ifdef __x86_64__ + vcpu->run->kvm_valid_regs = KVM_SYNC_X86_REGS; +#endif + for (;;) { + r = _vcpu_run(vcpu); + if (!r) + break; + TEST_ASSERT_EQ(errno, EFAULT); +#if defined(__x86_64__) + WRITE_ONCE(vcpu->run->kvm_dirty_regs, KVM_SYNC_X86_REGS); + vcpu->run->s.regs.regs.rip += 3; +#elif defined(__aarch64__) + vcpu_set_reg(vcpu, ARM64_CORE_REG(regs.pc), + vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.pc)) + 4); +#endif + + } + assert_sync_stage(vcpu, 3); +#endif /* __x86_64__ || __aarch64__ */ + rendezvous_with_boss(); + + /* + * Stage 4. Run to completion, waiting for mprotect(PROT_WRITE) to + * make the memory writable again. 
+ */ + do { + r = _vcpu_run(vcpu); + } while (r && errno == EFAULT); + TEST_ASSERT_EQ(r, 0); + assert_sync_stage(vcpu, 4); rendezvous_with_boss(); return NULL; @@ -183,7 +273,7 @@ int main(int argc, char *argv[]) const uint64_t start_gpa = SZ_4G; const int first_slot = 1; - struct timespec time_start, time_run1, time_reset, time_run2, time_ro; + struct timespec time_start, time_run1, time_reset, time_run2, time_ro, time_rw; uint64_t max_gpa, gpa, slot_size, max_mem, i; int max_slots, slot, opt, fd; bool hugepages = false; @@ -288,19 +378,27 @@ int main(int argc, char *argv[]) rendezvous_with_vcpus(&time_run2, "run 2"); mprotect(mem, slot_size, PROT_READ); + usleep(10); + mprotect_ro_done = true; + sync_global_to_guest(vm, mprotect_ro_done); + rendezvous_with_vcpus(&time_ro, "mprotect RO"); + mprotect(mem, slot_size, PROT_READ | PROT_WRITE); + rendezvous_with_vcpus(&time_rw, "mprotect RW"); + time_rw = timespec_sub(time_rw, time_ro); time_ro = timespec_sub(time_ro, time_run2); time_run2 = timespec_sub(time_run2, time_reset); time_reset = timespec_sub(time_reset, time_run1); time_run1 = timespec_sub(time_run1, time_start); pr_info("run1 = %ld.%.9lds, reset = %ld.%.9lds, run2 = %ld.%.9lds, " - "ro = %ld.%.9lds\n", + "ro = %ld.%.9lds, rw = %ld.%.9lds\n", time_run1.tv_sec, time_run1.tv_nsec, time_reset.tv_sec, time_reset.tv_nsec, time_run2.tv_sec, time_run2.tv_nsec, - time_ro.tv_sec, time_ro.tv_nsec); + time_ro.tv_sec, time_ro.tv_nsec, + time_rw.tv_sec, time_rw.tv_nsec); /* * Delete even numbered slots (arbitrary) and unmap the first half of -- cgit v1.2.3 From 43fbd8cd389faa9760c5152b1c58e893c812953b Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 27 Nov 2024 16:55:45 -0800 Subject: KVM: selftests: Provide empty 'all' and 'clean' targets for unsupported ARCHs Provide empty targets for KVM selftests if the target architecture is unsupported to make it obvious which architectures are supported, and so that various side effects don't fail and/or do weird things, e.g. as is, "mkdir -p $(sort $(dir $(TEST_GEN_PROGS)))" fails due to a missing operand, and conversely, "$(shell mkdir -p $(sort $(OUTPUT)/$(ARCH_DIR) ..." will create an empty, useless directory for the unsupported architecture. Move the guts of the Makefile to Makefile.kvm so that it's easier to see that the if-statement effectively guards all of KVM selftests. 
Reported-by: Muhammad Usama Anjum Acked-by: Muhammad Usama Anjum Acked-by: Andrew Jones Link: https://lore.kernel.org/r/20241128005547.4077116-15-seanjc@google.com Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/.gitignore | 1 + tools/testing/selftests/kvm/Makefile | 336 +------------------------------ tools/testing/selftests/kvm/Makefile.kvm | 334 ++++++++++++++++++++++++++++++ 3 files changed, 340 insertions(+), 331 deletions(-) create mode 100644 tools/testing/selftests/kvm/Makefile.kvm (limited to 'tools') diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore index 7f57abf936e7..1d41a046a7bf 100644 --- a/tools/testing/selftests/kvm/.gitignore +++ b/tools/testing/selftests/kvm/.gitignore @@ -9,3 +9,4 @@ !config !settings !Makefile +!Makefile.kvm \ No newline at end of file diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index c59a337cd4da..7b33464bf8cc 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -1,12 +1,9 @@ # SPDX-License-Identifier: GPL-2.0-only -include ../../../build/Build.include - -all: - top_srcdir = ../../../.. include $(top_srcdir)/scripts/subarch.include ARCH ?= $(SUBARCH) +ifeq ($(ARCH),$(filter $(ARCH),arm64 s390 riscv x86 x86_64)) ifeq ($(ARCH),x86) ARCH_DIR := x86_64 else ifeq ($(ARCH),arm64) @@ -17,332 +14,9 @@ else ARCH_DIR := $(ARCH) endif -LIBKVM += lib/assert.c -LIBKVM += lib/elf.c -LIBKVM += lib/guest_modes.c -LIBKVM += lib/io.c -LIBKVM += lib/kvm_util.c -LIBKVM += lib/memstress.c -LIBKVM += lib/guest_sprintf.c -LIBKVM += lib/rbtree.c -LIBKVM += lib/sparsebit.c -LIBKVM += lib/test_util.c -LIBKVM += lib/ucall_common.c -LIBKVM += lib/userfaultfd_util.c - -LIBKVM_STRING += lib/string_override.c - -LIBKVM_x86_64 += lib/x86_64/apic.c -LIBKVM_x86_64 += lib/x86_64/handlers.S -LIBKVM_x86_64 += lib/x86_64/hyperv.c -LIBKVM_x86_64 += lib/x86_64/memstress.c -LIBKVM_x86_64 += lib/x86_64/pmu.c -LIBKVM_x86_64 += lib/x86_64/processor.c -LIBKVM_x86_64 += lib/x86_64/sev.c -LIBKVM_x86_64 += lib/x86_64/svm.c -LIBKVM_x86_64 += lib/x86_64/ucall.c -LIBKVM_x86_64 += lib/x86_64/vmx.c - -LIBKVM_aarch64 += lib/aarch64/gic.c -LIBKVM_aarch64 += lib/aarch64/gic_v3.c -LIBKVM_aarch64 += lib/aarch64/gic_v3_its.c -LIBKVM_aarch64 += lib/aarch64/handlers.S -LIBKVM_aarch64 += lib/aarch64/processor.c -LIBKVM_aarch64 += lib/aarch64/spinlock.c -LIBKVM_aarch64 += lib/aarch64/ucall.c -LIBKVM_aarch64 += lib/aarch64/vgic.c - -LIBKVM_s390x += lib/s390x/diag318_test_handler.c -LIBKVM_s390x += lib/s390x/processor.c -LIBKVM_s390x += lib/s390x/ucall.c -LIBKVM_s390x += lib/s390x/facility.c - -LIBKVM_riscv += lib/riscv/handlers.S -LIBKVM_riscv += lib/riscv/processor.c -LIBKVM_riscv += lib/riscv/ucall.c - -# Non-compiled test targets -TEST_PROGS_x86_64 += x86_64/nx_huge_pages_test.sh - -# Compiled test targets -TEST_GEN_PROGS_x86_64 = x86_64/cpuid_test -TEST_GEN_PROGS_x86_64 += x86_64/cr4_cpuid_sync_test -TEST_GEN_PROGS_x86_64 += x86_64/dirty_log_page_splitting_test -TEST_GEN_PROGS_x86_64 += x86_64/feature_msrs_test -TEST_GEN_PROGS_x86_64 += x86_64/exit_on_emulation_failure_test -TEST_GEN_PROGS_x86_64 += x86_64/fix_hypercall_test -TEST_GEN_PROGS_x86_64 += x86_64/hwcr_msr_test -TEST_GEN_PROGS_x86_64 += x86_64/hyperv_clock -TEST_GEN_PROGS_x86_64 += x86_64/hyperv_cpuid -TEST_GEN_PROGS_x86_64 += x86_64/hyperv_evmcs -TEST_GEN_PROGS_x86_64 += x86_64/hyperv_extended_hypercalls -TEST_GEN_PROGS_x86_64 += x86_64/hyperv_features -TEST_GEN_PROGS_x86_64 += x86_64/hyperv_ipi 
-TEST_GEN_PROGS_x86_64 += x86_64/hyperv_svm_test -TEST_GEN_PROGS_x86_64 += x86_64/hyperv_tlb_flush -TEST_GEN_PROGS_x86_64 += x86_64/kvm_clock_test -TEST_GEN_PROGS_x86_64 += x86_64/kvm_pv_test -TEST_GEN_PROGS_x86_64 += x86_64/monitor_mwait_test -TEST_GEN_PROGS_x86_64 += x86_64/nested_exceptions_test -TEST_GEN_PROGS_x86_64 += x86_64/platform_info_test -TEST_GEN_PROGS_x86_64 += x86_64/pmu_counters_test -TEST_GEN_PROGS_x86_64 += x86_64/pmu_event_filter_test -TEST_GEN_PROGS_x86_64 += x86_64/private_mem_conversions_test -TEST_GEN_PROGS_x86_64 += x86_64/private_mem_kvm_exits_test -TEST_GEN_PROGS_x86_64 += x86_64/set_boot_cpu_id -TEST_GEN_PROGS_x86_64 += x86_64/set_sregs_test -TEST_GEN_PROGS_x86_64 += x86_64/smaller_maxphyaddr_emulation_test -TEST_GEN_PROGS_x86_64 += x86_64/smm_test -TEST_GEN_PROGS_x86_64 += x86_64/state_test -TEST_GEN_PROGS_x86_64 += x86_64/vmx_preemption_timer_test -TEST_GEN_PROGS_x86_64 += x86_64/svm_vmcall_test -TEST_GEN_PROGS_x86_64 += x86_64/svm_int_ctl_test -TEST_GEN_PROGS_x86_64 += x86_64/svm_nested_shutdown_test -TEST_GEN_PROGS_x86_64 += x86_64/svm_nested_soft_inject_test -TEST_GEN_PROGS_x86_64 += x86_64/tsc_scaling_sync -TEST_GEN_PROGS_x86_64 += x86_64/sync_regs_test -TEST_GEN_PROGS_x86_64 += x86_64/ucna_injection_test -TEST_GEN_PROGS_x86_64 += x86_64/userspace_io_test -TEST_GEN_PROGS_x86_64 += x86_64/userspace_msr_exit_test -TEST_GEN_PROGS_x86_64 += x86_64/vmx_apic_access_test -TEST_GEN_PROGS_x86_64 += x86_64/vmx_close_while_nested_test -TEST_GEN_PROGS_x86_64 += x86_64/vmx_dirty_log_test -TEST_GEN_PROGS_x86_64 += x86_64/vmx_exception_with_invalid_guest_state -TEST_GEN_PROGS_x86_64 += x86_64/vmx_msrs_test -TEST_GEN_PROGS_x86_64 += x86_64/vmx_invalid_nested_guest_state -TEST_GEN_PROGS_x86_64 += x86_64/vmx_set_nested_state_test -TEST_GEN_PROGS_x86_64 += x86_64/vmx_tsc_adjust_test -TEST_GEN_PROGS_x86_64 += x86_64/vmx_nested_tsc_scaling_test -TEST_GEN_PROGS_x86_64 += x86_64/apic_bus_clock_test -TEST_GEN_PROGS_x86_64 += x86_64/xapic_ipi_test -TEST_GEN_PROGS_x86_64 += x86_64/xapic_state_test -TEST_GEN_PROGS_x86_64 += x86_64/xcr0_cpuid_test -TEST_GEN_PROGS_x86_64 += x86_64/xss_msr_test -TEST_GEN_PROGS_x86_64 += x86_64/debug_regs -TEST_GEN_PROGS_x86_64 += x86_64/tsc_msrs_test -TEST_GEN_PROGS_x86_64 += x86_64/vmx_pmu_caps_test -TEST_GEN_PROGS_x86_64 += x86_64/xen_shinfo_test -TEST_GEN_PROGS_x86_64 += x86_64/xen_vmcall_test -TEST_GEN_PROGS_x86_64 += x86_64/sev_init2_tests -TEST_GEN_PROGS_x86_64 += x86_64/sev_migrate_tests -TEST_GEN_PROGS_x86_64 += x86_64/sev_smoke_test -TEST_GEN_PROGS_x86_64 += x86_64/amx_test -TEST_GEN_PROGS_x86_64 += x86_64/max_vcpuid_cap_test -TEST_GEN_PROGS_x86_64 += x86_64/triple_fault_event_test -TEST_GEN_PROGS_x86_64 += x86_64/recalc_apic_map_test -TEST_GEN_PROGS_x86_64 += access_tracking_perf_test -TEST_GEN_PROGS_x86_64 += coalesced_io_test -TEST_GEN_PROGS_x86_64 += demand_paging_test -TEST_GEN_PROGS_x86_64 += dirty_log_test -TEST_GEN_PROGS_x86_64 += dirty_log_perf_test -TEST_GEN_PROGS_x86_64 += guest_memfd_test -TEST_GEN_PROGS_x86_64 += guest_print_test -TEST_GEN_PROGS_x86_64 += hardware_disable_test -TEST_GEN_PROGS_x86_64 += kvm_create_max_vcpus -TEST_GEN_PROGS_x86_64 += kvm_page_table_test -TEST_GEN_PROGS_x86_64 += mmu_stress_test -TEST_GEN_PROGS_x86_64 += memslot_modification_stress_test -TEST_GEN_PROGS_x86_64 += memslot_perf_test -TEST_GEN_PROGS_x86_64 += rseq_test -TEST_GEN_PROGS_x86_64 += set_memory_region_test -TEST_GEN_PROGS_x86_64 += steal_time -TEST_GEN_PROGS_x86_64 += kvm_binary_stats_test -TEST_GEN_PROGS_x86_64 += system_counter_offset_test 
-TEST_GEN_PROGS_x86_64 += pre_fault_memory_test - -# Compiled outputs used by test targets -TEST_GEN_PROGS_EXTENDED_x86_64 += x86_64/nx_huge_pages_test - -TEST_GEN_PROGS_aarch64 += aarch64/aarch32_id_regs -TEST_GEN_PROGS_aarch64 += aarch64/arch_timer_edge_cases -TEST_GEN_PROGS_aarch64 += aarch64/debug-exceptions -TEST_GEN_PROGS_aarch64 += aarch64/hypercalls -TEST_GEN_PROGS_aarch64 += aarch64/mmio_abort -TEST_GEN_PROGS_aarch64 += aarch64/page_fault_test -TEST_GEN_PROGS_aarch64 += aarch64/psci_test -TEST_GEN_PROGS_aarch64 += aarch64/set_id_regs -TEST_GEN_PROGS_aarch64 += aarch64/smccc_filter -TEST_GEN_PROGS_aarch64 += aarch64/vcpu_width_config -TEST_GEN_PROGS_aarch64 += aarch64/vgic_init -TEST_GEN_PROGS_aarch64 += aarch64/vgic_irq -TEST_GEN_PROGS_aarch64 += aarch64/vgic_lpi_stress -TEST_GEN_PROGS_aarch64 += aarch64/vpmu_counter_access -TEST_GEN_PROGS_aarch64 += aarch64/no-vgic-v3 -TEST_GEN_PROGS_aarch64 += access_tracking_perf_test -TEST_GEN_PROGS_aarch64 += arch_timer -TEST_GEN_PROGS_aarch64 += coalesced_io_test -TEST_GEN_PROGS_aarch64 += demand_paging_test -TEST_GEN_PROGS_aarch64 += dirty_log_test -TEST_GEN_PROGS_aarch64 += dirty_log_perf_test -TEST_GEN_PROGS_aarch64 += guest_print_test -TEST_GEN_PROGS_aarch64 += get-reg-list -TEST_GEN_PROGS_aarch64 += kvm_create_max_vcpus -TEST_GEN_PROGS_aarch64 += kvm_page_table_test -TEST_GEN_PROGS_aarch64 += memslot_modification_stress_test -TEST_GEN_PROGS_aarch64 += memslot_perf_test -TEST_GEN_PROGS_aarch64 += mmu_stress_test -TEST_GEN_PROGS_aarch64 += rseq_test -TEST_GEN_PROGS_aarch64 += set_memory_region_test -TEST_GEN_PROGS_aarch64 += steal_time -TEST_GEN_PROGS_aarch64 += kvm_binary_stats_test - -TEST_GEN_PROGS_s390x = s390x/memop -TEST_GEN_PROGS_s390x += s390x/resets -TEST_GEN_PROGS_s390x += s390x/sync_regs_test -TEST_GEN_PROGS_s390x += s390x/tprot -TEST_GEN_PROGS_s390x += s390x/cmma_test -TEST_GEN_PROGS_s390x += s390x/debug_test -TEST_GEN_PROGS_s390x += s390x/cpumodel_subfuncs_test -TEST_GEN_PROGS_s390x += s390x/shared_zeropage_test -TEST_GEN_PROGS_s390x += s390x/ucontrol_test -TEST_GEN_PROGS_s390x += demand_paging_test -TEST_GEN_PROGS_s390x += dirty_log_test -TEST_GEN_PROGS_s390x += guest_print_test -TEST_GEN_PROGS_s390x += kvm_create_max_vcpus -TEST_GEN_PROGS_s390x += kvm_page_table_test -TEST_GEN_PROGS_s390x += rseq_test -TEST_GEN_PROGS_s390x += set_memory_region_test -TEST_GEN_PROGS_s390x += kvm_binary_stats_test - -TEST_GEN_PROGS_riscv += riscv/sbi_pmu_test -TEST_GEN_PROGS_riscv += riscv/ebreak_test -TEST_GEN_PROGS_riscv += arch_timer -TEST_GEN_PROGS_riscv += coalesced_io_test -TEST_GEN_PROGS_riscv += demand_paging_test -TEST_GEN_PROGS_riscv += dirty_log_test -TEST_GEN_PROGS_riscv += get-reg-list -TEST_GEN_PROGS_riscv += guest_print_test -TEST_GEN_PROGS_riscv += kvm_binary_stats_test -TEST_GEN_PROGS_riscv += kvm_create_max_vcpus -TEST_GEN_PROGS_riscv += kvm_page_table_test -TEST_GEN_PROGS_riscv += set_memory_region_test -TEST_GEN_PROGS_riscv += steal_time - -SPLIT_TESTS += arch_timer -SPLIT_TESTS += get-reg-list - -TEST_PROGS += $(TEST_PROGS_$(ARCH_DIR)) -TEST_GEN_PROGS += $(TEST_GEN_PROGS_$(ARCH_DIR)) -TEST_GEN_PROGS_EXTENDED += $(TEST_GEN_PROGS_EXTENDED_$(ARCH_DIR)) -LIBKVM += $(LIBKVM_$(ARCH_DIR)) - -OVERRIDE_TARGETS = 1 - -# lib.mak defines $(OUTPUT), prepends $(OUTPUT)/ to $(TEST_GEN_PROGS), and most -# importantly defines, i.e. overwrites, $(CC) (unless `make -e` or `make CC=`, -# which causes the environment variable to override the makefile). 
-include ../lib.mk - -INSTALL_HDR_PATH = $(top_srcdir)/usr -LINUX_HDR_PATH = $(INSTALL_HDR_PATH)/include/ -LINUX_TOOL_INCLUDE = $(top_srcdir)/tools/include -ifeq ($(ARCH),x86_64) -LINUX_TOOL_ARCH_INCLUDE = $(top_srcdir)/tools/arch/x86/include -else -LINUX_TOOL_ARCH_INCLUDE = $(top_srcdir)/tools/arch/$(ARCH)/include -endif -CFLAGS += -Wall -Wstrict-prototypes -Wuninitialized -O2 -g -std=gnu99 \ - -Wno-gnu-variable-sized-type-not-at-end -MD -MP -DCONFIG_64BIT \ - -fno-builtin-memcmp -fno-builtin-memcpy \ - -fno-builtin-memset -fno-builtin-strnlen \ - -fno-stack-protector -fno-PIE -fno-strict-aliasing \ - -I$(LINUX_TOOL_INCLUDE) -I$(LINUX_TOOL_ARCH_INCLUDE) \ - -I$(LINUX_HDR_PATH) -Iinclude -I$(/dev/null; echo "$$?"),0) - CFLAGS += -march=x86-64-v2 -endif -endif -ifeq ($(ARCH),arm64) -tools_dir := $(top_srcdir)/tools -arm64_tools_dir := $(tools_dir)/arch/arm64/tools/ - -ifneq ($(abs_objdir),) -arm64_hdr_outdir := $(abs_objdir)/tools/ +include Makefile.kvm else -arm64_hdr_outdir := $(tools_dir)/ -endif - -GEN_HDRS := $(arm64_hdr_outdir)arch/arm64/include/generated/ -CFLAGS += -I$(GEN_HDRS) - -$(GEN_HDRS): $(wildcard $(arm64_tools_dir)/*) - $(MAKE) -C $(arm64_tools_dir) OUTPUT=$(arm64_hdr_outdir) +# Empty targets for unsupported architectures +all: +clean: endif - -no-pie-option := $(call try-run, echo 'int main(void) { return 0; }' | \ - $(CC) -Werror $(CFLAGS) -no-pie -x c - -o "$$TMP", -no-pie) - -# On s390, build the testcases KVM-enabled -pgste-option = $(call try-run, echo 'int main(void) { return 0; }' | \ - $(CC) -Werror -Wl$(comma)--s390-pgste -x c - -o "$$TMP",-Wl$(comma)--s390-pgste) - -LDLIBS += -ldl -LDFLAGS += -pthread $(no-pie-option) $(pgste-option) - -LIBKVM_C := $(filter %.c,$(LIBKVM)) -LIBKVM_S := $(filter %.S,$(LIBKVM)) -LIBKVM_C_OBJ := $(patsubst %.c, $(OUTPUT)/%.o, $(LIBKVM_C)) -LIBKVM_S_OBJ := $(patsubst %.S, $(OUTPUT)/%.o, $(LIBKVM_S)) -LIBKVM_STRING_OBJ := $(patsubst %.c, $(OUTPUT)/%.o, $(LIBKVM_STRING)) -LIBKVM_OBJS = $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ) $(LIBKVM_STRING_OBJ) -SPLIT_TEST_GEN_PROGS := $(patsubst %, $(OUTPUT)/%, $(SPLIT_TESTS)) -SPLIT_TEST_GEN_OBJ := $(patsubst %, $(OUTPUT)/$(ARCH_DIR)/%.o, $(SPLIT_TESTS)) - -TEST_GEN_OBJ = $(patsubst %, %.o, $(TEST_GEN_PROGS)) -TEST_GEN_OBJ += $(patsubst %, %.o, $(TEST_GEN_PROGS_EXTENDED)) -TEST_DEP_FILES = $(patsubst %.o, %.d, $(TEST_GEN_OBJ)) -TEST_DEP_FILES += $(patsubst %.o, %.d, $(LIBKVM_OBJS)) -TEST_DEP_FILES += $(patsubst %.o, %.d, $(SPLIT_TEST_GEN_OBJ)) --include $(TEST_DEP_FILES) - -$(shell mkdir -p $(sort $(OUTPUT)/$(ARCH_DIR) $(dir $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ)))) - -$(filter-out $(SPLIT_TEST_GEN_PROGS), $(TEST_GEN_PROGS)) \ -$(TEST_GEN_PROGS_EXTENDED): %: %.o - $(CC) $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) $(TARGET_ARCH) $< $(LIBKVM_OBJS) $(LDLIBS) -o $@ -$(TEST_GEN_OBJ): $(OUTPUT)/%.o: %.c - $(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@ - -$(SPLIT_TEST_GEN_PROGS): $(OUTPUT)/%: $(OUTPUT)/%.o $(OUTPUT)/$(ARCH_DIR)/%.o - $(CC) $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) $(TARGET_ARCH) $^ $(LDLIBS) -o $@ -$(SPLIT_TEST_GEN_OBJ): $(OUTPUT)/$(ARCH_DIR)/%.o: $(ARCH_DIR)/%.c - $(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@ - -EXTRA_CLEAN += $(GEN_HDRS) \ - $(LIBKVM_OBJS) \ - $(SPLIT_TEST_GEN_OBJ) \ - $(TEST_DEP_FILES) \ - $(TEST_GEN_OBJ) \ - cscope.* - -$(LIBKVM_C_OBJ): $(OUTPUT)/%.o: %.c $(GEN_HDRS) - $(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@ - -$(LIBKVM_S_OBJ): $(OUTPUT)/%.o: %.S $(GEN_HDRS) - $(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@ - -# Compile the string overrides as freestanding to 
prevent the compiler from -# generating self-referential code, e.g. without "freestanding" the compiler may -# "optimize" memcmp() by invoking memcmp(), thus causing infinite recursion. -$(LIBKVM_STRING_OBJ): $(OUTPUT)/%.o: %.c - $(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c -ffreestanding $< -o $@ - -$(shell mkdir -p $(sort $(dir $(TEST_GEN_PROGS)))) -$(SPLIT_TEST_GEN_OBJ): $(GEN_HDRS) -$(TEST_GEN_PROGS): $(LIBKVM_OBJS) -$(TEST_GEN_PROGS_EXTENDED): $(LIBKVM_OBJS) -$(TEST_GEN_OBJ): $(GEN_HDRS) - -cscope: include_paths = $(LINUX_TOOL_INCLUDE) $(LINUX_HDR_PATH) include lib .. -cscope: - $(RM) cscope.* - (find $(include_paths) -name '*.h' \ - -exec realpath --relative-base=$(PWD) {} \;; \ - find . -name '*.c' \ - -exec realpath --relative-base=$(PWD) {} \;) | sort -u > cscope.files - cscope -b diff --git a/tools/testing/selftests/kvm/Makefile.kvm b/tools/testing/selftests/kvm/Makefile.kvm new file mode 100644 index 000000000000..e988a72f8c20 --- /dev/null +++ b/tools/testing/selftests/kvm/Makefile.kvm @@ -0,0 +1,334 @@ +# SPDX-License-Identifier: GPL-2.0-only +include ../../../build/Build.include + +all: + +LIBKVM += lib/assert.c +LIBKVM += lib/elf.c +LIBKVM += lib/guest_modes.c +LIBKVM += lib/io.c +LIBKVM += lib/kvm_util.c +LIBKVM += lib/memstress.c +LIBKVM += lib/guest_sprintf.c +LIBKVM += lib/rbtree.c +LIBKVM += lib/sparsebit.c +LIBKVM += lib/test_util.c +LIBKVM += lib/ucall_common.c +LIBKVM += lib/userfaultfd_util.c + +LIBKVM_STRING += lib/string_override.c + +LIBKVM_x86_64 += lib/x86_64/apic.c +LIBKVM_x86_64 += lib/x86_64/handlers.S +LIBKVM_x86_64 += lib/x86_64/hyperv.c +LIBKVM_x86_64 += lib/x86_64/memstress.c +LIBKVM_x86_64 += lib/x86_64/pmu.c +LIBKVM_x86_64 += lib/x86_64/processor.c +LIBKVM_x86_64 += lib/x86_64/sev.c +LIBKVM_x86_64 += lib/x86_64/svm.c +LIBKVM_x86_64 += lib/x86_64/ucall.c +LIBKVM_x86_64 += lib/x86_64/vmx.c + +LIBKVM_aarch64 += lib/aarch64/gic.c +LIBKVM_aarch64 += lib/aarch64/gic_v3.c +LIBKVM_aarch64 += lib/aarch64/gic_v3_its.c +LIBKVM_aarch64 += lib/aarch64/handlers.S +LIBKVM_aarch64 += lib/aarch64/processor.c +LIBKVM_aarch64 += lib/aarch64/spinlock.c +LIBKVM_aarch64 += lib/aarch64/ucall.c +LIBKVM_aarch64 += lib/aarch64/vgic.c + +LIBKVM_s390x += lib/s390x/diag318_test_handler.c +LIBKVM_s390x += lib/s390x/processor.c +LIBKVM_s390x += lib/s390x/ucall.c +LIBKVM_s390x += lib/s390x/facility.c + +LIBKVM_riscv += lib/riscv/handlers.S +LIBKVM_riscv += lib/riscv/processor.c +LIBKVM_riscv += lib/riscv/ucall.c + +# Non-compiled test targets +TEST_PROGS_x86_64 += x86_64/nx_huge_pages_test.sh + +# Compiled test targets +TEST_GEN_PROGS_x86_64 = x86_64/cpuid_test +TEST_GEN_PROGS_x86_64 += x86_64/cr4_cpuid_sync_test +TEST_GEN_PROGS_x86_64 += x86_64/dirty_log_page_splitting_test +TEST_GEN_PROGS_x86_64 += x86_64/feature_msrs_test +TEST_GEN_PROGS_x86_64 += x86_64/exit_on_emulation_failure_test +TEST_GEN_PROGS_x86_64 += x86_64/fix_hypercall_test +TEST_GEN_PROGS_x86_64 += x86_64/hwcr_msr_test +TEST_GEN_PROGS_x86_64 += x86_64/hyperv_clock +TEST_GEN_PROGS_x86_64 += x86_64/hyperv_cpuid +TEST_GEN_PROGS_x86_64 += x86_64/hyperv_evmcs +TEST_GEN_PROGS_x86_64 += x86_64/hyperv_extended_hypercalls +TEST_GEN_PROGS_x86_64 += x86_64/hyperv_features +TEST_GEN_PROGS_x86_64 += x86_64/hyperv_ipi +TEST_GEN_PROGS_x86_64 += x86_64/hyperv_svm_test +TEST_GEN_PROGS_x86_64 += x86_64/hyperv_tlb_flush +TEST_GEN_PROGS_x86_64 += x86_64/kvm_clock_test +TEST_GEN_PROGS_x86_64 += x86_64/kvm_pv_test +TEST_GEN_PROGS_x86_64 += x86_64/monitor_mwait_test +TEST_GEN_PROGS_x86_64 += x86_64/nested_exceptions_test 
+TEST_GEN_PROGS_x86_64 += x86_64/platform_info_test +TEST_GEN_PROGS_x86_64 += x86_64/pmu_counters_test +TEST_GEN_PROGS_x86_64 += x86_64/pmu_event_filter_test +TEST_GEN_PROGS_x86_64 += x86_64/private_mem_conversions_test +TEST_GEN_PROGS_x86_64 += x86_64/private_mem_kvm_exits_test +TEST_GEN_PROGS_x86_64 += x86_64/set_boot_cpu_id +TEST_GEN_PROGS_x86_64 += x86_64/set_sregs_test +TEST_GEN_PROGS_x86_64 += x86_64/smaller_maxphyaddr_emulation_test +TEST_GEN_PROGS_x86_64 += x86_64/smm_test +TEST_GEN_PROGS_x86_64 += x86_64/state_test +TEST_GEN_PROGS_x86_64 += x86_64/vmx_preemption_timer_test +TEST_GEN_PROGS_x86_64 += x86_64/svm_vmcall_test +TEST_GEN_PROGS_x86_64 += x86_64/svm_int_ctl_test +TEST_GEN_PROGS_x86_64 += x86_64/svm_nested_shutdown_test +TEST_GEN_PROGS_x86_64 += x86_64/svm_nested_soft_inject_test +TEST_GEN_PROGS_x86_64 += x86_64/tsc_scaling_sync +TEST_GEN_PROGS_x86_64 += x86_64/sync_regs_test +TEST_GEN_PROGS_x86_64 += x86_64/ucna_injection_test +TEST_GEN_PROGS_x86_64 += x86_64/userspace_io_test +TEST_GEN_PROGS_x86_64 += x86_64/userspace_msr_exit_test +TEST_GEN_PROGS_x86_64 += x86_64/vmx_apic_access_test +TEST_GEN_PROGS_x86_64 += x86_64/vmx_close_while_nested_test +TEST_GEN_PROGS_x86_64 += x86_64/vmx_dirty_log_test +TEST_GEN_PROGS_x86_64 += x86_64/vmx_exception_with_invalid_guest_state +TEST_GEN_PROGS_x86_64 += x86_64/vmx_msrs_test +TEST_GEN_PROGS_x86_64 += x86_64/vmx_invalid_nested_guest_state +TEST_GEN_PROGS_x86_64 += x86_64/vmx_set_nested_state_test +TEST_GEN_PROGS_x86_64 += x86_64/vmx_tsc_adjust_test +TEST_GEN_PROGS_x86_64 += x86_64/vmx_nested_tsc_scaling_test +TEST_GEN_PROGS_x86_64 += x86_64/apic_bus_clock_test +TEST_GEN_PROGS_x86_64 += x86_64/xapic_ipi_test +TEST_GEN_PROGS_x86_64 += x86_64/xapic_state_test +TEST_GEN_PROGS_x86_64 += x86_64/xcr0_cpuid_test +TEST_GEN_PROGS_x86_64 += x86_64/xss_msr_test +TEST_GEN_PROGS_x86_64 += x86_64/debug_regs +TEST_GEN_PROGS_x86_64 += x86_64/tsc_msrs_test +TEST_GEN_PROGS_x86_64 += x86_64/vmx_pmu_caps_test +TEST_GEN_PROGS_x86_64 += x86_64/xen_shinfo_test +TEST_GEN_PROGS_x86_64 += x86_64/xen_vmcall_test +TEST_GEN_PROGS_x86_64 += x86_64/sev_init2_tests +TEST_GEN_PROGS_x86_64 += x86_64/sev_migrate_tests +TEST_GEN_PROGS_x86_64 += x86_64/sev_smoke_test +TEST_GEN_PROGS_x86_64 += x86_64/amx_test +TEST_GEN_PROGS_x86_64 += x86_64/max_vcpuid_cap_test +TEST_GEN_PROGS_x86_64 += x86_64/triple_fault_event_test +TEST_GEN_PROGS_x86_64 += x86_64/recalc_apic_map_test +TEST_GEN_PROGS_x86_64 += access_tracking_perf_test +TEST_GEN_PROGS_x86_64 += coalesced_io_test +TEST_GEN_PROGS_x86_64 += demand_paging_test +TEST_GEN_PROGS_x86_64 += dirty_log_test +TEST_GEN_PROGS_x86_64 += dirty_log_perf_test +TEST_GEN_PROGS_x86_64 += guest_memfd_test +TEST_GEN_PROGS_x86_64 += guest_print_test +TEST_GEN_PROGS_x86_64 += hardware_disable_test +TEST_GEN_PROGS_x86_64 += kvm_create_max_vcpus +TEST_GEN_PROGS_x86_64 += kvm_page_table_test +TEST_GEN_PROGS_x86_64 += mmu_stress_test +TEST_GEN_PROGS_x86_64 += memslot_modification_stress_test +TEST_GEN_PROGS_x86_64 += memslot_perf_test +TEST_GEN_PROGS_x86_64 += rseq_test +TEST_GEN_PROGS_x86_64 += set_memory_region_test +TEST_GEN_PROGS_x86_64 += steal_time +TEST_GEN_PROGS_x86_64 += kvm_binary_stats_test +TEST_GEN_PROGS_x86_64 += system_counter_offset_test +TEST_GEN_PROGS_x86_64 += pre_fault_memory_test + +# Compiled outputs used by test targets +TEST_GEN_PROGS_EXTENDED_x86_64 += x86_64/nx_huge_pages_test + +TEST_GEN_PROGS_aarch64 += aarch64/aarch32_id_regs +TEST_GEN_PROGS_aarch64 += aarch64/arch_timer_edge_cases +TEST_GEN_PROGS_aarch64 += 
aarch64/debug-exceptions +TEST_GEN_PROGS_aarch64 += aarch64/hypercalls +TEST_GEN_PROGS_aarch64 += aarch64/mmio_abort +TEST_GEN_PROGS_aarch64 += aarch64/page_fault_test +TEST_GEN_PROGS_aarch64 += aarch64/psci_test +TEST_GEN_PROGS_aarch64 += aarch64/set_id_regs +TEST_GEN_PROGS_aarch64 += aarch64/smccc_filter +TEST_GEN_PROGS_aarch64 += aarch64/vcpu_width_config +TEST_GEN_PROGS_aarch64 += aarch64/vgic_init +TEST_GEN_PROGS_aarch64 += aarch64/vgic_irq +TEST_GEN_PROGS_aarch64 += aarch64/vgic_lpi_stress +TEST_GEN_PROGS_aarch64 += aarch64/vpmu_counter_access +TEST_GEN_PROGS_aarch64 += aarch64/no-vgic-v3 +TEST_GEN_PROGS_aarch64 += access_tracking_perf_test +TEST_GEN_PROGS_aarch64 += arch_timer +TEST_GEN_PROGS_aarch64 += coalesced_io_test +TEST_GEN_PROGS_aarch64 += demand_paging_test +TEST_GEN_PROGS_aarch64 += dirty_log_test +TEST_GEN_PROGS_aarch64 += dirty_log_perf_test +TEST_GEN_PROGS_aarch64 += guest_print_test +TEST_GEN_PROGS_aarch64 += get-reg-list +TEST_GEN_PROGS_aarch64 += kvm_create_max_vcpus +TEST_GEN_PROGS_aarch64 += kvm_page_table_test +TEST_GEN_PROGS_aarch64 += memslot_modification_stress_test +TEST_GEN_PROGS_aarch64 += memslot_perf_test +TEST_GEN_PROGS_aarch64 += mmu_stress_test +TEST_GEN_PROGS_aarch64 += rseq_test +TEST_GEN_PROGS_aarch64 += set_memory_region_test +TEST_GEN_PROGS_aarch64 += steal_time +TEST_GEN_PROGS_aarch64 += kvm_binary_stats_test + +TEST_GEN_PROGS_s390x = s390x/memop +TEST_GEN_PROGS_s390x += s390x/resets +TEST_GEN_PROGS_s390x += s390x/sync_regs_test +TEST_GEN_PROGS_s390x += s390x/tprot +TEST_GEN_PROGS_s390x += s390x/cmma_test +TEST_GEN_PROGS_s390x += s390x/debug_test +TEST_GEN_PROGS_s390x += s390x/cpumodel_subfuncs_test +TEST_GEN_PROGS_s390x += s390x/shared_zeropage_test +TEST_GEN_PROGS_s390x += s390x/ucontrol_test +TEST_GEN_PROGS_s390x += demand_paging_test +TEST_GEN_PROGS_s390x += dirty_log_test +TEST_GEN_PROGS_s390x += guest_print_test +TEST_GEN_PROGS_s390x += kvm_create_max_vcpus +TEST_GEN_PROGS_s390x += kvm_page_table_test +TEST_GEN_PROGS_s390x += rseq_test +TEST_GEN_PROGS_s390x += set_memory_region_test +TEST_GEN_PROGS_s390x += kvm_binary_stats_test + +TEST_GEN_PROGS_riscv += riscv/sbi_pmu_test +TEST_GEN_PROGS_riscv += riscv/ebreak_test +TEST_GEN_PROGS_riscv += arch_timer +TEST_GEN_PROGS_riscv += coalesced_io_test +TEST_GEN_PROGS_riscv += demand_paging_test +TEST_GEN_PROGS_riscv += dirty_log_test +TEST_GEN_PROGS_riscv += get-reg-list +TEST_GEN_PROGS_riscv += guest_print_test +TEST_GEN_PROGS_riscv += kvm_binary_stats_test +TEST_GEN_PROGS_riscv += kvm_create_max_vcpus +TEST_GEN_PROGS_riscv += kvm_page_table_test +TEST_GEN_PROGS_riscv += set_memory_region_test +TEST_GEN_PROGS_riscv += steal_time + +SPLIT_TESTS += arch_timer +SPLIT_TESTS += get-reg-list + +TEST_PROGS += $(TEST_PROGS_$(ARCH_DIR)) +TEST_GEN_PROGS += $(TEST_GEN_PROGS_$(ARCH_DIR)) +TEST_GEN_PROGS_EXTENDED += $(TEST_GEN_PROGS_EXTENDED_$(ARCH_DIR)) +LIBKVM += $(LIBKVM_$(ARCH_DIR)) + +OVERRIDE_TARGETS = 1 + +# lib.mak defines $(OUTPUT), prepends $(OUTPUT)/ to $(TEST_GEN_PROGS), and most +# importantly defines, i.e. overwrites, $(CC) (unless `make -e` or `make CC=`, +# which causes the environment variable to override the makefile). 
+include ../lib.mk + +INSTALL_HDR_PATH = $(top_srcdir)/usr +LINUX_HDR_PATH = $(INSTALL_HDR_PATH)/include/ +LINUX_TOOL_INCLUDE = $(top_srcdir)/tools/include +ifeq ($(ARCH),x86_64) +LINUX_TOOL_ARCH_INCLUDE = $(top_srcdir)/tools/arch/x86/include +else +LINUX_TOOL_ARCH_INCLUDE = $(top_srcdir)/tools/arch/$(ARCH)/include +endif +CFLAGS += -Wall -Wstrict-prototypes -Wuninitialized -O2 -g -std=gnu99 \ + -Wno-gnu-variable-sized-type-not-at-end -MD -MP -DCONFIG_64BIT \ + -fno-builtin-memcmp -fno-builtin-memcpy \ + -fno-builtin-memset -fno-builtin-strnlen \ + -fno-stack-protector -fno-PIE -fno-strict-aliasing \ + -I$(LINUX_TOOL_INCLUDE) -I$(LINUX_TOOL_ARCH_INCLUDE) \ + -I$(LINUX_HDR_PATH) -Iinclude -I$(/dev/null; echo "$$?"),0) + CFLAGS += -march=x86-64-v2 +endif +endif +ifeq ($(ARCH),arm64) +tools_dir := $(top_srcdir)/tools +arm64_tools_dir := $(tools_dir)/arch/arm64/tools/ + +ifneq ($(abs_objdir),) +arm64_hdr_outdir := $(abs_objdir)/tools/ +else +arm64_hdr_outdir := $(tools_dir)/ +endif + +GEN_HDRS := $(arm64_hdr_outdir)arch/arm64/include/generated/ +CFLAGS += -I$(GEN_HDRS) + +$(GEN_HDRS): $(wildcard $(arm64_tools_dir)/*) + $(MAKE) -C $(arm64_tools_dir) OUTPUT=$(arm64_hdr_outdir) +endif + +no-pie-option := $(call try-run, echo 'int main(void) { return 0; }' | \ + $(CC) -Werror $(CFLAGS) -no-pie -x c - -o "$$TMP", -no-pie) + +# On s390, build the testcases KVM-enabled +pgste-option = $(call try-run, echo 'int main(void) { return 0; }' | \ + $(CC) -Werror -Wl$(comma)--s390-pgste -x c - -o "$$TMP",-Wl$(comma)--s390-pgste) + +LDLIBS += -ldl +LDFLAGS += -pthread $(no-pie-option) $(pgste-option) + +LIBKVM_C := $(filter %.c,$(LIBKVM)) +LIBKVM_S := $(filter %.S,$(LIBKVM)) +LIBKVM_C_OBJ := $(patsubst %.c, $(OUTPUT)/%.o, $(LIBKVM_C)) +LIBKVM_S_OBJ := $(patsubst %.S, $(OUTPUT)/%.o, $(LIBKVM_S)) +LIBKVM_STRING_OBJ := $(patsubst %.c, $(OUTPUT)/%.o, $(LIBKVM_STRING)) +LIBKVM_OBJS = $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ) $(LIBKVM_STRING_OBJ) +SPLIT_TEST_GEN_PROGS := $(patsubst %, $(OUTPUT)/%, $(SPLIT_TESTS)) +SPLIT_TEST_GEN_OBJ := $(patsubst %, $(OUTPUT)/$(ARCH_DIR)/%.o, $(SPLIT_TESTS)) + +TEST_GEN_OBJ = $(patsubst %, %.o, $(TEST_GEN_PROGS)) +TEST_GEN_OBJ += $(patsubst %, %.o, $(TEST_GEN_PROGS_EXTENDED)) +TEST_DEP_FILES = $(patsubst %.o, %.d, $(TEST_GEN_OBJ)) +TEST_DEP_FILES += $(patsubst %.o, %.d, $(LIBKVM_OBJS)) +TEST_DEP_FILES += $(patsubst %.o, %.d, $(SPLIT_TEST_GEN_OBJ)) +-include $(TEST_DEP_FILES) + +$(shell mkdir -p $(sort $(OUTPUT)/$(ARCH_DIR) $(dir $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ)))) + +$(filter-out $(SPLIT_TEST_GEN_PROGS), $(TEST_GEN_PROGS)) \ +$(TEST_GEN_PROGS_EXTENDED): %: %.o + $(CC) $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) $(TARGET_ARCH) $< $(LIBKVM_OBJS) $(LDLIBS) -o $@ +$(TEST_GEN_OBJ): $(OUTPUT)/%.o: %.c + $(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@ + +$(SPLIT_TEST_GEN_PROGS): $(OUTPUT)/%: $(OUTPUT)/%.o $(OUTPUT)/$(ARCH_DIR)/%.o + $(CC) $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) $(TARGET_ARCH) $^ $(LDLIBS) -o $@ +$(SPLIT_TEST_GEN_OBJ): $(OUTPUT)/$(ARCH_DIR)/%.o: $(ARCH_DIR)/%.c + $(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@ + +EXTRA_CLEAN += $(GEN_HDRS) \ + $(LIBKVM_OBJS) \ + $(SPLIT_TEST_GEN_OBJ) \ + $(TEST_DEP_FILES) \ + $(TEST_GEN_OBJ) \ + cscope.* + +$(LIBKVM_C_OBJ): $(OUTPUT)/%.o: %.c $(GEN_HDRS) + $(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@ + +$(LIBKVM_S_OBJ): $(OUTPUT)/%.o: %.S $(GEN_HDRS) + $(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@ + +# Compile the string overrides as freestanding to prevent the compiler from +# generating self-referential code, e.g. 
without "freestanding" the compiler may +# "optimize" memcmp() by invoking memcmp(), thus causing infinite recursion. +$(LIBKVM_STRING_OBJ): $(OUTPUT)/%.o: %.c + $(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c -ffreestanding $< -o $@ + +$(shell mkdir -p $(sort $(dir $(TEST_GEN_PROGS)))) +$(SPLIT_TEST_GEN_OBJ): $(GEN_HDRS) +$(TEST_GEN_PROGS): $(LIBKVM_OBJS) +$(TEST_GEN_PROGS_EXTENDED): $(LIBKVM_OBJS) +$(TEST_GEN_OBJ): $(GEN_HDRS) + +cscope: include_paths = $(LINUX_TOOL_INCLUDE) $(LINUX_HDR_PATH) include lib .. +cscope: + $(RM) cscope.* + (find $(include_paths) -name '*.h' \ + -exec realpath --relative-base=$(PWD) {} \;; \ + find . -name '*.c' \ + -exec realpath --relative-base=$(PWD) {} \;) | sort -u > cscope.files + cscope -b -- cgit v1.2.3 From 67730e6c53d70fb31618230f81c4acee9f72eaa3 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 27 Nov 2024 16:55:46 -0800 Subject: KVM: selftests: Use canonical $(ARCH) paths for KVM selftests directories Use the kernel's canonical $(ARCH) paths instead of the raw target triple for KVM selftests directories. KVM selftests are quite nearly the only place in the entire kernel that using the target triple for directories, tools/testing/selftests/drivers/s390x being the lone holdout. Using the kernel's preferred nomenclature eliminates the minor, but annoying, friction of having to translate to KVM's selftests directories, e.g. for pattern matching, opening files, running selftests, etc. Opportunsitically delete file comments that reference the full path of the file, as they are obviously prone to becoming stale, and serve no known purpose. Reviewed-by: Muhammad Usama Anjum Acked-by: Claudio Imbrenda Acked-by: Andrew Jones Link: https://lore.kernel.org/r/20241128005547.4077116-16-seanjc@google.com Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/Makefile | 10 +- tools/testing/selftests/kvm/Makefile.kvm | 328 +++-- .../selftests/kvm/aarch64/aarch32_id_regs.c | 167 --- tools/testing/selftests/kvm/aarch64/arch_timer.c | 220 --- .../selftests/kvm/aarch64/arch_timer_edge_cases.c | 1062 --------------- .../selftests/kvm/aarch64/debug-exceptions.c | 607 --------- tools/testing/selftests/kvm/aarch64/get-reg-list.c | 771 ----------- tools/testing/selftests/kvm/aarch64/hypercalls.c | 308 ----- tools/testing/selftests/kvm/aarch64/mmio_abort.c | 159 --- tools/testing/selftests/kvm/aarch64/no-vgic-v3.c | 175 --- .../selftests/kvm/aarch64/page_fault_test.c | 1135 ---------------- tools/testing/selftests/kvm/aarch64/psci_test.c | 290 ---- tools/testing/selftests/kvm/aarch64/set_id_regs.c | 695 ---------- tools/testing/selftests/kvm/aarch64/smccc_filter.c | 268 ---- .../selftests/kvm/aarch64/vcpu_width_config.c | 121 -- tools/testing/selftests/kvm/aarch64/vgic_init.c | 764 ----------- tools/testing/selftests/kvm/aarch64/vgic_irq.c | 847 ------------ .../selftests/kvm/aarch64/vgic_lpi_stress.c | 410 ------ .../selftests/kvm/aarch64/vpmu_counter_access.c | 648 --------- .../testing/selftests/kvm/arm64/aarch32_id_regs.c | 167 +++ tools/testing/selftests/kvm/arm64/arch_timer.c | 220 +++ .../selftests/kvm/arm64/arch_timer_edge_cases.c | 1062 +++++++++++++++ .../testing/selftests/kvm/arm64/debug-exceptions.c | 607 +++++++++ tools/testing/selftests/kvm/arm64/get-reg-list.c | 771 +++++++++++ tools/testing/selftests/kvm/arm64/hypercalls.c | 308 +++++ tools/testing/selftests/kvm/arm64/mmio_abort.c | 159 +++ tools/testing/selftests/kvm/arm64/no-vgic-v3.c | 175 +++ .../testing/selftests/kvm/arm64/page_fault_test.c | 1135 ++++++++++++++++ 
tools/testing/selftests/kvm/arm64/psci_test.c | 290 ++++ tools/testing/selftests/kvm/arm64/set_id_regs.c | 695 ++++++++++ tools/testing/selftests/kvm/arm64/smccc_filter.c | 268 ++++ .../selftests/kvm/arm64/vcpu_width_config.c | 121 ++ tools/testing/selftests/kvm/arm64/vgic_init.c | 764 +++++++++++ tools/testing/selftests/kvm/arm64/vgic_irq.c | 847 ++++++++++++ .../testing/selftests/kvm/arm64/vgic_lpi_stress.c | 410 ++++++ .../selftests/kvm/arm64/vpmu_counter_access.c | 648 +++++++++ tools/testing/selftests/kvm/dirty_log_perf_test.c | 2 +- .../selftests/kvm/include/aarch64/arch_timer.h | 158 --- .../testing/selftests/kvm/include/aarch64/delay.h | 25 - tools/testing/selftests/kvm/include/aarch64/gic.h | 64 - .../testing/selftests/kvm/include/aarch64/gic_v3.h | 604 --------- .../selftests/kvm/include/aarch64/gic_v3_its.h | 19 - .../selftests/kvm/include/aarch64/kvm_util_arch.h | 7 - .../selftests/kvm/include/aarch64/processor.h | 238 ---- .../selftests/kvm/include/aarch64/spinlock.h | 13 - .../testing/selftests/kvm/include/aarch64/ucall.h | 20 - tools/testing/selftests/kvm/include/aarch64/vgic.h | 37 - .../selftests/kvm/include/arm64/arch_timer.h | 158 +++ tools/testing/selftests/kvm/include/arm64/delay.h | 25 + tools/testing/selftests/kvm/include/arm64/gic.h | 64 + tools/testing/selftests/kvm/include/arm64/gic_v3.h | 604 +++++++++ .../selftests/kvm/include/arm64/gic_v3_its.h | 19 + .../selftests/kvm/include/arm64/kvm_util_arch.h | 7 + .../selftests/kvm/include/arm64/processor.h | 238 ++++ .../testing/selftests/kvm/include/arm64/spinlock.h | 13 + tools/testing/selftests/kvm/include/arm64/ucall.h | 20 + tools/testing/selftests/kvm/include/arm64/vgic.h | 37 + .../selftests/kvm/include/s390/debug_print.h | 69 + .../kvm/include/s390/diag318_test_handler.h | 13 + .../testing/selftests/kvm/include/s390/facility.h | 50 + .../selftests/kvm/include/s390/kvm_util_arch.h | 7 + .../testing/selftests/kvm/include/s390/processor.h | 41 + tools/testing/selftests/kvm/include/s390/sie.h | 240 ++++ tools/testing/selftests/kvm/include/s390/ucall.h | 19 + .../selftests/kvm/include/s390x/debug_print.h | 69 - .../kvm/include/s390x/diag318_test_handler.h | 13 - .../testing/selftests/kvm/include/s390x/facility.h | 50 - .../selftests/kvm/include/s390x/kvm_util_arch.h | 7 - .../selftests/kvm/include/s390x/processor.h | 41 - tools/testing/selftests/kvm/include/s390x/sie.h | 240 ---- tools/testing/selftests/kvm/include/s390x/ucall.h | 19 - tools/testing/selftests/kvm/include/x86/apic.h | 118 ++ tools/testing/selftests/kvm/include/x86/evmcs.h | 1276 ++++++++++++++++++ tools/testing/selftests/kvm/include/x86/hyperv.h | 361 +++++ .../selftests/kvm/include/x86/kvm_util_arch.h | 51 + tools/testing/selftests/kvm/include/x86/mce.h | 23 + tools/testing/selftests/kvm/include/x86/pmu.h | 97 ++ .../testing/selftests/kvm/include/x86/processor.h | 1395 +++++++++++++++++++ tools/testing/selftests/kvm/include/x86/sev.h | 96 ++ tools/testing/selftests/kvm/include/x86/svm.h | 320 +++++ tools/testing/selftests/kvm/include/x86/svm_util.h | 62 + tools/testing/selftests/kvm/include/x86/ucall.h | 13 + tools/testing/selftests/kvm/include/x86/vmx.h | 575 ++++++++ tools/testing/selftests/kvm/include/x86_64/apic.h | 120 -- tools/testing/selftests/kvm/include/x86_64/evmcs.h | 1279 ------------------ .../testing/selftests/kvm/include/x86_64/hyperv.h | 364 ----- .../selftests/kvm/include/x86_64/kvm_util_arch.h | 51 - tools/testing/selftests/kvm/include/x86_64/mce.h | 25 - tools/testing/selftests/kvm/include/x86_64/pmu.h | 97 -- 
.../selftests/kvm/include/x86_64/processor.h | 1397 -------------------- tools/testing/selftests/kvm/include/x86_64/sev.h | 96 -- tools/testing/selftests/kvm/include/x86_64/svm.h | 326 ----- .../selftests/kvm/include/x86_64/svm_util.h | 65 - tools/testing/selftests/kvm/include/x86_64/ucall.h | 13 - tools/testing/selftests/kvm/include/x86_64/vmx.h | 577 -------- tools/testing/selftests/kvm/lib/aarch64/gic.c | 157 --- .../selftests/kvm/lib/aarch64/gic_private.h | 32 - tools/testing/selftests/kvm/lib/aarch64/gic_v3.c | 427 ------ .../testing/selftests/kvm/lib/aarch64/gic_v3_its.c | 248 ---- tools/testing/selftests/kvm/lib/aarch64/handlers.S | 126 -- .../testing/selftests/kvm/lib/aarch64/processor.c | 647 --------- tools/testing/selftests/kvm/lib/aarch64/spinlock.c | 27 - tools/testing/selftests/kvm/lib/aarch64/ucall.c | 34 - tools/testing/selftests/kvm/lib/aarch64/vgic.c | 188 --- tools/testing/selftests/kvm/lib/arm64/gic.c | 157 +++ .../testing/selftests/kvm/lib/arm64/gic_private.h | 32 + tools/testing/selftests/kvm/lib/arm64/gic_v3.c | 427 ++++++ tools/testing/selftests/kvm/lib/arm64/gic_v3_its.c | 248 ++++ tools/testing/selftests/kvm/lib/arm64/handlers.S | 126 ++ tools/testing/selftests/kvm/lib/arm64/processor.c | 647 +++++++++ tools/testing/selftests/kvm/lib/arm64/spinlock.c | 27 + tools/testing/selftests/kvm/lib/arm64/ucall.c | 34 + tools/testing/selftests/kvm/lib/arm64/vgic.c | 188 +++ .../selftests/kvm/lib/s390/diag318_test_handler.c | 80 ++ tools/testing/selftests/kvm/lib/s390/facility.c | 14 + tools/testing/selftests/kvm/lib/s390/processor.c | 223 ++++ tools/testing/selftests/kvm/lib/s390/ucall.c | 22 + .../selftests/kvm/lib/s390x/diag318_test_handler.c | 80 -- tools/testing/selftests/kvm/lib/s390x/facility.c | 14 - tools/testing/selftests/kvm/lib/s390x/processor.c | 223 ---- tools/testing/selftests/kvm/lib/s390x/ucall.c | 22 - tools/testing/selftests/kvm/lib/x86/apic.c | 43 + tools/testing/selftests/kvm/lib/x86/handlers.S | 81 ++ tools/testing/selftests/kvm/lib/x86/hyperv.c | 113 ++ tools/testing/selftests/kvm/lib/x86/memstress.c | 112 ++ tools/testing/selftests/kvm/lib/x86/pmu.c | 31 + tools/testing/selftests/kvm/lib/x86/processor.c | 1293 ++++++++++++++++++ tools/testing/selftests/kvm/lib/x86/sev.c | 141 ++ tools/testing/selftests/kvm/lib/x86/svm.c | 163 +++ tools/testing/selftests/kvm/lib/x86/ucall.c | 56 + tools/testing/selftests/kvm/lib/x86/vmx.c | 552 ++++++++ tools/testing/selftests/kvm/lib/x86_64/apic.c | 43 - tools/testing/selftests/kvm/lib/x86_64/handlers.S | 81 -- tools/testing/selftests/kvm/lib/x86_64/hyperv.c | 113 -- tools/testing/selftests/kvm/lib/x86_64/memstress.c | 112 -- tools/testing/selftests/kvm/lib/x86_64/pmu.c | 31 - tools/testing/selftests/kvm/lib/x86_64/processor.c | 1295 ------------------ tools/testing/selftests/kvm/lib/x86_64/sev.c | 141 -- tools/testing/selftests/kvm/lib/x86_64/svm.c | 164 --- tools/testing/selftests/kvm/lib/x86_64/ucall.c | 56 - tools/testing/selftests/kvm/lib/x86_64/vmx.c | 554 -------- tools/testing/selftests/kvm/s390/cmma_test.c | 695 ++++++++++ tools/testing/selftests/kvm/s390/config | 2 + .../selftests/kvm/s390/cpumodel_subfuncs_test.c | 301 +++++ tools/testing/selftests/kvm/s390/debug_test.c | 160 +++ tools/testing/selftests/kvm/s390/memop.c | 1187 +++++++++++++++++ tools/testing/selftests/kvm/s390/resets.c | 313 +++++ .../selftests/kvm/s390/shared_zeropage_test.c | 111 ++ tools/testing/selftests/kvm/s390/sync_regs_test.c | 238 ++++ tools/testing/selftests/kvm/s390/tprot.c | 244 ++++ 
tools/testing/selftests/kvm/s390/ucontrol_test.c | 638 +++++++++ tools/testing/selftests/kvm/s390x/cmma_test.c | 695 ---------- tools/testing/selftests/kvm/s390x/config | 2 - .../selftests/kvm/s390x/cpumodel_subfuncs_test.c | 301 ----- tools/testing/selftests/kvm/s390x/debug_test.c | 160 --- tools/testing/selftests/kvm/s390x/memop.c | 1187 ----------------- tools/testing/selftests/kvm/s390x/resets.c | 313 ----- .../selftests/kvm/s390x/shared_zeropage_test.c | 111 -- tools/testing/selftests/kvm/s390x/sync_regs_test.c | 238 ---- tools/testing/selftests/kvm/s390x/tprot.c | 244 ---- tools/testing/selftests/kvm/s390x/ucontrol_test.c | 638 --------- .../testing/selftests/kvm/set_memory_region_test.c | 6 +- tools/testing/selftests/kvm/x86/amx_test.c | 315 +++++ .../selftests/kvm/x86/apic_bus_clock_test.c | 194 +++ tools/testing/selftests/kvm/x86/cpuid_test.c | 225 ++++ .../selftests/kvm/x86/cr4_cpuid_sync_test.c | 100 ++ tools/testing/selftests/kvm/x86/debug_regs.c | 217 +++ .../kvm/x86/dirty_log_page_splitting_test.c | 263 ++++ .../kvm/x86/exit_on_emulation_failure_test.c | 39 + .../testing/selftests/kvm/x86/feature_msrs_test.c | 113 ++ .../testing/selftests/kvm/x86/fix_hypercall_test.c | 142 ++ tools/testing/selftests/kvm/x86/flds_emulation.h | 52 + tools/testing/selftests/kvm/x86/hwcr_msr_test.c | 45 + tools/testing/selftests/kvm/x86/hyperv_clock.c | 263 ++++ tools/testing/selftests/kvm/x86/hyperv_cpuid.c | 172 +++ tools/testing/selftests/kvm/x86/hyperv_evmcs.c | 307 +++++ .../selftests/kvm/x86/hyperv_extended_hypercalls.c | 98 ++ tools/testing/selftests/kvm/x86/hyperv_features.c | 695 ++++++++++ tools/testing/selftests/kvm/x86/hyperv_ipi.c | 308 +++++ tools/testing/selftests/kvm/x86/hyperv_svm_test.c | 199 +++ tools/testing/selftests/kvm/x86/hyperv_tlb_flush.c | 680 ++++++++++ tools/testing/selftests/kvm/x86/kvm_clock_test.c | 156 +++ tools/testing/selftests/kvm/x86/kvm_pv_test.c | 190 +++ .../selftests/kvm/x86/max_vcpuid_cap_test.c | 62 + .../testing/selftests/kvm/x86/monitor_mwait_test.c | 129 ++ .../selftests/kvm/x86/nested_exceptions_test.c | 288 ++++ .../testing/selftests/kvm/x86/nx_huge_pages_test.c | 266 ++++ .../selftests/kvm/x86/nx_huge_pages_test.sh | 69 + .../testing/selftests/kvm/x86/platform_info_test.c | 78 ++ .../testing/selftests/kvm/x86/pmu_counters_test.c | 644 +++++++++ .../selftests/kvm/x86/pmu_event_filter_test.c | 876 ++++++++++++ .../kvm/x86/private_mem_conversions_test.c | 483 +++++++ .../selftests/kvm/x86/private_mem_kvm_exits_test.c | 120 ++ .../selftests/kvm/x86/recalc_apic_map_test.c | 74 ++ tools/testing/selftests/kvm/x86/set_boot_cpu_id.c | 146 ++ tools/testing/selftests/kvm/x86/set_sregs_test.c | 141 ++ tools/testing/selftests/kvm/x86/sev_init2_tests.c | 152 +++ .../testing/selftests/kvm/x86/sev_migrate_tests.c | 397 ++++++ tools/testing/selftests/kvm/x86/sev_smoke_test.c | 205 +++ .../kvm/x86/smaller_maxphyaddr_emulation_test.c | 105 ++ tools/testing/selftests/kvm/x86/smm_test.c | 209 +++ tools/testing/selftests/kvm/x86/state_test.c | 323 +++++ tools/testing/selftests/kvm/x86/svm_int_ctl_test.c | 118 ++ .../selftests/kvm/x86/svm_nested_shutdown_test.c | 59 + .../kvm/x86/svm_nested_soft_inject_test.c | 210 +++ tools/testing/selftests/kvm/x86/svm_vmcall_test.c | 70 + tools/testing/selftests/kvm/x86/sync_regs_test.c | 411 ++++++ .../selftests/kvm/x86/triple_fault_event_test.c | 124 ++ tools/testing/selftests/kvm/x86/tsc_msrs_test.c | 161 +++ tools/testing/selftests/kvm/x86/tsc_scaling_sync.c | 110 ++ .../selftests/kvm/x86/ucna_injection_test.c | 295 +++++ 
.../testing/selftests/kvm/x86/userspace_io_test.c | 103 ++ .../selftests/kvm/x86/userspace_msr_exit_test.c | 769 +++++++++++ .../selftests/kvm/x86/vmx_apic_access_test.c | 124 ++ .../kvm/x86/vmx_close_while_nested_test.c | 80 ++ .../testing/selftests/kvm/x86/vmx_dirty_log_test.c | 179 +++ .../x86/vmx_exception_with_invalid_guest_state.c | 142 ++ .../kvm/x86/vmx_invalid_nested_guest_state.c | 103 ++ tools/testing/selftests/kvm/x86/vmx_msrs_test.c | 131 ++ .../kvm/x86/vmx_nested_tsc_scaling_test.c | 206 +++ .../testing/selftests/kvm/x86/vmx_pmu_caps_test.c | 247 ++++ .../selftests/kvm/x86/vmx_preemption_timer_test.c | 245 ++++ .../selftests/kvm/x86/vmx_set_nested_state_test.c | 304 +++++ .../selftests/kvm/x86/vmx_tsc_adjust_test.c | 156 +++ tools/testing/selftests/kvm/x86/xapic_ipi_test.c | 487 +++++++ tools/testing/selftests/kvm/x86/xapic_state_test.c | 262 ++++ tools/testing/selftests/kvm/x86/xcr0_cpuid_test.c | 139 ++ tools/testing/selftests/kvm/x86/xen_shinfo_test.c | 1161 ++++++++++++++++ tools/testing/selftests/kvm/x86/xen_vmcall_test.c | 143 ++ tools/testing/selftests/kvm/x86/xss_msr_test.c | 54 + tools/testing/selftests/kvm/x86_64/amx_test.c | 315 ----- .../selftests/kvm/x86_64/apic_bus_clock_test.c | 194 --- tools/testing/selftests/kvm/x86_64/cpuid_test.c | 225 ---- .../selftests/kvm/x86_64/cr4_cpuid_sync_test.c | 100 -- tools/testing/selftests/kvm/x86_64/debug_regs.c | 217 --- .../kvm/x86_64/dirty_log_page_splitting_test.c | 263 ---- .../kvm/x86_64/exit_on_emulation_failure_test.c | 39 - .../selftests/kvm/x86_64/feature_msrs_test.c | 113 -- .../selftests/kvm/x86_64/fix_hypercall_test.c | 142 -- .../testing/selftests/kvm/x86_64/flds_emulation.h | 52 - tools/testing/selftests/kvm/x86_64/hwcr_msr_test.c | 45 - tools/testing/selftests/kvm/x86_64/hyperv_clock.c | 263 ---- tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c | 172 --- tools/testing/selftests/kvm/x86_64/hyperv_evmcs.c | 307 ----- .../kvm/x86_64/hyperv_extended_hypercalls.c | 98 -- .../testing/selftests/kvm/x86_64/hyperv_features.c | 695 ---------- tools/testing/selftests/kvm/x86_64/hyperv_ipi.c | 308 ----- .../testing/selftests/kvm/x86_64/hyperv_svm_test.c | 199 --- .../selftests/kvm/x86_64/hyperv_tlb_flush.c | 680 ---------- .../testing/selftests/kvm/x86_64/kvm_clock_test.c | 156 --- tools/testing/selftests/kvm/x86_64/kvm_pv_test.c | 190 --- .../selftests/kvm/x86_64/max_vcpuid_cap_test.c | 62 - .../selftests/kvm/x86_64/monitor_mwait_test.c | 129 -- .../selftests/kvm/x86_64/nested_exceptions_test.c | 288 ---- .../selftests/kvm/x86_64/nx_huge_pages_test.c | 266 ---- .../selftests/kvm/x86_64/nx_huge_pages_test.sh | 69 - .../selftests/kvm/x86_64/platform_info_test.c | 78 -- .../selftests/kvm/x86_64/pmu_counters_test.c | 644 --------- .../selftests/kvm/x86_64/pmu_event_filter_test.c | 876 ------------ .../kvm/x86_64/private_mem_conversions_test.c | 483 ------- .../kvm/x86_64/private_mem_kvm_exits_test.c | 120 -- .../selftests/kvm/x86_64/recalc_apic_map_test.c | 74 -- .../testing/selftests/kvm/x86_64/set_boot_cpu_id.c | 146 -- .../testing/selftests/kvm/x86_64/set_sregs_test.c | 141 -- .../testing/selftests/kvm/x86_64/sev_init2_tests.c | 152 --- .../selftests/kvm/x86_64/sev_migrate_tests.c | 397 ------ .../testing/selftests/kvm/x86_64/sev_smoke_test.c | 205 --- .../kvm/x86_64/smaller_maxphyaddr_emulation_test.c | 105 -- tools/testing/selftests/kvm/x86_64/smm_test.c | 209 --- tools/testing/selftests/kvm/x86_64/state_test.c | 323 ----- .../selftests/kvm/x86_64/svm_int_ctl_test.c | 118 -- .../kvm/x86_64/svm_nested_shutdown_test.c | 
59 - .../kvm/x86_64/svm_nested_soft_inject_test.c | 210 --- .../testing/selftests/kvm/x86_64/svm_vmcall_test.c | 70 - .../testing/selftests/kvm/x86_64/sync_regs_test.c | 411 ------ .../selftests/kvm/x86_64/triple_fault_event_test.c | 124 -- tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c | 161 --- .../selftests/kvm/x86_64/tsc_scaling_sync.c | 110 -- .../selftests/kvm/x86_64/ucna_injection_test.c | 295 ----- .../selftests/kvm/x86_64/userspace_io_test.c | 103 -- .../selftests/kvm/x86_64/userspace_msr_exit_test.c | 769 ----------- .../selftests/kvm/x86_64/vmx_apic_access_test.c | 124 -- .../kvm/x86_64/vmx_close_while_nested_test.c | 80 -- .../selftests/kvm/x86_64/vmx_dirty_log_test.c | 179 --- .../vmx_exception_with_invalid_guest_state.c | 142 -- .../kvm/x86_64/vmx_invalid_nested_guest_state.c | 103 -- tools/testing/selftests/kvm/x86_64/vmx_msrs_test.c | 131 -- .../kvm/x86_64/vmx_nested_tsc_scaling_test.c | 206 --- .../selftests/kvm/x86_64/vmx_pmu_caps_test.c | 247 ---- .../kvm/x86_64/vmx_preemption_timer_test.c | 245 ---- .../kvm/x86_64/vmx_set_nested_state_test.c | 304 ----- .../selftests/kvm/x86_64/vmx_tsc_adjust_test.c | 156 --- .../testing/selftests/kvm/x86_64/xapic_ipi_test.c | 487 ------- .../selftests/kvm/x86_64/xapic_state_test.c | 262 ---- .../testing/selftests/kvm/x86_64/xcr0_cpuid_test.c | 139 -- .../testing/selftests/kvm/x86_64/xen_shinfo_test.c | 1161 ---------------- .../testing/selftests/kvm/x86_64/xen_vmcall_test.c | 143 -- tools/testing/selftests/kvm/x86_64/xss_msr_test.c | 54 - 298 files changed, 39659 insertions(+), 39695 deletions(-) delete mode 100644 tools/testing/selftests/kvm/aarch64/aarch32_id_regs.c delete mode 100644 tools/testing/selftests/kvm/aarch64/arch_timer.c delete mode 100644 tools/testing/selftests/kvm/aarch64/arch_timer_edge_cases.c delete mode 100644 tools/testing/selftests/kvm/aarch64/debug-exceptions.c delete mode 100644 tools/testing/selftests/kvm/aarch64/get-reg-list.c delete mode 100644 tools/testing/selftests/kvm/aarch64/hypercalls.c delete mode 100644 tools/testing/selftests/kvm/aarch64/mmio_abort.c delete mode 100644 tools/testing/selftests/kvm/aarch64/no-vgic-v3.c delete mode 100644 tools/testing/selftests/kvm/aarch64/page_fault_test.c delete mode 100644 tools/testing/selftests/kvm/aarch64/psci_test.c delete mode 100644 tools/testing/selftests/kvm/aarch64/set_id_regs.c delete mode 100644 tools/testing/selftests/kvm/aarch64/smccc_filter.c delete mode 100644 tools/testing/selftests/kvm/aarch64/vcpu_width_config.c delete mode 100644 tools/testing/selftests/kvm/aarch64/vgic_init.c delete mode 100644 tools/testing/selftests/kvm/aarch64/vgic_irq.c delete mode 100644 tools/testing/selftests/kvm/aarch64/vgic_lpi_stress.c delete mode 100644 tools/testing/selftests/kvm/aarch64/vpmu_counter_access.c create mode 100644 tools/testing/selftests/kvm/arm64/aarch32_id_regs.c create mode 100644 tools/testing/selftests/kvm/arm64/arch_timer.c create mode 100644 tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c create mode 100644 tools/testing/selftests/kvm/arm64/debug-exceptions.c create mode 100644 tools/testing/selftests/kvm/arm64/get-reg-list.c create mode 100644 tools/testing/selftests/kvm/arm64/hypercalls.c create mode 100644 tools/testing/selftests/kvm/arm64/mmio_abort.c create mode 100644 tools/testing/selftests/kvm/arm64/no-vgic-v3.c create mode 100644 tools/testing/selftests/kvm/arm64/page_fault_test.c create mode 100644 tools/testing/selftests/kvm/arm64/psci_test.c create mode 100644 tools/testing/selftests/kvm/arm64/set_id_regs.c create mode 
100644 tools/testing/selftests/kvm/arm64/smccc_filter.c create mode 100644 tools/testing/selftests/kvm/arm64/vcpu_width_config.c create mode 100644 tools/testing/selftests/kvm/arm64/vgic_init.c create mode 100644 tools/testing/selftests/kvm/arm64/vgic_irq.c create mode 100644 tools/testing/selftests/kvm/arm64/vgic_lpi_stress.c create mode 100644 tools/testing/selftests/kvm/arm64/vpmu_counter_access.c delete mode 100644 tools/testing/selftests/kvm/include/aarch64/arch_timer.h delete mode 100644 tools/testing/selftests/kvm/include/aarch64/delay.h delete mode 100644 tools/testing/selftests/kvm/include/aarch64/gic.h delete mode 100644 tools/testing/selftests/kvm/include/aarch64/gic_v3.h delete mode 100644 tools/testing/selftests/kvm/include/aarch64/gic_v3_its.h delete mode 100644 tools/testing/selftests/kvm/include/aarch64/kvm_util_arch.h delete mode 100644 tools/testing/selftests/kvm/include/aarch64/processor.h delete mode 100644 tools/testing/selftests/kvm/include/aarch64/spinlock.h delete mode 100644 tools/testing/selftests/kvm/include/aarch64/ucall.h delete mode 100644 tools/testing/selftests/kvm/include/aarch64/vgic.h create mode 100644 tools/testing/selftests/kvm/include/arm64/arch_timer.h create mode 100644 tools/testing/selftests/kvm/include/arm64/delay.h create mode 100644 tools/testing/selftests/kvm/include/arm64/gic.h create mode 100644 tools/testing/selftests/kvm/include/arm64/gic_v3.h create mode 100644 tools/testing/selftests/kvm/include/arm64/gic_v3_its.h create mode 100644 tools/testing/selftests/kvm/include/arm64/kvm_util_arch.h create mode 100644 tools/testing/selftests/kvm/include/arm64/processor.h create mode 100644 tools/testing/selftests/kvm/include/arm64/spinlock.h create mode 100644 tools/testing/selftests/kvm/include/arm64/ucall.h create mode 100644 tools/testing/selftests/kvm/include/arm64/vgic.h create mode 100644 tools/testing/selftests/kvm/include/s390/debug_print.h create mode 100644 tools/testing/selftests/kvm/include/s390/diag318_test_handler.h create mode 100644 tools/testing/selftests/kvm/include/s390/facility.h create mode 100644 tools/testing/selftests/kvm/include/s390/kvm_util_arch.h create mode 100644 tools/testing/selftests/kvm/include/s390/processor.h create mode 100644 tools/testing/selftests/kvm/include/s390/sie.h create mode 100644 tools/testing/selftests/kvm/include/s390/ucall.h delete mode 100644 tools/testing/selftests/kvm/include/s390x/debug_print.h delete mode 100644 tools/testing/selftests/kvm/include/s390x/diag318_test_handler.h delete mode 100644 tools/testing/selftests/kvm/include/s390x/facility.h delete mode 100644 tools/testing/selftests/kvm/include/s390x/kvm_util_arch.h delete mode 100644 tools/testing/selftests/kvm/include/s390x/processor.h delete mode 100644 tools/testing/selftests/kvm/include/s390x/sie.h delete mode 100644 tools/testing/selftests/kvm/include/s390x/ucall.h create mode 100644 tools/testing/selftests/kvm/include/x86/apic.h create mode 100644 tools/testing/selftests/kvm/include/x86/evmcs.h create mode 100644 tools/testing/selftests/kvm/include/x86/hyperv.h create mode 100644 tools/testing/selftests/kvm/include/x86/kvm_util_arch.h create mode 100644 tools/testing/selftests/kvm/include/x86/mce.h create mode 100644 tools/testing/selftests/kvm/include/x86/pmu.h create mode 100644 tools/testing/selftests/kvm/include/x86/processor.h create mode 100644 tools/testing/selftests/kvm/include/x86/sev.h create mode 100644 tools/testing/selftests/kvm/include/x86/svm.h create mode 100644 tools/testing/selftests/kvm/include/x86/svm_util.h 
create mode 100644 tools/testing/selftests/kvm/include/x86/ucall.h create mode 100644 tools/testing/selftests/kvm/include/x86/vmx.h delete mode 100644 tools/testing/selftests/kvm/include/x86_64/apic.h delete mode 100644 tools/testing/selftests/kvm/include/x86_64/evmcs.h delete mode 100644 tools/testing/selftests/kvm/include/x86_64/hyperv.h delete mode 100644 tools/testing/selftests/kvm/include/x86_64/kvm_util_arch.h delete mode 100644 tools/testing/selftests/kvm/include/x86_64/mce.h delete mode 100644 tools/testing/selftests/kvm/include/x86_64/pmu.h delete mode 100644 tools/testing/selftests/kvm/include/x86_64/processor.h delete mode 100644 tools/testing/selftests/kvm/include/x86_64/sev.h delete mode 100644 tools/testing/selftests/kvm/include/x86_64/svm.h delete mode 100644 tools/testing/selftests/kvm/include/x86_64/svm_util.h delete mode 100644 tools/testing/selftests/kvm/include/x86_64/ucall.h delete mode 100644 tools/testing/selftests/kvm/include/x86_64/vmx.h delete mode 100644 tools/testing/selftests/kvm/lib/aarch64/gic.c delete mode 100644 tools/testing/selftests/kvm/lib/aarch64/gic_private.h delete mode 100644 tools/testing/selftests/kvm/lib/aarch64/gic_v3.c delete mode 100644 tools/testing/selftests/kvm/lib/aarch64/gic_v3_its.c delete mode 100644 tools/testing/selftests/kvm/lib/aarch64/handlers.S delete mode 100644 tools/testing/selftests/kvm/lib/aarch64/processor.c delete mode 100644 tools/testing/selftests/kvm/lib/aarch64/spinlock.c delete mode 100644 tools/testing/selftests/kvm/lib/aarch64/ucall.c delete mode 100644 tools/testing/selftests/kvm/lib/aarch64/vgic.c create mode 100644 tools/testing/selftests/kvm/lib/arm64/gic.c create mode 100644 tools/testing/selftests/kvm/lib/arm64/gic_private.h create mode 100644 tools/testing/selftests/kvm/lib/arm64/gic_v3.c create mode 100644 tools/testing/selftests/kvm/lib/arm64/gic_v3_its.c create mode 100644 tools/testing/selftests/kvm/lib/arm64/handlers.S create mode 100644 tools/testing/selftests/kvm/lib/arm64/processor.c create mode 100644 tools/testing/selftests/kvm/lib/arm64/spinlock.c create mode 100644 tools/testing/selftests/kvm/lib/arm64/ucall.c create mode 100644 tools/testing/selftests/kvm/lib/arm64/vgic.c create mode 100644 tools/testing/selftests/kvm/lib/s390/diag318_test_handler.c create mode 100644 tools/testing/selftests/kvm/lib/s390/facility.c create mode 100644 tools/testing/selftests/kvm/lib/s390/processor.c create mode 100644 tools/testing/selftests/kvm/lib/s390/ucall.c delete mode 100644 tools/testing/selftests/kvm/lib/s390x/diag318_test_handler.c delete mode 100644 tools/testing/selftests/kvm/lib/s390x/facility.c delete mode 100644 tools/testing/selftests/kvm/lib/s390x/processor.c delete mode 100644 tools/testing/selftests/kvm/lib/s390x/ucall.c create mode 100644 tools/testing/selftests/kvm/lib/x86/apic.c create mode 100644 tools/testing/selftests/kvm/lib/x86/handlers.S create mode 100644 tools/testing/selftests/kvm/lib/x86/hyperv.c create mode 100644 tools/testing/selftests/kvm/lib/x86/memstress.c create mode 100644 tools/testing/selftests/kvm/lib/x86/pmu.c create mode 100644 tools/testing/selftests/kvm/lib/x86/processor.c create mode 100644 tools/testing/selftests/kvm/lib/x86/sev.c create mode 100644 tools/testing/selftests/kvm/lib/x86/svm.c create mode 100644 tools/testing/selftests/kvm/lib/x86/ucall.c create mode 100644 tools/testing/selftests/kvm/lib/x86/vmx.c delete mode 100644 tools/testing/selftests/kvm/lib/x86_64/apic.c delete mode 100644 tools/testing/selftests/kvm/lib/x86_64/handlers.S delete mode 100644 
tools/testing/selftests/kvm/lib/x86_64/hyperv.c delete mode 100644 tools/testing/selftests/kvm/lib/x86_64/memstress.c delete mode 100644 tools/testing/selftests/kvm/lib/x86_64/pmu.c delete mode 100644 tools/testing/selftests/kvm/lib/x86_64/processor.c delete mode 100644 tools/testing/selftests/kvm/lib/x86_64/sev.c delete mode 100644 tools/testing/selftests/kvm/lib/x86_64/svm.c delete mode 100644 tools/testing/selftests/kvm/lib/x86_64/ucall.c delete mode 100644 tools/testing/selftests/kvm/lib/x86_64/vmx.c create mode 100644 tools/testing/selftests/kvm/s390/cmma_test.c create mode 100644 tools/testing/selftests/kvm/s390/config create mode 100644 tools/testing/selftests/kvm/s390/cpumodel_subfuncs_test.c create mode 100644 tools/testing/selftests/kvm/s390/debug_test.c create mode 100644 tools/testing/selftests/kvm/s390/memop.c create mode 100644 tools/testing/selftests/kvm/s390/resets.c create mode 100644 tools/testing/selftests/kvm/s390/shared_zeropage_test.c create mode 100644 tools/testing/selftests/kvm/s390/sync_regs_test.c create mode 100644 tools/testing/selftests/kvm/s390/tprot.c create mode 100644 tools/testing/selftests/kvm/s390/ucontrol_test.c delete mode 100644 tools/testing/selftests/kvm/s390x/cmma_test.c delete mode 100644 tools/testing/selftests/kvm/s390x/config delete mode 100644 tools/testing/selftests/kvm/s390x/cpumodel_subfuncs_test.c delete mode 100644 tools/testing/selftests/kvm/s390x/debug_test.c delete mode 100644 tools/testing/selftests/kvm/s390x/memop.c delete mode 100644 tools/testing/selftests/kvm/s390x/resets.c delete mode 100644 tools/testing/selftests/kvm/s390x/shared_zeropage_test.c delete mode 100644 tools/testing/selftests/kvm/s390x/sync_regs_test.c delete mode 100644 tools/testing/selftests/kvm/s390x/tprot.c delete mode 100644 tools/testing/selftests/kvm/s390x/ucontrol_test.c create mode 100644 tools/testing/selftests/kvm/x86/amx_test.c create mode 100644 tools/testing/selftests/kvm/x86/apic_bus_clock_test.c create mode 100644 tools/testing/selftests/kvm/x86/cpuid_test.c create mode 100644 tools/testing/selftests/kvm/x86/cr4_cpuid_sync_test.c create mode 100644 tools/testing/selftests/kvm/x86/debug_regs.c create mode 100644 tools/testing/selftests/kvm/x86/dirty_log_page_splitting_test.c create mode 100644 tools/testing/selftests/kvm/x86/exit_on_emulation_failure_test.c create mode 100644 tools/testing/selftests/kvm/x86/feature_msrs_test.c create mode 100644 tools/testing/selftests/kvm/x86/fix_hypercall_test.c create mode 100644 tools/testing/selftests/kvm/x86/flds_emulation.h create mode 100644 tools/testing/selftests/kvm/x86/hwcr_msr_test.c create mode 100644 tools/testing/selftests/kvm/x86/hyperv_clock.c create mode 100644 tools/testing/selftests/kvm/x86/hyperv_cpuid.c create mode 100644 tools/testing/selftests/kvm/x86/hyperv_evmcs.c create mode 100644 tools/testing/selftests/kvm/x86/hyperv_extended_hypercalls.c create mode 100644 tools/testing/selftests/kvm/x86/hyperv_features.c create mode 100644 tools/testing/selftests/kvm/x86/hyperv_ipi.c create mode 100644 tools/testing/selftests/kvm/x86/hyperv_svm_test.c create mode 100644 tools/testing/selftests/kvm/x86/hyperv_tlb_flush.c create mode 100644 tools/testing/selftests/kvm/x86/kvm_clock_test.c create mode 100644 tools/testing/selftests/kvm/x86/kvm_pv_test.c create mode 100644 tools/testing/selftests/kvm/x86/max_vcpuid_cap_test.c create mode 100644 tools/testing/selftests/kvm/x86/monitor_mwait_test.c create mode 100644 tools/testing/selftests/kvm/x86/nested_exceptions_test.c create mode 100644 
tools/testing/selftests/kvm/x86/nx_huge_pages_test.c create mode 100755 tools/testing/selftests/kvm/x86/nx_huge_pages_test.sh create mode 100644 tools/testing/selftests/kvm/x86/platform_info_test.c create mode 100644 tools/testing/selftests/kvm/x86/pmu_counters_test.c create mode 100644 tools/testing/selftests/kvm/x86/pmu_event_filter_test.c create mode 100644 tools/testing/selftests/kvm/x86/private_mem_conversions_test.c create mode 100644 tools/testing/selftests/kvm/x86/private_mem_kvm_exits_test.c create mode 100644 tools/testing/selftests/kvm/x86/recalc_apic_map_test.c create mode 100644 tools/testing/selftests/kvm/x86/set_boot_cpu_id.c create mode 100644 tools/testing/selftests/kvm/x86/set_sregs_test.c create mode 100644 tools/testing/selftests/kvm/x86/sev_init2_tests.c create mode 100644 tools/testing/selftests/kvm/x86/sev_migrate_tests.c create mode 100644 tools/testing/selftests/kvm/x86/sev_smoke_test.c create mode 100644 tools/testing/selftests/kvm/x86/smaller_maxphyaddr_emulation_test.c create mode 100644 tools/testing/selftests/kvm/x86/smm_test.c create mode 100644 tools/testing/selftests/kvm/x86/state_test.c create mode 100644 tools/testing/selftests/kvm/x86/svm_int_ctl_test.c create mode 100644 tools/testing/selftests/kvm/x86/svm_nested_shutdown_test.c create mode 100644 tools/testing/selftests/kvm/x86/svm_nested_soft_inject_test.c create mode 100644 tools/testing/selftests/kvm/x86/svm_vmcall_test.c create mode 100644 tools/testing/selftests/kvm/x86/sync_regs_test.c create mode 100644 tools/testing/selftests/kvm/x86/triple_fault_event_test.c create mode 100644 tools/testing/selftests/kvm/x86/tsc_msrs_test.c create mode 100644 tools/testing/selftests/kvm/x86/tsc_scaling_sync.c create mode 100644 tools/testing/selftests/kvm/x86/ucna_injection_test.c create mode 100644 tools/testing/selftests/kvm/x86/userspace_io_test.c create mode 100644 tools/testing/selftests/kvm/x86/userspace_msr_exit_test.c create mode 100644 tools/testing/selftests/kvm/x86/vmx_apic_access_test.c create mode 100644 tools/testing/selftests/kvm/x86/vmx_close_while_nested_test.c create mode 100644 tools/testing/selftests/kvm/x86/vmx_dirty_log_test.c create mode 100644 tools/testing/selftests/kvm/x86/vmx_exception_with_invalid_guest_state.c create mode 100644 tools/testing/selftests/kvm/x86/vmx_invalid_nested_guest_state.c create mode 100644 tools/testing/selftests/kvm/x86/vmx_msrs_test.c create mode 100644 tools/testing/selftests/kvm/x86/vmx_nested_tsc_scaling_test.c create mode 100644 tools/testing/selftests/kvm/x86/vmx_pmu_caps_test.c create mode 100644 tools/testing/selftests/kvm/x86/vmx_preemption_timer_test.c create mode 100644 tools/testing/selftests/kvm/x86/vmx_set_nested_state_test.c create mode 100644 tools/testing/selftests/kvm/x86/vmx_tsc_adjust_test.c create mode 100644 tools/testing/selftests/kvm/x86/xapic_ipi_test.c create mode 100644 tools/testing/selftests/kvm/x86/xapic_state_test.c create mode 100644 tools/testing/selftests/kvm/x86/xcr0_cpuid_test.c create mode 100644 tools/testing/selftests/kvm/x86/xen_shinfo_test.c create mode 100644 tools/testing/selftests/kvm/x86/xen_vmcall_test.c create mode 100644 tools/testing/selftests/kvm/x86/xss_msr_test.c delete mode 100644 tools/testing/selftests/kvm/x86_64/amx_test.c delete mode 100644 tools/testing/selftests/kvm/x86_64/apic_bus_clock_test.c delete mode 100644 tools/testing/selftests/kvm/x86_64/cpuid_test.c delete mode 100644 tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c delete mode 100644 tools/testing/selftests/kvm/x86_64/debug_regs.c 
delete mode 100644 tools/testing/selftests/kvm/x86_64/dirty_log_page_splitting_test.c delete mode 100644 tools/testing/selftests/kvm/x86_64/exit_on_emulation_failure_test.c delete mode 100644 tools/testing/selftests/kvm/x86_64/feature_msrs_test.c delete mode 100644 tools/testing/selftests/kvm/x86_64/fix_hypercall_test.c delete mode 100644 tools/testing/selftests/kvm/x86_64/flds_emulation.h delete mode 100644 tools/testing/selftests/kvm/x86_64/hwcr_msr_test.c delete mode 100644 tools/testing/selftests/kvm/x86_64/hyperv_clock.c delete mode 100644 tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c delete mode 100644 tools/testing/selftests/kvm/x86_64/hyperv_evmcs.c delete mode 100644 tools/testing/selftests/kvm/x86_64/hyperv_extended_hypercalls.c delete mode 100644 tools/testing/selftests/kvm/x86_64/hyperv_features.c delete mode 100644 tools/testing/selftests/kvm/x86_64/hyperv_ipi.c delete mode 100644 tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c delete mode 100644 tools/testing/selftests/kvm/x86_64/hyperv_tlb_flush.c delete mode 100644 tools/testing/selftests/kvm/x86_64/kvm_clock_test.c delete mode 100644 tools/testing/selftests/kvm/x86_64/kvm_pv_test.c delete mode 100644 tools/testing/selftests/kvm/x86_64/max_vcpuid_cap_test.c delete mode 100644 tools/testing/selftests/kvm/x86_64/monitor_mwait_test.c delete mode 100644 tools/testing/selftests/kvm/x86_64/nested_exceptions_test.c delete mode 100644 tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c delete mode 100755 tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.sh delete mode 100644 tools/testing/selftests/kvm/x86_64/platform_info_test.c delete mode 100644 tools/testing/selftests/kvm/x86_64/pmu_counters_test.c delete mode 100644 tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c delete mode 100644 tools/testing/selftests/kvm/x86_64/private_mem_conversions_test.c delete mode 100644 tools/testing/selftests/kvm/x86_64/private_mem_kvm_exits_test.c delete mode 100644 tools/testing/selftests/kvm/x86_64/recalc_apic_map_test.c delete mode 100644 tools/testing/selftests/kvm/x86_64/set_boot_cpu_id.c delete mode 100644 tools/testing/selftests/kvm/x86_64/set_sregs_test.c delete mode 100644 tools/testing/selftests/kvm/x86_64/sev_init2_tests.c delete mode 100644 tools/testing/selftests/kvm/x86_64/sev_migrate_tests.c delete mode 100644 tools/testing/selftests/kvm/x86_64/sev_smoke_test.c delete mode 100644 tools/testing/selftests/kvm/x86_64/smaller_maxphyaddr_emulation_test.c delete mode 100644 tools/testing/selftests/kvm/x86_64/smm_test.c delete mode 100644 tools/testing/selftests/kvm/x86_64/state_test.c delete mode 100644 tools/testing/selftests/kvm/x86_64/svm_int_ctl_test.c delete mode 100644 tools/testing/selftests/kvm/x86_64/svm_nested_shutdown_test.c delete mode 100644 tools/testing/selftests/kvm/x86_64/svm_nested_soft_inject_test.c delete mode 100644 tools/testing/selftests/kvm/x86_64/svm_vmcall_test.c delete mode 100644 tools/testing/selftests/kvm/x86_64/sync_regs_test.c delete mode 100644 tools/testing/selftests/kvm/x86_64/triple_fault_event_test.c delete mode 100644 tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c delete mode 100644 tools/testing/selftests/kvm/x86_64/tsc_scaling_sync.c delete mode 100644 tools/testing/selftests/kvm/x86_64/ucna_injection_test.c delete mode 100644 tools/testing/selftests/kvm/x86_64/userspace_io_test.c delete mode 100644 tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c delete mode 100644 tools/testing/selftests/kvm/x86_64/vmx_apic_access_test.c delete mode 100644 
tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c delete mode 100644 tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c delete mode 100644 tools/testing/selftests/kvm/x86_64/vmx_exception_with_invalid_guest_state.c delete mode 100644 tools/testing/selftests/kvm/x86_64/vmx_invalid_nested_guest_state.c delete mode 100644 tools/testing/selftests/kvm/x86_64/vmx_msrs_test.c delete mode 100644 tools/testing/selftests/kvm/x86_64/vmx_nested_tsc_scaling_test.c delete mode 100644 tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c delete mode 100644 tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c delete mode 100644 tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c delete mode 100644 tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c delete mode 100644 tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c delete mode 100644 tools/testing/selftests/kvm/x86_64/xapic_state_test.c delete mode 100644 tools/testing/selftests/kvm/x86_64/xcr0_cpuid_test.c delete mode 100644 tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c delete mode 100644 tools/testing/selftests/kvm/x86_64/xen_vmcall_test.c delete mode 100644 tools/testing/selftests/kvm/x86_64/xss_msr_test.c (limited to 'tools') diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index 7b33464bf8cc..9bc2eba1af1c 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -4,16 +4,12 @@ include $(top_srcdir)/scripts/subarch.include ARCH ?= $(SUBARCH) ifeq ($(ARCH),$(filter $(ARCH),arm64 s390 riscv x86 x86_64)) -ifeq ($(ARCH),x86) - ARCH_DIR := x86_64 -else ifeq ($(ARCH),arm64) - ARCH_DIR := aarch64 -else ifeq ($(ARCH),s390) - ARCH_DIR := s390x +# Top-level selftests allows ARCH=x86_64 :-( +ifeq ($(ARCH),x86_64) + ARCH_DIR := x86 else ARCH_DIR := $(ARCH) endif - include Makefile.kvm else # Empty targets for unsupported architectures diff --git a/tools/testing/selftests/kvm/Makefile.kvm b/tools/testing/selftests/kvm/Makefile.kvm index e988a72f8c20..9888dd6bb483 100644 --- a/tools/testing/selftests/kvm/Makefile.kvm +++ b/tools/testing/selftests/kvm/Makefile.kvm @@ -18,177 +18,177 @@ LIBKVM += lib/userfaultfd_util.c LIBKVM_STRING += lib/string_override.c -LIBKVM_x86_64 += lib/x86_64/apic.c -LIBKVM_x86_64 += lib/x86_64/handlers.S -LIBKVM_x86_64 += lib/x86_64/hyperv.c -LIBKVM_x86_64 += lib/x86_64/memstress.c -LIBKVM_x86_64 += lib/x86_64/pmu.c -LIBKVM_x86_64 += lib/x86_64/processor.c -LIBKVM_x86_64 += lib/x86_64/sev.c -LIBKVM_x86_64 += lib/x86_64/svm.c -LIBKVM_x86_64 += lib/x86_64/ucall.c -LIBKVM_x86_64 += lib/x86_64/vmx.c - -LIBKVM_aarch64 += lib/aarch64/gic.c -LIBKVM_aarch64 += lib/aarch64/gic_v3.c -LIBKVM_aarch64 += lib/aarch64/gic_v3_its.c -LIBKVM_aarch64 += lib/aarch64/handlers.S -LIBKVM_aarch64 += lib/aarch64/processor.c -LIBKVM_aarch64 += lib/aarch64/spinlock.c -LIBKVM_aarch64 += lib/aarch64/ucall.c -LIBKVM_aarch64 += lib/aarch64/vgic.c - -LIBKVM_s390x += lib/s390x/diag318_test_handler.c -LIBKVM_s390x += lib/s390x/processor.c -LIBKVM_s390x += lib/s390x/ucall.c -LIBKVM_s390x += lib/s390x/facility.c +LIBKVM_x86 += lib/x86/apic.c +LIBKVM_x86 += lib/x86/handlers.S +LIBKVM_x86 += lib/x86/hyperv.c +LIBKVM_x86 += lib/x86/memstress.c +LIBKVM_x86 += lib/x86/pmu.c +LIBKVM_x86 += lib/x86/processor.c +LIBKVM_x86 += lib/x86/sev.c +LIBKVM_x86 += lib/x86/svm.c +LIBKVM_x86 += lib/x86/ucall.c +LIBKVM_x86 += lib/x86/vmx.c + +LIBKVM_arm64 += lib/arm64/gic.c +LIBKVM_arm64 += lib/arm64/gic_v3.c +LIBKVM_arm64 += lib/arm64/gic_v3_its.c +LIBKVM_arm64 
+= lib/arm64/handlers.S +LIBKVM_arm64 += lib/arm64/processor.c +LIBKVM_arm64 += lib/arm64/spinlock.c +LIBKVM_arm64 += lib/arm64/ucall.c +LIBKVM_arm64 += lib/arm64/vgic.c + +LIBKVM_s390 += lib/s390/diag318_test_handler.c +LIBKVM_s390 += lib/s390/processor.c +LIBKVM_s390 += lib/s390/ucall.c +LIBKVM_s390 += lib/s390/facility.c LIBKVM_riscv += lib/riscv/handlers.S LIBKVM_riscv += lib/riscv/processor.c LIBKVM_riscv += lib/riscv/ucall.c # Non-compiled test targets -TEST_PROGS_x86_64 += x86_64/nx_huge_pages_test.sh +TEST_PROGS_x86 += x86/nx_huge_pages_test.sh # Compiled test targets -TEST_GEN_PROGS_x86_64 = x86_64/cpuid_test -TEST_GEN_PROGS_x86_64 += x86_64/cr4_cpuid_sync_test -TEST_GEN_PROGS_x86_64 += x86_64/dirty_log_page_splitting_test -TEST_GEN_PROGS_x86_64 += x86_64/feature_msrs_test -TEST_GEN_PROGS_x86_64 += x86_64/exit_on_emulation_failure_test -TEST_GEN_PROGS_x86_64 += x86_64/fix_hypercall_test -TEST_GEN_PROGS_x86_64 += x86_64/hwcr_msr_test -TEST_GEN_PROGS_x86_64 += x86_64/hyperv_clock -TEST_GEN_PROGS_x86_64 += x86_64/hyperv_cpuid -TEST_GEN_PROGS_x86_64 += x86_64/hyperv_evmcs -TEST_GEN_PROGS_x86_64 += x86_64/hyperv_extended_hypercalls -TEST_GEN_PROGS_x86_64 += x86_64/hyperv_features -TEST_GEN_PROGS_x86_64 += x86_64/hyperv_ipi -TEST_GEN_PROGS_x86_64 += x86_64/hyperv_svm_test -TEST_GEN_PROGS_x86_64 += x86_64/hyperv_tlb_flush -TEST_GEN_PROGS_x86_64 += x86_64/kvm_clock_test -TEST_GEN_PROGS_x86_64 += x86_64/kvm_pv_test -TEST_GEN_PROGS_x86_64 += x86_64/monitor_mwait_test -TEST_GEN_PROGS_x86_64 += x86_64/nested_exceptions_test -TEST_GEN_PROGS_x86_64 += x86_64/platform_info_test -TEST_GEN_PROGS_x86_64 += x86_64/pmu_counters_test -TEST_GEN_PROGS_x86_64 += x86_64/pmu_event_filter_test -TEST_GEN_PROGS_x86_64 += x86_64/private_mem_conversions_test -TEST_GEN_PROGS_x86_64 += x86_64/private_mem_kvm_exits_test -TEST_GEN_PROGS_x86_64 += x86_64/set_boot_cpu_id -TEST_GEN_PROGS_x86_64 += x86_64/set_sregs_test -TEST_GEN_PROGS_x86_64 += x86_64/smaller_maxphyaddr_emulation_test -TEST_GEN_PROGS_x86_64 += x86_64/smm_test -TEST_GEN_PROGS_x86_64 += x86_64/state_test -TEST_GEN_PROGS_x86_64 += x86_64/vmx_preemption_timer_test -TEST_GEN_PROGS_x86_64 += x86_64/svm_vmcall_test -TEST_GEN_PROGS_x86_64 += x86_64/svm_int_ctl_test -TEST_GEN_PROGS_x86_64 += x86_64/svm_nested_shutdown_test -TEST_GEN_PROGS_x86_64 += x86_64/svm_nested_soft_inject_test -TEST_GEN_PROGS_x86_64 += x86_64/tsc_scaling_sync -TEST_GEN_PROGS_x86_64 += x86_64/sync_regs_test -TEST_GEN_PROGS_x86_64 += x86_64/ucna_injection_test -TEST_GEN_PROGS_x86_64 += x86_64/userspace_io_test -TEST_GEN_PROGS_x86_64 += x86_64/userspace_msr_exit_test -TEST_GEN_PROGS_x86_64 += x86_64/vmx_apic_access_test -TEST_GEN_PROGS_x86_64 += x86_64/vmx_close_while_nested_test -TEST_GEN_PROGS_x86_64 += x86_64/vmx_dirty_log_test -TEST_GEN_PROGS_x86_64 += x86_64/vmx_exception_with_invalid_guest_state -TEST_GEN_PROGS_x86_64 += x86_64/vmx_msrs_test -TEST_GEN_PROGS_x86_64 += x86_64/vmx_invalid_nested_guest_state -TEST_GEN_PROGS_x86_64 += x86_64/vmx_set_nested_state_test -TEST_GEN_PROGS_x86_64 += x86_64/vmx_tsc_adjust_test -TEST_GEN_PROGS_x86_64 += x86_64/vmx_nested_tsc_scaling_test -TEST_GEN_PROGS_x86_64 += x86_64/apic_bus_clock_test -TEST_GEN_PROGS_x86_64 += x86_64/xapic_ipi_test -TEST_GEN_PROGS_x86_64 += x86_64/xapic_state_test -TEST_GEN_PROGS_x86_64 += x86_64/xcr0_cpuid_test -TEST_GEN_PROGS_x86_64 += x86_64/xss_msr_test -TEST_GEN_PROGS_x86_64 += x86_64/debug_regs -TEST_GEN_PROGS_x86_64 += x86_64/tsc_msrs_test -TEST_GEN_PROGS_x86_64 += x86_64/vmx_pmu_caps_test -TEST_GEN_PROGS_x86_64 += 
x86_64/xen_shinfo_test -TEST_GEN_PROGS_x86_64 += x86_64/xen_vmcall_test -TEST_GEN_PROGS_x86_64 += x86_64/sev_init2_tests -TEST_GEN_PROGS_x86_64 += x86_64/sev_migrate_tests -TEST_GEN_PROGS_x86_64 += x86_64/sev_smoke_test -TEST_GEN_PROGS_x86_64 += x86_64/amx_test -TEST_GEN_PROGS_x86_64 += x86_64/max_vcpuid_cap_test -TEST_GEN_PROGS_x86_64 += x86_64/triple_fault_event_test -TEST_GEN_PROGS_x86_64 += x86_64/recalc_apic_map_test -TEST_GEN_PROGS_x86_64 += access_tracking_perf_test -TEST_GEN_PROGS_x86_64 += coalesced_io_test -TEST_GEN_PROGS_x86_64 += demand_paging_test -TEST_GEN_PROGS_x86_64 += dirty_log_test -TEST_GEN_PROGS_x86_64 += dirty_log_perf_test -TEST_GEN_PROGS_x86_64 += guest_memfd_test -TEST_GEN_PROGS_x86_64 += guest_print_test -TEST_GEN_PROGS_x86_64 += hardware_disable_test -TEST_GEN_PROGS_x86_64 += kvm_create_max_vcpus -TEST_GEN_PROGS_x86_64 += kvm_page_table_test -TEST_GEN_PROGS_x86_64 += mmu_stress_test -TEST_GEN_PROGS_x86_64 += memslot_modification_stress_test -TEST_GEN_PROGS_x86_64 += memslot_perf_test -TEST_GEN_PROGS_x86_64 += rseq_test -TEST_GEN_PROGS_x86_64 += set_memory_region_test -TEST_GEN_PROGS_x86_64 += steal_time -TEST_GEN_PROGS_x86_64 += kvm_binary_stats_test -TEST_GEN_PROGS_x86_64 += system_counter_offset_test -TEST_GEN_PROGS_x86_64 += pre_fault_memory_test +TEST_GEN_PROGS_x86 = x86/cpuid_test +TEST_GEN_PROGS_x86 += x86/cr4_cpuid_sync_test +TEST_GEN_PROGS_x86 += x86/dirty_log_page_splitting_test +TEST_GEN_PROGS_x86 += x86/feature_msrs_test +TEST_GEN_PROGS_x86 += x86/exit_on_emulation_failure_test +TEST_GEN_PROGS_x86 += x86/fix_hypercall_test +TEST_GEN_PROGS_x86 += x86/hwcr_msr_test +TEST_GEN_PROGS_x86 += x86/hyperv_clock +TEST_GEN_PROGS_x86 += x86/hyperv_cpuid +TEST_GEN_PROGS_x86 += x86/hyperv_evmcs +TEST_GEN_PROGS_x86 += x86/hyperv_extended_hypercalls +TEST_GEN_PROGS_x86 += x86/hyperv_features +TEST_GEN_PROGS_x86 += x86/hyperv_ipi +TEST_GEN_PROGS_x86 += x86/hyperv_svm_test +TEST_GEN_PROGS_x86 += x86/hyperv_tlb_flush +TEST_GEN_PROGS_x86 += x86/kvm_clock_test +TEST_GEN_PROGS_x86 += x86/kvm_pv_test +TEST_GEN_PROGS_x86 += x86/monitor_mwait_test +TEST_GEN_PROGS_x86 += x86/nested_exceptions_test +TEST_GEN_PROGS_x86 += x86/platform_info_test +TEST_GEN_PROGS_x86 += x86/pmu_counters_test +TEST_GEN_PROGS_x86 += x86/pmu_event_filter_test +TEST_GEN_PROGS_x86 += x86/private_mem_conversions_test +TEST_GEN_PROGS_x86 += x86/private_mem_kvm_exits_test +TEST_GEN_PROGS_x86 += x86/set_boot_cpu_id +TEST_GEN_PROGS_x86 += x86/set_sregs_test +TEST_GEN_PROGS_x86 += x86/smaller_maxphyaddr_emulation_test +TEST_GEN_PROGS_x86 += x86/smm_test +TEST_GEN_PROGS_x86 += x86/state_test +TEST_GEN_PROGS_x86 += x86/vmx_preemption_timer_test +TEST_GEN_PROGS_x86 += x86/svm_vmcall_test +TEST_GEN_PROGS_x86 += x86/svm_int_ctl_test +TEST_GEN_PROGS_x86 += x86/svm_nested_shutdown_test +TEST_GEN_PROGS_x86 += x86/svm_nested_soft_inject_test +TEST_GEN_PROGS_x86 += x86/tsc_scaling_sync +TEST_GEN_PROGS_x86 += x86/sync_regs_test +TEST_GEN_PROGS_x86 += x86/ucna_injection_test +TEST_GEN_PROGS_x86 += x86/userspace_io_test +TEST_GEN_PROGS_x86 += x86/userspace_msr_exit_test +TEST_GEN_PROGS_x86 += x86/vmx_apic_access_test +TEST_GEN_PROGS_x86 += x86/vmx_close_while_nested_test +TEST_GEN_PROGS_x86 += x86/vmx_dirty_log_test +TEST_GEN_PROGS_x86 += x86/vmx_exception_with_invalid_guest_state +TEST_GEN_PROGS_x86 += x86/vmx_msrs_test +TEST_GEN_PROGS_x86 += x86/vmx_invalid_nested_guest_state +TEST_GEN_PROGS_x86 += x86/vmx_set_nested_state_test +TEST_GEN_PROGS_x86 += x86/vmx_tsc_adjust_test +TEST_GEN_PROGS_x86 += 
x86/vmx_nested_tsc_scaling_test +TEST_GEN_PROGS_x86 += x86/apic_bus_clock_test +TEST_GEN_PROGS_x86 += x86/xapic_ipi_test +TEST_GEN_PROGS_x86 += x86/xapic_state_test +TEST_GEN_PROGS_x86 += x86/xcr0_cpuid_test +TEST_GEN_PROGS_x86 += x86/xss_msr_test +TEST_GEN_PROGS_x86 += x86/debug_regs +TEST_GEN_PROGS_x86 += x86/tsc_msrs_test +TEST_GEN_PROGS_x86 += x86/vmx_pmu_caps_test +TEST_GEN_PROGS_x86 += x86/xen_shinfo_test +TEST_GEN_PROGS_x86 += x86/xen_vmcall_test +TEST_GEN_PROGS_x86 += x86/sev_init2_tests +TEST_GEN_PROGS_x86 += x86/sev_migrate_tests +TEST_GEN_PROGS_x86 += x86/sev_smoke_test +TEST_GEN_PROGS_x86 += x86/amx_test +TEST_GEN_PROGS_x86 += x86/max_vcpuid_cap_test +TEST_GEN_PROGS_x86 += x86/triple_fault_event_test +TEST_GEN_PROGS_x86 += x86/recalc_apic_map_test +TEST_GEN_PROGS_x86 += access_tracking_perf_test +TEST_GEN_PROGS_x86 += coalesced_io_test +TEST_GEN_PROGS_x86 += demand_paging_test +TEST_GEN_PROGS_x86 += dirty_log_test +TEST_GEN_PROGS_x86 += dirty_log_perf_test +TEST_GEN_PROGS_x86 += guest_memfd_test +TEST_GEN_PROGS_x86 += guest_print_test +TEST_GEN_PROGS_x86 += hardware_disable_test +TEST_GEN_PROGS_x86 += kvm_create_max_vcpus +TEST_GEN_PROGS_x86 += kvm_page_table_test +TEST_GEN_PROGS_x86 += memslot_modification_stress_test +TEST_GEN_PROGS_x86 += memslot_perf_test +TEST_GEN_PROGS_x86 += mmu_stress_test +TEST_GEN_PROGS_x86 += rseq_test +TEST_GEN_PROGS_x86 += set_memory_region_test +TEST_GEN_PROGS_x86 += steal_time +TEST_GEN_PROGS_x86 += kvm_binary_stats_test +TEST_GEN_PROGS_x86 += system_counter_offset_test +TEST_GEN_PROGS_x86 += pre_fault_memory_test # Compiled outputs used by test targets -TEST_GEN_PROGS_EXTENDED_x86_64 += x86_64/nx_huge_pages_test - -TEST_GEN_PROGS_aarch64 += aarch64/aarch32_id_regs -TEST_GEN_PROGS_aarch64 += aarch64/arch_timer_edge_cases -TEST_GEN_PROGS_aarch64 += aarch64/debug-exceptions -TEST_GEN_PROGS_aarch64 += aarch64/hypercalls -TEST_GEN_PROGS_aarch64 += aarch64/mmio_abort -TEST_GEN_PROGS_aarch64 += aarch64/page_fault_test -TEST_GEN_PROGS_aarch64 += aarch64/psci_test -TEST_GEN_PROGS_aarch64 += aarch64/set_id_regs -TEST_GEN_PROGS_aarch64 += aarch64/smccc_filter -TEST_GEN_PROGS_aarch64 += aarch64/vcpu_width_config -TEST_GEN_PROGS_aarch64 += aarch64/vgic_init -TEST_GEN_PROGS_aarch64 += aarch64/vgic_irq -TEST_GEN_PROGS_aarch64 += aarch64/vgic_lpi_stress -TEST_GEN_PROGS_aarch64 += aarch64/vpmu_counter_access -TEST_GEN_PROGS_aarch64 += aarch64/no-vgic-v3 -TEST_GEN_PROGS_aarch64 += access_tracking_perf_test -TEST_GEN_PROGS_aarch64 += arch_timer -TEST_GEN_PROGS_aarch64 += coalesced_io_test -TEST_GEN_PROGS_aarch64 += demand_paging_test -TEST_GEN_PROGS_aarch64 += dirty_log_test -TEST_GEN_PROGS_aarch64 += dirty_log_perf_test -TEST_GEN_PROGS_aarch64 += guest_print_test -TEST_GEN_PROGS_aarch64 += get-reg-list -TEST_GEN_PROGS_aarch64 += kvm_create_max_vcpus -TEST_GEN_PROGS_aarch64 += kvm_page_table_test -TEST_GEN_PROGS_aarch64 += memslot_modification_stress_test -TEST_GEN_PROGS_aarch64 += memslot_perf_test -TEST_GEN_PROGS_aarch64 += mmu_stress_test -TEST_GEN_PROGS_aarch64 += rseq_test -TEST_GEN_PROGS_aarch64 += set_memory_region_test -TEST_GEN_PROGS_aarch64 += steal_time -TEST_GEN_PROGS_aarch64 += kvm_binary_stats_test - -TEST_GEN_PROGS_s390x = s390x/memop -TEST_GEN_PROGS_s390x += s390x/resets -TEST_GEN_PROGS_s390x += s390x/sync_regs_test -TEST_GEN_PROGS_s390x += s390x/tprot -TEST_GEN_PROGS_s390x += s390x/cmma_test -TEST_GEN_PROGS_s390x += s390x/debug_test -TEST_GEN_PROGS_s390x += s390x/cpumodel_subfuncs_test -TEST_GEN_PROGS_s390x += s390x/shared_zeropage_test 
-TEST_GEN_PROGS_s390x += s390x/ucontrol_test -TEST_GEN_PROGS_s390x += demand_paging_test -TEST_GEN_PROGS_s390x += dirty_log_test -TEST_GEN_PROGS_s390x += guest_print_test -TEST_GEN_PROGS_s390x += kvm_create_max_vcpus -TEST_GEN_PROGS_s390x += kvm_page_table_test -TEST_GEN_PROGS_s390x += rseq_test -TEST_GEN_PROGS_s390x += set_memory_region_test -TEST_GEN_PROGS_s390x += kvm_binary_stats_test +TEST_GEN_PROGS_EXTENDED_x86 += x86/nx_huge_pages_test + +TEST_GEN_PROGS_arm64 += arm64/aarch32_id_regs +TEST_GEN_PROGS_arm64 += arm64/arch_timer_edge_cases +TEST_GEN_PROGS_arm64 += arm64/debug-exceptions +TEST_GEN_PROGS_arm64 += arm64/hypercalls +TEST_GEN_PROGS_arm64 += arm64/mmio_abort +TEST_GEN_PROGS_arm64 += arm64/page_fault_test +TEST_GEN_PROGS_arm64 += arm64/psci_test +TEST_GEN_PROGS_arm64 += arm64/set_id_regs +TEST_GEN_PROGS_arm64 += arm64/smccc_filter +TEST_GEN_PROGS_arm64 += arm64/vcpu_width_config +TEST_GEN_PROGS_arm64 += arm64/vgic_init +TEST_GEN_PROGS_arm64 += arm64/vgic_irq +TEST_GEN_PROGS_arm64 += arm64/vgic_lpi_stress +TEST_GEN_PROGS_arm64 += arm64/vpmu_counter_access +TEST_GEN_PROGS_arm64 += arm64/no-vgic-v3 +TEST_GEN_PROGS_arm64 += access_tracking_perf_test +TEST_GEN_PROGS_arm64 += arch_timer +TEST_GEN_PROGS_arm64 += coalesced_io_test +TEST_GEN_PROGS_arm64 += demand_paging_test +TEST_GEN_PROGS_arm64 += dirty_log_test +TEST_GEN_PROGS_arm64 += dirty_log_perf_test +TEST_GEN_PROGS_arm64 += guest_print_test +TEST_GEN_PROGS_arm64 += get-reg-list +TEST_GEN_PROGS_arm64 += kvm_create_max_vcpus +TEST_GEN_PROGS_arm64 += kvm_page_table_test +TEST_GEN_PROGS_arm64 += memslot_modification_stress_test +TEST_GEN_PROGS_arm64 += memslot_perf_test +TEST_GEN_PROGS_arm64 += mmu_stress_test +TEST_GEN_PROGS_arm64 += rseq_test +TEST_GEN_PROGS_arm64 += set_memory_region_test +TEST_GEN_PROGS_arm64 += steal_time +TEST_GEN_PROGS_arm64 += kvm_binary_stats_test + +TEST_GEN_PROGS_s390 = s390/memop +TEST_GEN_PROGS_s390 += s390/resets +TEST_GEN_PROGS_s390 += s390/sync_regs_test +TEST_GEN_PROGS_s390 += s390/tprot +TEST_GEN_PROGS_s390 += s390/cmma_test +TEST_GEN_PROGS_s390 += s390/debug_test +TEST_GEN_PROGS_s390 += s390/cpumodel_subfuncs_test +TEST_GEN_PROGS_s390 += s390/shared_zeropage_test +TEST_GEN_PROGS_s390 += s390/ucontrol_test +TEST_GEN_PROGS_s390 += demand_paging_test +TEST_GEN_PROGS_s390 += dirty_log_test +TEST_GEN_PROGS_s390 += guest_print_test +TEST_GEN_PROGS_s390 += kvm_create_max_vcpus +TEST_GEN_PROGS_s390 += kvm_page_table_test +TEST_GEN_PROGS_s390 += rseq_test +TEST_GEN_PROGS_s390 += set_memory_region_test +TEST_GEN_PROGS_s390 += kvm_binary_stats_test TEST_GEN_PROGS_riscv += riscv/sbi_pmu_test TEST_GEN_PROGS_riscv += riscv/ebreak_test @@ -222,11 +222,7 @@ include ../lib.mk INSTALL_HDR_PATH = $(top_srcdir)/usr LINUX_HDR_PATH = $(INSTALL_HDR_PATH)/include/ LINUX_TOOL_INCLUDE = $(top_srcdir)/tools/include -ifeq ($(ARCH),x86_64) -LINUX_TOOL_ARCH_INCLUDE = $(top_srcdir)/tools/arch/x86/include -else -LINUX_TOOL_ARCH_INCLUDE = $(top_srcdir)/tools/arch/$(ARCH)/include -endif +LINUX_TOOL_ARCH_INCLUDE = $(top_srcdir)/tools/arch/$(ARCH_DIR)/include CFLAGS += -Wall -Wstrict-prototypes -Wuninitialized -O2 -g -std=gnu99 \ -Wno-gnu-variable-sized-type-not-at-end -MD -MP -DCONFIG_64BIT \ -fno-builtin-memcmp -fno-builtin-memcpy \ diff --git a/tools/testing/selftests/kvm/aarch64/aarch32_id_regs.c b/tools/testing/selftests/kvm/aarch64/aarch32_id_regs.c deleted file mode 100644 index 447d61cae4db..000000000000 --- a/tools/testing/selftests/kvm/aarch64/aarch32_id_regs.c +++ /dev/null @@ -1,167 +0,0 @@ -// SPDX-License-Identifier: 
GPL-2.0-only -/* - * aarch32_id_regs - Test for ID register behavior on AArch64-only systems - * - * Copyright (c) 2022 Google LLC. - * - * Test that KVM handles the AArch64 views of the AArch32 ID registers as RAZ - * and WI from userspace. - */ - -#include - -#include "kvm_util.h" -#include "processor.h" -#include "test_util.h" -#include - -#define BAD_ID_REG_VAL 0x1badc0deul - -#define GUEST_ASSERT_REG_RAZ(reg) GUEST_ASSERT_EQ(read_sysreg_s(reg), 0) - -static void guest_main(void) -{ - GUEST_ASSERT_REG_RAZ(SYS_ID_PFR0_EL1); - GUEST_ASSERT_REG_RAZ(SYS_ID_PFR1_EL1); - GUEST_ASSERT_REG_RAZ(SYS_ID_DFR0_EL1); - GUEST_ASSERT_REG_RAZ(SYS_ID_AFR0_EL1); - GUEST_ASSERT_REG_RAZ(SYS_ID_MMFR0_EL1); - GUEST_ASSERT_REG_RAZ(SYS_ID_MMFR1_EL1); - GUEST_ASSERT_REG_RAZ(SYS_ID_MMFR2_EL1); - GUEST_ASSERT_REG_RAZ(SYS_ID_MMFR3_EL1); - GUEST_ASSERT_REG_RAZ(SYS_ID_ISAR0_EL1); - GUEST_ASSERT_REG_RAZ(SYS_ID_ISAR1_EL1); - GUEST_ASSERT_REG_RAZ(SYS_ID_ISAR2_EL1); - GUEST_ASSERT_REG_RAZ(SYS_ID_ISAR3_EL1); - GUEST_ASSERT_REG_RAZ(SYS_ID_ISAR4_EL1); - GUEST_ASSERT_REG_RAZ(SYS_ID_ISAR5_EL1); - GUEST_ASSERT_REG_RAZ(SYS_ID_MMFR4_EL1); - GUEST_ASSERT_REG_RAZ(SYS_ID_ISAR6_EL1); - GUEST_ASSERT_REG_RAZ(SYS_MVFR0_EL1); - GUEST_ASSERT_REG_RAZ(SYS_MVFR1_EL1); - GUEST_ASSERT_REG_RAZ(SYS_MVFR2_EL1); - GUEST_ASSERT_REG_RAZ(sys_reg(3, 0, 0, 3, 3)); - GUEST_ASSERT_REG_RAZ(SYS_ID_PFR2_EL1); - GUEST_ASSERT_REG_RAZ(SYS_ID_DFR1_EL1); - GUEST_ASSERT_REG_RAZ(SYS_ID_MMFR5_EL1); - GUEST_ASSERT_REG_RAZ(sys_reg(3, 0, 0, 3, 7)); - - GUEST_DONE(); -} - -static void test_guest_raz(struct kvm_vcpu *vcpu) -{ - struct ucall uc; - - vcpu_run(vcpu); - - switch (get_ucall(vcpu, &uc)) { - case UCALL_ABORT: - REPORT_GUEST_ASSERT(uc); - break; - case UCALL_DONE: - break; - default: - TEST_FAIL("Unexpected ucall: %lu", uc.cmd); - } -} - -static uint64_t raz_wi_reg_ids[] = { - KVM_ARM64_SYS_REG(SYS_ID_PFR0_EL1), - KVM_ARM64_SYS_REG(SYS_ID_PFR1_EL1), - KVM_ARM64_SYS_REG(SYS_ID_DFR0_EL1), - KVM_ARM64_SYS_REG(SYS_ID_MMFR0_EL1), - KVM_ARM64_SYS_REG(SYS_ID_MMFR1_EL1), - KVM_ARM64_SYS_REG(SYS_ID_MMFR2_EL1), - KVM_ARM64_SYS_REG(SYS_ID_MMFR3_EL1), - KVM_ARM64_SYS_REG(SYS_ID_ISAR0_EL1), - KVM_ARM64_SYS_REG(SYS_ID_ISAR1_EL1), - KVM_ARM64_SYS_REG(SYS_ID_ISAR2_EL1), - KVM_ARM64_SYS_REG(SYS_ID_ISAR3_EL1), - KVM_ARM64_SYS_REG(SYS_ID_ISAR4_EL1), - KVM_ARM64_SYS_REG(SYS_ID_ISAR5_EL1), - KVM_ARM64_SYS_REG(SYS_ID_MMFR4_EL1), - KVM_ARM64_SYS_REG(SYS_ID_ISAR6_EL1), - KVM_ARM64_SYS_REG(SYS_MVFR0_EL1), - KVM_ARM64_SYS_REG(SYS_MVFR1_EL1), - KVM_ARM64_SYS_REG(SYS_MVFR2_EL1), - KVM_ARM64_SYS_REG(SYS_ID_PFR2_EL1), - KVM_ARM64_SYS_REG(SYS_ID_MMFR5_EL1), -}; - -static void test_user_raz_wi(struct kvm_vcpu *vcpu) -{ - int i; - - for (i = 0; i < ARRAY_SIZE(raz_wi_reg_ids); i++) { - uint64_t reg_id = raz_wi_reg_ids[i]; - uint64_t val; - - val = vcpu_get_reg(vcpu, reg_id); - TEST_ASSERT_EQ(val, 0); - - /* - * Expect the ioctl to succeed with no effect on the register - * value. 
- */ - vcpu_set_reg(vcpu, reg_id, BAD_ID_REG_VAL); - - val = vcpu_get_reg(vcpu, reg_id); - TEST_ASSERT_EQ(val, 0); - } -} - -static uint64_t raz_invariant_reg_ids[] = { - KVM_ARM64_SYS_REG(SYS_ID_AFR0_EL1), - KVM_ARM64_SYS_REG(sys_reg(3, 0, 0, 3, 3)), - KVM_ARM64_SYS_REG(SYS_ID_DFR1_EL1), - KVM_ARM64_SYS_REG(sys_reg(3, 0, 0, 3, 7)), -}; - -static void test_user_raz_invariant(struct kvm_vcpu *vcpu) -{ - int i, r; - - for (i = 0; i < ARRAY_SIZE(raz_invariant_reg_ids); i++) { - uint64_t reg_id = raz_invariant_reg_ids[i]; - uint64_t val; - - val = vcpu_get_reg(vcpu, reg_id); - TEST_ASSERT_EQ(val, 0); - - r = __vcpu_set_reg(vcpu, reg_id, BAD_ID_REG_VAL); - TEST_ASSERT(r < 0 && errno == EINVAL, - "unexpected KVM_SET_ONE_REG error: r=%d, errno=%d", r, errno); - - val = vcpu_get_reg(vcpu, reg_id); - TEST_ASSERT_EQ(val, 0); - } -} - - - -static bool vcpu_aarch64_only(struct kvm_vcpu *vcpu) -{ - uint64_t val, el0; - - val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1)); - - el0 = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL0), val); - return el0 == ID_AA64PFR0_EL1_ELx_64BIT_ONLY; -} - -int main(void) -{ - struct kvm_vcpu *vcpu; - struct kvm_vm *vm; - - vm = vm_create_with_one_vcpu(&vcpu, guest_main); - - TEST_REQUIRE(vcpu_aarch64_only(vcpu)); - - test_user_raz_wi(vcpu); - test_user_raz_invariant(vcpu); - test_guest_raz(vcpu); - - kvm_vm_free(vm); -} diff --git a/tools/testing/selftests/kvm/aarch64/arch_timer.c b/tools/testing/selftests/kvm/aarch64/arch_timer.c deleted file mode 100644 index eeba1cc87ff8..000000000000 --- a/tools/testing/selftests/kvm/aarch64/arch_timer.c +++ /dev/null @@ -1,220 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * The test validates both the virtual and physical timer IRQs using - * CVAL and TVAL registers. - * - * Copyright (c) 2021, Google LLC. 
- */ -#include "arch_timer.h" -#include "delay.h" -#include "gic.h" -#include "processor.h" -#include "timer_test.h" -#include "ucall_common.h" -#include "vgic.h" - -enum guest_stage { - GUEST_STAGE_VTIMER_CVAL = 1, - GUEST_STAGE_VTIMER_TVAL, - GUEST_STAGE_PTIMER_CVAL, - GUEST_STAGE_PTIMER_TVAL, - GUEST_STAGE_MAX, -}; - -static int vtimer_irq, ptimer_irq; - -static void -guest_configure_timer_action(struct test_vcpu_shared_data *shared_data) -{ - switch (shared_data->guest_stage) { - case GUEST_STAGE_VTIMER_CVAL: - timer_set_next_cval_ms(VIRTUAL, test_args.timer_period_ms); - shared_data->xcnt = timer_get_cntct(VIRTUAL); - timer_set_ctl(VIRTUAL, CTL_ENABLE); - break; - case GUEST_STAGE_VTIMER_TVAL: - timer_set_next_tval_ms(VIRTUAL, test_args.timer_period_ms); - shared_data->xcnt = timer_get_cntct(VIRTUAL); - timer_set_ctl(VIRTUAL, CTL_ENABLE); - break; - case GUEST_STAGE_PTIMER_CVAL: - timer_set_next_cval_ms(PHYSICAL, test_args.timer_period_ms); - shared_data->xcnt = timer_get_cntct(PHYSICAL); - timer_set_ctl(PHYSICAL, CTL_ENABLE); - break; - case GUEST_STAGE_PTIMER_TVAL: - timer_set_next_tval_ms(PHYSICAL, test_args.timer_period_ms); - shared_data->xcnt = timer_get_cntct(PHYSICAL); - timer_set_ctl(PHYSICAL, CTL_ENABLE); - break; - default: - GUEST_ASSERT(0); - } -} - -static void guest_validate_irq(unsigned int intid, - struct test_vcpu_shared_data *shared_data) -{ - enum guest_stage stage = shared_data->guest_stage; - uint64_t xcnt = 0, xcnt_diff_us, cval = 0; - unsigned long xctl = 0; - unsigned int timer_irq = 0; - unsigned int accessor; - - if (intid == IAR_SPURIOUS) - return; - - switch (stage) { - case GUEST_STAGE_VTIMER_CVAL: - case GUEST_STAGE_VTIMER_TVAL: - accessor = VIRTUAL; - timer_irq = vtimer_irq; - break; - case GUEST_STAGE_PTIMER_CVAL: - case GUEST_STAGE_PTIMER_TVAL: - accessor = PHYSICAL; - timer_irq = ptimer_irq; - break; - default: - GUEST_ASSERT(0); - return; - } - - xctl = timer_get_ctl(accessor); - if ((xctl & CTL_IMASK) || !(xctl & CTL_ENABLE)) - return; - - timer_set_ctl(accessor, CTL_IMASK); - xcnt = timer_get_cntct(accessor); - cval = timer_get_cval(accessor); - - xcnt_diff_us = cycles_to_usec(xcnt - shared_data->xcnt); - - /* Make sure we are dealing with the correct timer IRQ */ - GUEST_ASSERT_EQ(intid, timer_irq); - - /* Basic 'timer condition met' check */ - __GUEST_ASSERT(xcnt >= cval, - "xcnt = 0x%lx, cval = 0x%lx, xcnt_diff_us = 0x%lx", - xcnt, cval, xcnt_diff_us); - __GUEST_ASSERT(xctl & CTL_ISTATUS, "xctl = 0x%lx", xctl); - - WRITE_ONCE(shared_data->nr_iter, shared_data->nr_iter + 1); -} - -static void guest_irq_handler(struct ex_regs *regs) -{ - unsigned int intid = gic_get_and_ack_irq(); - uint32_t cpu = guest_get_vcpuid(); - struct test_vcpu_shared_data *shared_data = &vcpu_shared_data[cpu]; - - guest_validate_irq(intid, shared_data); - - gic_set_eoi(intid); -} - -static void guest_run_stage(struct test_vcpu_shared_data *shared_data, - enum guest_stage stage) -{ - uint32_t irq_iter, config_iter; - - shared_data->guest_stage = stage; - shared_data->nr_iter = 0; - - for (config_iter = 0; config_iter < test_args.nr_iter; config_iter++) { - /* Setup the next interrupt */ - guest_configure_timer_action(shared_data); - - /* Setup a timeout for the interrupt to arrive */ - udelay(msecs_to_usecs(test_args.timer_period_ms) + - test_args.timer_err_margin_us); - - irq_iter = READ_ONCE(shared_data->nr_iter); - __GUEST_ASSERT(config_iter + 1 == irq_iter, - "config_iter + 1 = 0x%x, irq_iter = 0x%x.\n" - " Guest timer interrupt was not triggered within the 
specified\n" - " interval, try to increase the error margin by [-e] option.\n", - config_iter + 1, irq_iter); - } -} - -static void guest_code(void) -{ - uint32_t cpu = guest_get_vcpuid(); - struct test_vcpu_shared_data *shared_data = &vcpu_shared_data[cpu]; - - local_irq_disable(); - - gic_init(GIC_V3, test_args.nr_vcpus); - - timer_set_ctl(VIRTUAL, CTL_IMASK); - timer_set_ctl(PHYSICAL, CTL_IMASK); - - gic_irq_enable(vtimer_irq); - gic_irq_enable(ptimer_irq); - local_irq_enable(); - - guest_run_stage(shared_data, GUEST_STAGE_VTIMER_CVAL); - guest_run_stage(shared_data, GUEST_STAGE_VTIMER_TVAL); - guest_run_stage(shared_data, GUEST_STAGE_PTIMER_CVAL); - guest_run_stage(shared_data, GUEST_STAGE_PTIMER_TVAL); - - GUEST_DONE(); -} - -static void test_init_timer_irq(struct kvm_vm *vm) -{ - /* Timer initid should be same for all the vCPUs, so query only vCPU-0 */ - vcpu_device_attr_get(vcpus[0], KVM_ARM_VCPU_TIMER_CTRL, - KVM_ARM_VCPU_TIMER_IRQ_PTIMER, &ptimer_irq); - vcpu_device_attr_get(vcpus[0], KVM_ARM_VCPU_TIMER_CTRL, - KVM_ARM_VCPU_TIMER_IRQ_VTIMER, &vtimer_irq); - - sync_global_to_guest(vm, ptimer_irq); - sync_global_to_guest(vm, vtimer_irq); - - pr_debug("ptimer_irq: %d; vtimer_irq: %d\n", ptimer_irq, vtimer_irq); -} - -static int gic_fd; - -struct kvm_vm *test_vm_create(void) -{ - struct kvm_vm *vm; - unsigned int i; - int nr_vcpus = test_args.nr_vcpus; - - vm = vm_create_with_vcpus(nr_vcpus, guest_code, vcpus); - - vm_init_descriptor_tables(vm); - vm_install_exception_handler(vm, VECTOR_IRQ_CURRENT, guest_irq_handler); - - if (!test_args.reserved) { - if (kvm_has_cap(KVM_CAP_COUNTER_OFFSET)) { - struct kvm_arm_counter_offset offset = { - .counter_offset = test_args.counter_offset, - .reserved = 0, - }; - vm_ioctl(vm, KVM_ARM_SET_COUNTER_OFFSET, &offset); - } else - TEST_FAIL("no support for global offset"); - } - - for (i = 0; i < nr_vcpus; i++) - vcpu_init_descriptor_tables(vcpus[i]); - - test_init_timer_irq(vm); - gic_fd = vgic_v3_setup(vm, nr_vcpus, 64); - __TEST_REQUIRE(gic_fd >= 0, "Failed to create vgic-v3"); - - /* Make all the test's cmdline args visible to the guest */ - sync_global_to_guest(vm, test_args); - - return vm; -} - -void test_vm_cleanup(struct kvm_vm *vm) -{ - close(gic_fd); - kvm_vm_free(vm); -} diff --git a/tools/testing/selftests/kvm/aarch64/arch_timer_edge_cases.c b/tools/testing/selftests/kvm/aarch64/arch_timer_edge_cases.c deleted file mode 100644 index a36a7e2db434..000000000000 --- a/tools/testing/selftests/kvm/aarch64/arch_timer_edge_cases.c +++ /dev/null @@ -1,1062 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * arch_timer_edge_cases.c - Tests the aarch64 timer IRQ functionality. - * - * The test validates some edge cases related to the arch-timer: - * - timers above the max TVAL value. - * - timers in the past - * - moving counters ahead and behind pending timers. - * - reprograming timers. - * - timers fired multiple times. - * - masking/unmasking using the timer control mask. - * - * Copyright (c) 2021, Google LLC. - */ - -#define _GNU_SOURCE - -#include -#include - -#include "arch_timer.h" -#include "gic.h" -#include "vgic.h" - -static const uint64_t CVAL_MAX = ~0ULL; -/* tval is a signed 32-bit int. */ -static const int32_t TVAL_MAX = INT32_MAX; -static const int32_t TVAL_MIN = INT32_MIN; - -/* After how much time we say there is no IRQ. */ -static const uint32_t TIMEOUT_NO_IRQ_US = 50000; - -/* A nice counter value to use as the starting one for most tests. */ -static const uint64_t DEF_CNT = (CVAL_MAX / 2); - -/* Number of runs. 
*/ -static const uint32_t NR_TEST_ITERS_DEF = 5; - -/* Default wait test time in ms. */ -static const uint32_t WAIT_TEST_MS = 10; - -/* Default "long" wait test time in ms. */ -static const uint32_t LONG_WAIT_TEST_MS = 100; - -/* Shared with IRQ handler. */ -struct test_vcpu_shared_data { - atomic_t handled; - atomic_t spurious; -} shared_data; - -struct test_args { - /* Virtual or physical timer and counter tests. */ - enum arch_timer timer; - /* Delay used for most timer tests. */ - uint64_t wait_ms; - /* Delay used in the test_long_timer_delays test. */ - uint64_t long_wait_ms; - /* Number of iterations. */ - int iterations; - /* Whether to test the physical timer. */ - bool test_physical; - /* Whether to test the virtual timer. */ - bool test_virtual; -}; - -struct test_args test_args = { - .wait_ms = WAIT_TEST_MS, - .long_wait_ms = LONG_WAIT_TEST_MS, - .iterations = NR_TEST_ITERS_DEF, - .test_physical = true, - .test_virtual = true, -}; - -static int vtimer_irq, ptimer_irq; - -enum sync_cmd { - SET_COUNTER_VALUE, - USERSPACE_USLEEP, - USERSPACE_SCHED_YIELD, - USERSPACE_MIGRATE_SELF, - NO_USERSPACE_CMD, -}; - -typedef void (*sleep_method_t)(enum arch_timer timer, uint64_t usec); - -static void sleep_poll(enum arch_timer timer, uint64_t usec); -static void sleep_sched_poll(enum arch_timer timer, uint64_t usec); -static void sleep_in_userspace(enum arch_timer timer, uint64_t usec); -static void sleep_migrate(enum arch_timer timer, uint64_t usec); - -sleep_method_t sleep_method[] = { - sleep_poll, - sleep_sched_poll, - sleep_migrate, - sleep_in_userspace, -}; - -typedef void (*irq_wait_method_t)(void); - -static void wait_for_non_spurious_irq(void); -static void wait_poll_for_irq(void); -static void wait_sched_poll_for_irq(void); -static void wait_migrate_poll_for_irq(void); - -irq_wait_method_t irq_wait_method[] = { - wait_for_non_spurious_irq, - wait_poll_for_irq, - wait_sched_poll_for_irq, - wait_migrate_poll_for_irq, -}; - -enum timer_view { - TIMER_CVAL, - TIMER_TVAL, -}; - -static void assert_irqs_handled(uint32_t n) -{ - int h = atomic_read(&shared_data.handled); - - __GUEST_ASSERT(h == n, "Handled %d IRQS but expected %d", h, n); -} - -static void userspace_cmd(uint64_t cmd) -{ - GUEST_SYNC_ARGS(cmd, 0, 0, 0, 0); -} - -static void userspace_migrate_vcpu(void) -{ - userspace_cmd(USERSPACE_MIGRATE_SELF); -} - -static void userspace_sleep(uint64_t usecs) -{ - GUEST_SYNC_ARGS(USERSPACE_USLEEP, usecs, 0, 0, 0); -} - -static void set_counter(enum arch_timer timer, uint64_t counter) -{ - GUEST_SYNC_ARGS(SET_COUNTER_VALUE, counter, timer, 0, 0); -} - -static void guest_irq_handler(struct ex_regs *regs) -{ - unsigned int intid = gic_get_and_ack_irq(); - enum arch_timer timer; - uint64_t cnt, cval; - uint32_t ctl; - bool timer_condition, istatus; - - if (intid == IAR_SPURIOUS) { - atomic_inc(&shared_data.spurious); - goto out; - } - - if (intid == ptimer_irq) - timer = PHYSICAL; - else if (intid == vtimer_irq) - timer = VIRTUAL; - else - goto out; - - ctl = timer_get_ctl(timer); - cval = timer_get_cval(timer); - cnt = timer_get_cntct(timer); - timer_condition = cnt >= cval; - istatus = (ctl & CTL_ISTATUS) && (ctl & CTL_ENABLE); - GUEST_ASSERT_EQ(timer_condition, istatus); - - /* Disable and mask the timer. 
*/ - timer_set_ctl(timer, CTL_IMASK); - - atomic_inc(&shared_data.handled); - -out: - gic_set_eoi(intid); -} - -static void set_cval_irq(enum arch_timer timer, uint64_t cval_cycles, - uint32_t ctl) -{ - atomic_set(&shared_data.handled, 0); - atomic_set(&shared_data.spurious, 0); - timer_set_cval(timer, cval_cycles); - timer_set_ctl(timer, ctl); -} - -static void set_tval_irq(enum arch_timer timer, uint64_t tval_cycles, - uint32_t ctl) -{ - atomic_set(&shared_data.handled, 0); - atomic_set(&shared_data.spurious, 0); - timer_set_ctl(timer, ctl); - timer_set_tval(timer, tval_cycles); -} - -static void set_xval_irq(enum arch_timer timer, uint64_t xval, uint32_t ctl, - enum timer_view tv) -{ - switch (tv) { - case TIMER_CVAL: - set_cval_irq(timer, xval, ctl); - break; - case TIMER_TVAL: - set_tval_irq(timer, xval, ctl); - break; - default: - GUEST_FAIL("Could not get timer %d", timer); - } -} - -/* - * Note that this can theoretically hang forever, so we rely on having - * a timeout mechanism in the "runner", like: - * tools/testing/selftests/kselftest/runner.sh. - */ -static void wait_for_non_spurious_irq(void) -{ - int h; - - local_irq_disable(); - - for (h = atomic_read(&shared_data.handled); h == atomic_read(&shared_data.handled);) { - wfi(); - local_irq_enable(); - isb(); /* handle IRQ */ - local_irq_disable(); - } -} - -/* - * Wait for an non-spurious IRQ by polling in the guest or in - * userspace (e.g. userspace_cmd=USERSPACE_SCHED_YIELD). - * - * Note that this can theoretically hang forever, so we rely on having - * a timeout mechanism in the "runner", like: - * tools/testing/selftests/kselftest/runner.sh. - */ -static void poll_for_non_spurious_irq(enum sync_cmd usp_cmd) -{ - int h; - - local_irq_disable(); - - h = atomic_read(&shared_data.handled); - - local_irq_enable(); - while (h == atomic_read(&shared_data.handled)) { - if (usp_cmd == NO_USERSPACE_CMD) - cpu_relax(); - else - userspace_cmd(usp_cmd); - } - local_irq_disable(); -} - -static void wait_poll_for_irq(void) -{ - poll_for_non_spurious_irq(NO_USERSPACE_CMD); -} - -static void wait_sched_poll_for_irq(void) -{ - poll_for_non_spurious_irq(USERSPACE_SCHED_YIELD); -} - -static void wait_migrate_poll_for_irq(void) -{ - poll_for_non_spurious_irq(USERSPACE_MIGRATE_SELF); -} - -/* - * Sleep for usec microseconds by polling in the guest or in - * userspace (e.g. userspace_cmd=USERSPACE_SCHEDULE). - */ -static void guest_poll(enum arch_timer test_timer, uint64_t usec, - enum sync_cmd usp_cmd) -{ - uint64_t cycles = usec_to_cycles(usec); - /* Whichever timer we are testing with, sleep with the other. */ - enum arch_timer sleep_timer = 1 - test_timer; - uint64_t start = timer_get_cntct(sleep_timer); - - while ((timer_get_cntct(sleep_timer) - start) < cycles) { - if (usp_cmd == NO_USERSPACE_CMD) - cpu_relax(); - else - userspace_cmd(usp_cmd); - } -} - -static void sleep_poll(enum arch_timer timer, uint64_t usec) -{ - guest_poll(timer, usec, NO_USERSPACE_CMD); -} - -static void sleep_sched_poll(enum arch_timer timer, uint64_t usec) -{ - guest_poll(timer, usec, USERSPACE_SCHED_YIELD); -} - -static void sleep_migrate(enum arch_timer timer, uint64_t usec) -{ - guest_poll(timer, usec, USERSPACE_MIGRATE_SELF); -} - -static void sleep_in_userspace(enum arch_timer timer, uint64_t usec) -{ - userspace_sleep(usec); -} - -/* - * Reset the timer state to some nice values like the counter not being close - * to the edge, and the control register masked and disabled. 
- */ -static void reset_timer_state(enum arch_timer timer, uint64_t cnt) -{ - set_counter(timer, cnt); - timer_set_ctl(timer, CTL_IMASK); -} - -static void test_timer_xval(enum arch_timer timer, uint64_t xval, - enum timer_view tv, irq_wait_method_t wm, bool reset_state, - uint64_t reset_cnt) -{ - local_irq_disable(); - - if (reset_state) - reset_timer_state(timer, reset_cnt); - - set_xval_irq(timer, xval, CTL_ENABLE, tv); - - /* This method re-enables IRQs to handle the one we're looking for. */ - wm(); - - assert_irqs_handled(1); - local_irq_enable(); -} - -/* - * The test_timer_* functions will program the timer, wait for it, and assert - * the firing of the correct IRQ. - * - * These functions don't have a timeout and return as soon as they receive an - * IRQ. They can hang (forever), so we rely on having a timeout mechanism in - * the "runner", like: tools/testing/selftests/kselftest/runner.sh. - */ - -static void test_timer_cval(enum arch_timer timer, uint64_t cval, - irq_wait_method_t wm, bool reset_state, - uint64_t reset_cnt) -{ - test_timer_xval(timer, cval, TIMER_CVAL, wm, reset_state, reset_cnt); -} - -static void test_timer_tval(enum arch_timer timer, int32_t tval, - irq_wait_method_t wm, bool reset_state, - uint64_t reset_cnt) -{ - test_timer_xval(timer, (uint64_t) tval, TIMER_TVAL, wm, reset_state, - reset_cnt); -} - -static void test_xval_check_no_irq(enum arch_timer timer, uint64_t xval, - uint64_t usec, enum timer_view timer_view, - sleep_method_t guest_sleep) -{ - local_irq_disable(); - - set_xval_irq(timer, xval, CTL_ENABLE | CTL_IMASK, timer_view); - guest_sleep(timer, usec); - - local_irq_enable(); - isb(); - - /* Assume success (no IRQ) after waiting usec microseconds */ - assert_irqs_handled(0); -} - -static void test_cval_no_irq(enum arch_timer timer, uint64_t cval, - uint64_t usec, sleep_method_t wm) -{ - test_xval_check_no_irq(timer, cval, usec, TIMER_CVAL, wm); -} - -static void test_tval_no_irq(enum arch_timer timer, int32_t tval, uint64_t usec, - sleep_method_t wm) -{ - /* tval will be cast to an int32_t in test_xval_check_no_irq */ - test_xval_check_no_irq(timer, (uint64_t) tval, usec, TIMER_TVAL, wm); -} - -/* Test masking/unmasking a timer using the timer mask (not the IRQ mask). */ -static void test_timer_control_mask_then_unmask(enum arch_timer timer) -{ - reset_timer_state(timer, DEF_CNT); - set_tval_irq(timer, -1, CTL_ENABLE | CTL_IMASK); - - /* Unmask the timer, and then get an IRQ. */ - local_irq_disable(); - timer_set_ctl(timer, CTL_ENABLE); - /* This method re-enables IRQs to handle the one we're looking for. */ - wait_for_non_spurious_irq(); - - assert_irqs_handled(1); - local_irq_enable(); -} - -/* Check that timer control masks actually mask a timer being fired. */ -static void test_timer_control_masks(enum arch_timer timer) -{ - reset_timer_state(timer, DEF_CNT); - - /* Local IRQs are not masked at this point. */ - - set_tval_irq(timer, -1, CTL_ENABLE | CTL_IMASK); - - /* Assume no IRQ after waiting TIMEOUT_NO_IRQ_US microseconds */ - sleep_poll(timer, TIMEOUT_NO_IRQ_US); - - assert_irqs_handled(0); - timer_set_ctl(timer, CTL_IMASK); -} - -static void test_fire_a_timer_multiple_times(enum arch_timer timer, - irq_wait_method_t wm, int num) -{ - int i; - - local_irq_disable(); - reset_timer_state(timer, DEF_CNT); - - set_tval_irq(timer, 0, CTL_ENABLE); - - for (i = 1; i <= num; i++) { - /* This method re-enables IRQs to handle the one we're looking for. */ - wm(); - - /* The IRQ handler masked and disabled the timer. 
- * Enable and unmmask it again. - */ - timer_set_ctl(timer, CTL_ENABLE); - - assert_irqs_handled(i); - } - - local_irq_enable(); -} - -static void test_timers_fired_multiple_times(enum arch_timer timer) -{ - int i; - - for (i = 0; i < ARRAY_SIZE(irq_wait_method); i++) - test_fire_a_timer_multiple_times(timer, irq_wait_method[i], 10); -} - -/* - * Set a timer for tval=delta_1_ms then reprogram it to - * tval=delta_2_ms. Check that we get the timer fired. There is no - * timeout for the wait: we use the wfi instruction. - */ -static void test_reprogramming_timer(enum arch_timer timer, irq_wait_method_t wm, - int32_t delta_1_ms, int32_t delta_2_ms) -{ - local_irq_disable(); - reset_timer_state(timer, DEF_CNT); - - /* Program the timer to DEF_CNT + delta_1_ms. */ - set_tval_irq(timer, msec_to_cycles(delta_1_ms), CTL_ENABLE); - - /* Reprogram the timer to DEF_CNT + delta_2_ms. */ - timer_set_tval(timer, msec_to_cycles(delta_2_ms)); - - /* This method re-enables IRQs to handle the one we're looking for. */ - wm(); - - /* The IRQ should arrive at DEF_CNT + delta_2_ms (or after). */ - GUEST_ASSERT(timer_get_cntct(timer) >= - DEF_CNT + msec_to_cycles(delta_2_ms)); - - local_irq_enable(); - assert_irqs_handled(1); -}; - -static void test_reprogram_timers(enum arch_timer timer) -{ - int i; - uint64_t base_wait = test_args.wait_ms; - - for (i = 0; i < ARRAY_SIZE(irq_wait_method); i++) { - /* - * Ensure reprogramming works whether going from a - * longer time to a shorter or vice versa. - */ - test_reprogramming_timer(timer, irq_wait_method[i], 2 * base_wait, - base_wait); - test_reprogramming_timer(timer, irq_wait_method[i], base_wait, - 2 * base_wait); - } -} - -static void test_basic_functionality(enum arch_timer timer) -{ - int32_t tval = (int32_t) msec_to_cycles(test_args.wait_ms); - uint64_t cval = DEF_CNT + msec_to_cycles(test_args.wait_ms); - int i; - - for (i = 0; i < ARRAY_SIZE(irq_wait_method); i++) { - irq_wait_method_t wm = irq_wait_method[i]; - - test_timer_cval(timer, cval, wm, true, DEF_CNT); - test_timer_tval(timer, tval, wm, true, DEF_CNT); - } -} - -/* - * This test checks basic timer behavior without actually firing timers, things - * like: the relationship between cval and tval, tval down-counting. - */ -static void timers_sanity_checks(enum arch_timer timer, bool use_sched) -{ - reset_timer_state(timer, DEF_CNT); - - local_irq_disable(); - - /* cval in the past */ - timer_set_cval(timer, - timer_get_cntct(timer) - - msec_to_cycles(test_args.wait_ms)); - if (use_sched) - userspace_migrate_vcpu(); - GUEST_ASSERT(timer_get_tval(timer) < 0); - - /* tval in the past */ - timer_set_tval(timer, -1); - if (use_sched) - userspace_migrate_vcpu(); - GUEST_ASSERT(timer_get_cval(timer) < timer_get_cntct(timer)); - - /* tval larger than TVAL_MAX. This requires programming with - * timer_set_cval instead so the value is expressible - */ - timer_set_cval(timer, - timer_get_cntct(timer) + TVAL_MAX + - msec_to_cycles(test_args.wait_ms)); - if (use_sched) - userspace_migrate_vcpu(); - GUEST_ASSERT(timer_get_tval(timer) <= 0); - - /* - * tval larger than 2 * TVAL_MAX. - * Twice the TVAL_MAX completely loops around the TVAL. - */ - timer_set_cval(timer, - timer_get_cntct(timer) + 2ULL * TVAL_MAX + - msec_to_cycles(test_args.wait_ms)); - if (use_sched) - userspace_migrate_vcpu(); - GUEST_ASSERT(timer_get_tval(timer) <= - msec_to_cycles(test_args.wait_ms)); - - /* negative tval that rollovers from 0. 
*/ - set_counter(timer, msec_to_cycles(1)); - timer_set_tval(timer, -1 * msec_to_cycles(test_args.wait_ms)); - if (use_sched) - userspace_migrate_vcpu(); - GUEST_ASSERT(timer_get_cval(timer) >= (CVAL_MAX - msec_to_cycles(test_args.wait_ms))); - - /* tval should keep down-counting from 0 to -1. */ - timer_set_tval(timer, 0); - sleep_poll(timer, 1); - GUEST_ASSERT(timer_get_tval(timer) < 0); - - local_irq_enable(); - - /* Mask and disable any pending timer. */ - timer_set_ctl(timer, CTL_IMASK); -} - -static void test_timers_sanity_checks(enum arch_timer timer) -{ - timers_sanity_checks(timer, false); - /* Check how KVM saves/restores these edge-case values. */ - timers_sanity_checks(timer, true); -} - -static void test_set_cnt_after_tval_max(enum arch_timer timer, irq_wait_method_t wm) -{ - local_irq_disable(); - reset_timer_state(timer, DEF_CNT); - - set_cval_irq(timer, - (uint64_t) TVAL_MAX + - msec_to_cycles(test_args.wait_ms) / 2, CTL_ENABLE); - - set_counter(timer, TVAL_MAX); - - /* This method re-enables IRQs to handle the one we're looking for. */ - wm(); - - assert_irqs_handled(1); - local_irq_enable(); -} - -/* Test timers set for: cval = now + TVAL_MAX + wait_ms / 2 */ -static void test_timers_above_tval_max(enum arch_timer timer) -{ - uint64_t cval; - int i; - - /* - * Test that the system is not implementing cval in terms of - * tval. If that was the case, setting a cval to "cval = now - * + TVAL_MAX + wait_ms" would wrap to "cval = now + - * wait_ms", and the timer would fire immediately. Test that it - * doesn't. - */ - for (i = 0; i < ARRAY_SIZE(sleep_method); i++) { - reset_timer_state(timer, DEF_CNT); - cval = timer_get_cntct(timer) + TVAL_MAX + - msec_to_cycles(test_args.wait_ms); - test_cval_no_irq(timer, cval, - msecs_to_usecs(test_args.wait_ms) + - TIMEOUT_NO_IRQ_US, sleep_method[i]); - } - - for (i = 0; i < ARRAY_SIZE(irq_wait_method); i++) { - /* Get the IRQ by moving the counter forward. */ - test_set_cnt_after_tval_max(timer, irq_wait_method[i]); - } -} - -/* - * Template function to be used by the test_move_counter_ahead_* tests. It - * sets the counter to cnt_1, the [c|t]val, the counter to cnt_2, and - * then waits for an IRQ. - */ -static void test_set_cnt_after_xval(enum arch_timer timer, uint64_t cnt_1, - uint64_t xval, uint64_t cnt_2, - irq_wait_method_t wm, enum timer_view tv) -{ - local_irq_disable(); - - set_counter(timer, cnt_1); - timer_set_ctl(timer, CTL_IMASK); - - set_xval_irq(timer, xval, CTL_ENABLE, tv); - set_counter(timer, cnt_2); - /* This method re-enables IRQs to handle the one we're looking for. */ - wm(); - - assert_irqs_handled(1); - local_irq_enable(); -} - -/* - * Template function to be used by the test_move_counter_ahead_* tests. It - * sets the counter to cnt_1, the [c|t]val, the counter to cnt_2, and - * then waits for an IRQ. 
- */ -static void test_set_cnt_after_xval_no_irq(enum arch_timer timer, - uint64_t cnt_1, uint64_t xval, - uint64_t cnt_2, - sleep_method_t guest_sleep, - enum timer_view tv) -{ - local_irq_disable(); - - set_counter(timer, cnt_1); - timer_set_ctl(timer, CTL_IMASK); - - set_xval_irq(timer, xval, CTL_ENABLE, tv); - set_counter(timer, cnt_2); - guest_sleep(timer, TIMEOUT_NO_IRQ_US); - - local_irq_enable(); - isb(); - - /* Assume no IRQ after waiting TIMEOUT_NO_IRQ_US microseconds */ - assert_irqs_handled(0); - timer_set_ctl(timer, CTL_IMASK); -} - -static void test_set_cnt_after_tval(enum arch_timer timer, uint64_t cnt_1, - int32_t tval, uint64_t cnt_2, - irq_wait_method_t wm) -{ - test_set_cnt_after_xval(timer, cnt_1, tval, cnt_2, wm, TIMER_TVAL); -} - -static void test_set_cnt_after_cval(enum arch_timer timer, uint64_t cnt_1, - uint64_t cval, uint64_t cnt_2, - irq_wait_method_t wm) -{ - test_set_cnt_after_xval(timer, cnt_1, cval, cnt_2, wm, TIMER_CVAL); -} - -static void test_set_cnt_after_tval_no_irq(enum arch_timer timer, - uint64_t cnt_1, int32_t tval, - uint64_t cnt_2, sleep_method_t wm) -{ - test_set_cnt_after_xval_no_irq(timer, cnt_1, tval, cnt_2, wm, - TIMER_TVAL); -} - -static void test_set_cnt_after_cval_no_irq(enum arch_timer timer, - uint64_t cnt_1, uint64_t cval, - uint64_t cnt_2, sleep_method_t wm) -{ - test_set_cnt_after_xval_no_irq(timer, cnt_1, cval, cnt_2, wm, - TIMER_CVAL); -} - -/* Set a timer and then move the counter ahead of it. */ -static void test_move_counters_ahead_of_timers(enum arch_timer timer) -{ - int i; - int32_t tval; - - for (i = 0; i < ARRAY_SIZE(irq_wait_method); i++) { - irq_wait_method_t wm = irq_wait_method[i]; - - test_set_cnt_after_cval(timer, 0, DEF_CNT, DEF_CNT + 1, wm); - test_set_cnt_after_cval(timer, CVAL_MAX, 1, 2, wm); - - /* Move counter ahead of negative tval. */ - test_set_cnt_after_tval(timer, 0, -1, DEF_CNT + 1, wm); - test_set_cnt_after_tval(timer, 0, -1, TVAL_MAX, wm); - tval = TVAL_MAX; - test_set_cnt_after_tval(timer, 0, tval, (uint64_t) tval + 1, - wm); - } - - for (i = 0; i < ARRAY_SIZE(sleep_method); i++) { - sleep_method_t sm = sleep_method[i]; - - test_set_cnt_after_cval_no_irq(timer, 0, DEF_CNT, CVAL_MAX, sm); - } -} - -/* - * Program a timer, mask it, and then change the tval or counter to cancel it. - * Unmask it and check that nothing fires. - */ -static void test_move_counters_behind_timers(enum arch_timer timer) -{ - int i; - - for (i = 0; i < ARRAY_SIZE(sleep_method); i++) { - sleep_method_t sm = sleep_method[i]; - - test_set_cnt_after_cval_no_irq(timer, DEF_CNT, DEF_CNT - 1, 0, - sm); - test_set_cnt_after_tval_no_irq(timer, DEF_CNT, -1, 0, sm); - } -} - -static void test_timers_in_the_past(enum arch_timer timer) -{ - int32_t tval = -1 * (int32_t) msec_to_cycles(test_args.wait_ms); - uint64_t cval; - int i; - - for (i = 0; i < ARRAY_SIZE(irq_wait_method); i++) { - irq_wait_method_t wm = irq_wait_method[i]; - - /* set a timer wait_ms the past. */ - cval = DEF_CNT - msec_to_cycles(test_args.wait_ms); - test_timer_cval(timer, cval, wm, true, DEF_CNT); - test_timer_tval(timer, tval, wm, true, DEF_CNT); - - /* Set a timer to counter=0 (in the past) */ - test_timer_cval(timer, 0, wm, true, DEF_CNT); - - /* Set a time for tval=0 (now) */ - test_timer_tval(timer, 0, wm, true, DEF_CNT); - - /* Set a timer to as far in the past as possible */ - test_timer_tval(timer, TVAL_MIN, wm, true, DEF_CNT); - } - - /* - * Set the counter to wait_ms, and a tval to -wait_ms. There should be no - * IRQ as that tval means cval=CVAL_MAX-wait_ms. 
- */ - for (i = 0; i < ARRAY_SIZE(sleep_method); i++) { - sleep_method_t sm = sleep_method[i]; - - set_counter(timer, msec_to_cycles(test_args.wait_ms)); - test_tval_no_irq(timer, tval, TIMEOUT_NO_IRQ_US, sm); - } -} - -static void test_long_timer_delays(enum arch_timer timer) -{ - int32_t tval = (int32_t) msec_to_cycles(test_args.long_wait_ms); - uint64_t cval = DEF_CNT + msec_to_cycles(test_args.long_wait_ms); - int i; - - for (i = 0; i < ARRAY_SIZE(irq_wait_method); i++) { - irq_wait_method_t wm = irq_wait_method[i]; - - test_timer_cval(timer, cval, wm, true, DEF_CNT); - test_timer_tval(timer, tval, wm, true, DEF_CNT); - } -} - -static void guest_run_iteration(enum arch_timer timer) -{ - test_basic_functionality(timer); - test_timers_sanity_checks(timer); - - test_timers_above_tval_max(timer); - test_timers_in_the_past(timer); - - test_move_counters_ahead_of_timers(timer); - test_move_counters_behind_timers(timer); - test_reprogram_timers(timer); - - test_timers_fired_multiple_times(timer); - - test_timer_control_mask_then_unmask(timer); - test_timer_control_masks(timer); -} - -static void guest_code(enum arch_timer timer) -{ - int i; - - local_irq_disable(); - - gic_init(GIC_V3, 1); - - timer_set_ctl(VIRTUAL, CTL_IMASK); - timer_set_ctl(PHYSICAL, CTL_IMASK); - - gic_irq_enable(vtimer_irq); - gic_irq_enable(ptimer_irq); - local_irq_enable(); - - for (i = 0; i < test_args.iterations; i++) { - GUEST_SYNC(i); - guest_run_iteration(timer); - } - - test_long_timer_delays(timer); - GUEST_DONE(); -} - -static uint32_t next_pcpu(void) -{ - uint32_t max = get_nprocs(); - uint32_t cur = sched_getcpu(); - uint32_t next = cur; - cpu_set_t cpuset; - - TEST_ASSERT(max > 1, "Need at least two physical cpus"); - - sched_getaffinity(0, sizeof(cpuset), &cpuset); - - do { - next = (next + 1) % CPU_SETSIZE; - } while (!CPU_ISSET(next, &cpuset)); - - return next; -} - -static void migrate_self(uint32_t new_pcpu) -{ - int ret; - cpu_set_t cpuset; - pthread_t thread; - - thread = pthread_self(); - - CPU_ZERO(&cpuset); - CPU_SET(new_pcpu, &cpuset); - - pr_debug("Migrating from %u to %u\n", sched_getcpu(), new_pcpu); - - ret = pthread_setaffinity_np(thread, sizeof(cpuset), &cpuset); - - TEST_ASSERT(ret == 0, "Failed to migrate to pCPU: %u; ret: %d\n", - new_pcpu, ret); -} - -static void kvm_set_cntxct(struct kvm_vcpu *vcpu, uint64_t cnt, - enum arch_timer timer) -{ - if (timer == PHYSICAL) - vcpu_set_reg(vcpu, KVM_REG_ARM_PTIMER_CNT, cnt); - else - vcpu_set_reg(vcpu, KVM_REG_ARM_TIMER_CNT, cnt); -} - -static void handle_sync(struct kvm_vcpu *vcpu, struct ucall *uc) -{ - enum sync_cmd cmd = uc->args[1]; - uint64_t val = uc->args[2]; - enum arch_timer timer = uc->args[3]; - - switch (cmd) { - case SET_COUNTER_VALUE: - kvm_set_cntxct(vcpu, val, timer); - break; - case USERSPACE_USLEEP: - usleep(val); - break; - case USERSPACE_SCHED_YIELD: - sched_yield(); - break; - case USERSPACE_MIGRATE_SELF: - migrate_self(next_pcpu()); - break; - default: - break; - } -} - -static void test_run(struct kvm_vm *vm, struct kvm_vcpu *vcpu) -{ - struct ucall uc; - - /* Start on CPU 0 */ - migrate_self(0); - - while (true) { - vcpu_run(vcpu); - switch (get_ucall(vcpu, &uc)) { - case UCALL_SYNC: - handle_sync(vcpu, &uc); - break; - case UCALL_DONE: - goto out; - case UCALL_ABORT: - REPORT_GUEST_ASSERT(uc); - goto out; - default: - TEST_FAIL("Unexpected guest exit\n"); - } - } - - out: - return; -} - -static void test_init_timer_irq(struct kvm_vm *vm, struct kvm_vcpu *vcpu) -{ - vcpu_device_attr_get(vcpu, KVM_ARM_VCPU_TIMER_CTRL, - 
KVM_ARM_VCPU_TIMER_IRQ_PTIMER, &ptimer_irq); - vcpu_device_attr_get(vcpu, KVM_ARM_VCPU_TIMER_CTRL, - KVM_ARM_VCPU_TIMER_IRQ_VTIMER, &vtimer_irq); - - sync_global_to_guest(vm, ptimer_irq); - sync_global_to_guest(vm, vtimer_irq); - - pr_debug("ptimer_irq: %d; vtimer_irq: %d\n", ptimer_irq, vtimer_irq); -} - -static void test_vm_create(struct kvm_vm **vm, struct kvm_vcpu **vcpu, - enum arch_timer timer) -{ - *vm = vm_create_with_one_vcpu(vcpu, guest_code); - TEST_ASSERT(*vm, "Failed to create the test VM\n"); - - vm_init_descriptor_tables(*vm); - vm_install_exception_handler(*vm, VECTOR_IRQ_CURRENT, - guest_irq_handler); - - vcpu_init_descriptor_tables(*vcpu); - vcpu_args_set(*vcpu, 1, timer); - - test_init_timer_irq(*vm, *vcpu); - vgic_v3_setup(*vm, 1, 64); - sync_global_to_guest(*vm, test_args); -} - -static void test_print_help(char *name) -{ - pr_info("Usage: %s [-h] [-b] [-i iterations] [-l long_wait_ms] [-p] [-v]\n" - , name); - pr_info("\t-i: Number of iterations (default: %u)\n", - NR_TEST_ITERS_DEF); - pr_info("\t-b: Test both physical and virtual timers (default: true)\n"); - pr_info("\t-l: Delta (in ms) used for long wait time test (default: %u)\n", - LONG_WAIT_TEST_MS); - pr_info("\t-l: Delta (in ms) used for wait times (default: %u)\n", - WAIT_TEST_MS); - pr_info("\t-p: Test physical timer (default: true)\n"); - pr_info("\t-v: Test virtual timer (default: true)\n"); - pr_info("\t-h: Print this help message\n"); -} - -static bool parse_args(int argc, char *argv[]) -{ - int opt; - - while ((opt = getopt(argc, argv, "bhi:l:pvw:")) != -1) { - switch (opt) { - case 'b': - test_args.test_physical = true; - test_args.test_virtual = true; - break; - case 'i': - test_args.iterations = - atoi_positive("Number of iterations", optarg); - break; - case 'l': - test_args.long_wait_ms = - atoi_positive("Long wait time", optarg); - break; - case 'p': - test_args.test_physical = true; - test_args.test_virtual = false; - break; - case 'v': - test_args.test_virtual = true; - test_args.test_physical = false; - break; - case 'w': - test_args.wait_ms = atoi_positive("Wait time", optarg); - break; - case 'h': - default: - goto err; - } - } - - return true; - - err: - test_print_help(argv[0]); - return false; -} - -int main(int argc, char *argv[]) -{ - struct kvm_vcpu *vcpu; - struct kvm_vm *vm; - - /* Tell stdout not to buffer its content */ - setbuf(stdout, NULL); - - if (!parse_args(argc, argv)) - exit(KSFT_SKIP); - - if (test_args.test_virtual) { - test_vm_create(&vm, &vcpu, VIRTUAL); - test_run(vm, vcpu); - kvm_vm_free(vm); - } - - if (test_args.test_physical) { - test_vm_create(&vm, &vcpu, PHYSICAL); - test_run(vm, vcpu); - kvm_vm_free(vm); - } - - return 0; -} diff --git a/tools/testing/selftests/kvm/aarch64/debug-exceptions.c b/tools/testing/selftests/kvm/aarch64/debug-exceptions.c deleted file mode 100644 index c7fb55c9135b..000000000000 --- a/tools/testing/selftests/kvm/aarch64/debug-exceptions.c +++ /dev/null @@ -1,607 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include -#include -#include -#include - -#define MDSCR_KDE (1 << 13) -#define MDSCR_MDE (1 << 15) -#define MDSCR_SS (1 << 0) - -#define DBGBCR_LEN8 (0xff << 5) -#define DBGBCR_EXEC (0x0 << 3) -#define DBGBCR_EL1 (0x1 << 1) -#define DBGBCR_E (0x1 << 0) -#define DBGBCR_LBN_SHIFT 16 -#define DBGBCR_BT_SHIFT 20 -#define DBGBCR_BT_ADDR_LINK_CTX (0x1 << DBGBCR_BT_SHIFT) -#define DBGBCR_BT_CTX_LINK (0x3 << DBGBCR_BT_SHIFT) - -#define DBGWCR_LEN8 (0xff << 5) -#define DBGWCR_RD (0x1 << 3) -#define DBGWCR_WR (0x2 << 3) -#define DBGWCR_EL1 
(0x1 << 1) -#define DBGWCR_E (0x1 << 0) -#define DBGWCR_LBN_SHIFT 16 -#define DBGWCR_WT_SHIFT 20 -#define DBGWCR_WT_LINK (0x1 << DBGWCR_WT_SHIFT) - -#define SPSR_D (1 << 9) -#define SPSR_SS (1 << 21) - -extern unsigned char sw_bp, sw_bp2, hw_bp, hw_bp2, bp_svc, bp_brk, hw_wp, ss_start, hw_bp_ctx; -extern unsigned char iter_ss_begin, iter_ss_end; -static volatile uint64_t sw_bp_addr, hw_bp_addr; -static volatile uint64_t wp_addr, wp_data_addr; -static volatile uint64_t svc_addr; -static volatile uint64_t ss_addr[4], ss_idx; -#define PC(v) ((uint64_t)&(v)) - -#define GEN_DEBUG_WRITE_REG(reg_name) \ -static void write_##reg_name(int num, uint64_t val) \ -{ \ - switch (num) { \ - case 0: \ - write_sysreg(val, reg_name##0_el1); \ - break; \ - case 1: \ - write_sysreg(val, reg_name##1_el1); \ - break; \ - case 2: \ - write_sysreg(val, reg_name##2_el1); \ - break; \ - case 3: \ - write_sysreg(val, reg_name##3_el1); \ - break; \ - case 4: \ - write_sysreg(val, reg_name##4_el1); \ - break; \ - case 5: \ - write_sysreg(val, reg_name##5_el1); \ - break; \ - case 6: \ - write_sysreg(val, reg_name##6_el1); \ - break; \ - case 7: \ - write_sysreg(val, reg_name##7_el1); \ - break; \ - case 8: \ - write_sysreg(val, reg_name##8_el1); \ - break; \ - case 9: \ - write_sysreg(val, reg_name##9_el1); \ - break; \ - case 10: \ - write_sysreg(val, reg_name##10_el1); \ - break; \ - case 11: \ - write_sysreg(val, reg_name##11_el1); \ - break; \ - case 12: \ - write_sysreg(val, reg_name##12_el1); \ - break; \ - case 13: \ - write_sysreg(val, reg_name##13_el1); \ - break; \ - case 14: \ - write_sysreg(val, reg_name##14_el1); \ - break; \ - case 15: \ - write_sysreg(val, reg_name##15_el1); \ - break; \ - default: \ - GUEST_ASSERT(0); \ - } \ -} - -/* Define write_dbgbcr()/write_dbgbvr()/write_dbgwcr()/write_dbgwvr() */ -GEN_DEBUG_WRITE_REG(dbgbcr) -GEN_DEBUG_WRITE_REG(dbgbvr) -GEN_DEBUG_WRITE_REG(dbgwcr) -GEN_DEBUG_WRITE_REG(dbgwvr) - -static void reset_debug_state(void) -{ - uint8_t brps, wrps, i; - uint64_t dfr0; - - asm volatile("msr daifset, #8"); - - write_sysreg(0, osdlr_el1); - write_sysreg(0, oslar_el1); - isb(); - - write_sysreg(0, mdscr_el1); - write_sysreg(0, contextidr_el1); - - /* Reset all bcr/bvr/wcr/wvr registers */ - dfr0 = read_sysreg(id_aa64dfr0_el1); - brps = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_BRPs), dfr0); - for (i = 0; i <= brps; i++) { - write_dbgbcr(i, 0); - write_dbgbvr(i, 0); - } - wrps = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_WRPs), dfr0); - for (i = 0; i <= wrps; i++) { - write_dbgwcr(i, 0); - write_dbgwvr(i, 0); - } - - isb(); -} - -static void enable_os_lock(void) -{ - write_sysreg(1, oslar_el1); - isb(); - - GUEST_ASSERT(read_sysreg(oslsr_el1) & 2); -} - -static void enable_monitor_debug_exceptions(void) -{ - uint32_t mdscr; - - asm volatile("msr daifclr, #8"); - - mdscr = read_sysreg(mdscr_el1) | MDSCR_KDE | MDSCR_MDE; - write_sysreg(mdscr, mdscr_el1); - isb(); -} - -static void install_wp(uint8_t wpn, uint64_t addr) -{ - uint32_t wcr; - - wcr = DBGWCR_LEN8 | DBGWCR_RD | DBGWCR_WR | DBGWCR_EL1 | DBGWCR_E; - write_dbgwcr(wpn, wcr); - write_dbgwvr(wpn, addr); - - isb(); - - enable_monitor_debug_exceptions(); -} - -static void install_hw_bp(uint8_t bpn, uint64_t addr) -{ - uint32_t bcr; - - bcr = DBGBCR_LEN8 | DBGBCR_EXEC | DBGBCR_EL1 | DBGBCR_E; - write_dbgbcr(bpn, bcr); - write_dbgbvr(bpn, addr); - isb(); - - enable_monitor_debug_exceptions(); -} - -static void install_wp_ctx(uint8_t addr_wp, uint8_t ctx_bp, uint64_t addr, - uint64_t ctx) -{ - uint32_t wcr; - uint64_t 
ctx_bcr; - - /* Setup a context-aware breakpoint for Linked Context ID Match */ - ctx_bcr = DBGBCR_LEN8 | DBGBCR_EXEC | DBGBCR_EL1 | DBGBCR_E | - DBGBCR_BT_CTX_LINK; - write_dbgbcr(ctx_bp, ctx_bcr); - write_dbgbvr(ctx_bp, ctx); - - /* Setup a linked watchpoint (linked to the context-aware breakpoint) */ - wcr = DBGWCR_LEN8 | DBGWCR_RD | DBGWCR_WR | DBGWCR_EL1 | DBGWCR_E | - DBGWCR_WT_LINK | ((uint32_t)ctx_bp << DBGWCR_LBN_SHIFT); - write_dbgwcr(addr_wp, wcr); - write_dbgwvr(addr_wp, addr); - isb(); - - enable_monitor_debug_exceptions(); -} - -void install_hw_bp_ctx(uint8_t addr_bp, uint8_t ctx_bp, uint64_t addr, - uint64_t ctx) -{ - uint32_t addr_bcr, ctx_bcr; - - /* Setup a context-aware breakpoint for Linked Context ID Match */ - ctx_bcr = DBGBCR_LEN8 | DBGBCR_EXEC | DBGBCR_EL1 | DBGBCR_E | - DBGBCR_BT_CTX_LINK; - write_dbgbcr(ctx_bp, ctx_bcr); - write_dbgbvr(ctx_bp, ctx); - - /* - * Setup a normal breakpoint for Linked Address Match, and link it - * to the context-aware breakpoint. - */ - addr_bcr = DBGBCR_LEN8 | DBGBCR_EXEC | DBGBCR_EL1 | DBGBCR_E | - DBGBCR_BT_ADDR_LINK_CTX | - ((uint32_t)ctx_bp << DBGBCR_LBN_SHIFT); - write_dbgbcr(addr_bp, addr_bcr); - write_dbgbvr(addr_bp, addr); - isb(); - - enable_monitor_debug_exceptions(); -} - -static void install_ss(void) -{ - uint32_t mdscr; - - asm volatile("msr daifclr, #8"); - - mdscr = read_sysreg(mdscr_el1) | MDSCR_KDE | MDSCR_SS; - write_sysreg(mdscr, mdscr_el1); - isb(); -} - -static volatile char write_data; - -static void guest_code(uint8_t bpn, uint8_t wpn, uint8_t ctx_bpn) -{ - uint64_t ctx = 0xabcdef; /* a random context number */ - - /* Software-breakpoint */ - reset_debug_state(); - asm volatile("sw_bp: brk #0"); - GUEST_ASSERT_EQ(sw_bp_addr, PC(sw_bp)); - - /* Hardware-breakpoint */ - reset_debug_state(); - install_hw_bp(bpn, PC(hw_bp)); - asm volatile("hw_bp: nop"); - GUEST_ASSERT_EQ(hw_bp_addr, PC(hw_bp)); - - /* Hardware-breakpoint + svc */ - reset_debug_state(); - install_hw_bp(bpn, PC(bp_svc)); - asm volatile("bp_svc: svc #0"); - GUEST_ASSERT_EQ(hw_bp_addr, PC(bp_svc)); - GUEST_ASSERT_EQ(svc_addr, PC(bp_svc) + 4); - - /* Hardware-breakpoint + software-breakpoint */ - reset_debug_state(); - install_hw_bp(bpn, PC(bp_brk)); - asm volatile("bp_brk: brk #0"); - GUEST_ASSERT_EQ(sw_bp_addr, PC(bp_brk)); - GUEST_ASSERT_EQ(hw_bp_addr, PC(bp_brk)); - - /* Watchpoint */ - reset_debug_state(); - install_wp(wpn, PC(write_data)); - write_data = 'x'; - GUEST_ASSERT_EQ(write_data, 'x'); - GUEST_ASSERT_EQ(wp_data_addr, PC(write_data)); - - /* Single-step */ - reset_debug_state(); - install_ss(); - ss_idx = 0; - asm volatile("ss_start:\n" - "mrs x0, esr_el1\n" - "add x0, x0, #1\n" - "msr daifset, #8\n" - : : : "x0"); - GUEST_ASSERT_EQ(ss_addr[0], PC(ss_start)); - GUEST_ASSERT_EQ(ss_addr[1], PC(ss_start) + 4); - GUEST_ASSERT_EQ(ss_addr[2], PC(ss_start) + 8); - - /* OS Lock does not block software-breakpoint */ - reset_debug_state(); - enable_os_lock(); - sw_bp_addr = 0; - asm volatile("sw_bp2: brk #0"); - GUEST_ASSERT_EQ(sw_bp_addr, PC(sw_bp2)); - - /* OS Lock blocking hardware-breakpoint */ - reset_debug_state(); - enable_os_lock(); - install_hw_bp(bpn, PC(hw_bp2)); - hw_bp_addr = 0; - asm volatile("hw_bp2: nop"); - GUEST_ASSERT_EQ(hw_bp_addr, 0); - - /* OS Lock blocking watchpoint */ - reset_debug_state(); - enable_os_lock(); - write_data = '\0'; - wp_data_addr = 0; - install_wp(wpn, PC(write_data)); - write_data = 'x'; - GUEST_ASSERT_EQ(write_data, 'x'); - GUEST_ASSERT_EQ(wp_data_addr, 0); - - /* OS Lock blocking single-step */ - 
reset_debug_state(); - enable_os_lock(); - ss_addr[0] = 0; - install_ss(); - ss_idx = 0; - asm volatile("mrs x0, esr_el1\n\t" - "add x0, x0, #1\n\t" - "msr daifset, #8\n\t" - : : : "x0"); - GUEST_ASSERT_EQ(ss_addr[0], 0); - - /* Linked hardware-breakpoint */ - hw_bp_addr = 0; - reset_debug_state(); - install_hw_bp_ctx(bpn, ctx_bpn, PC(hw_bp_ctx), ctx); - /* Set context id */ - write_sysreg(ctx, contextidr_el1); - isb(); - asm volatile("hw_bp_ctx: nop"); - write_sysreg(0, contextidr_el1); - GUEST_ASSERT_EQ(hw_bp_addr, PC(hw_bp_ctx)); - - /* Linked watchpoint */ - reset_debug_state(); - install_wp_ctx(wpn, ctx_bpn, PC(write_data), ctx); - /* Set context id */ - write_sysreg(ctx, contextidr_el1); - isb(); - write_data = 'x'; - GUEST_ASSERT_EQ(write_data, 'x'); - GUEST_ASSERT_EQ(wp_data_addr, PC(write_data)); - - GUEST_DONE(); -} - -static void guest_sw_bp_handler(struct ex_regs *regs) -{ - sw_bp_addr = regs->pc; - regs->pc += 4; -} - -static void guest_hw_bp_handler(struct ex_regs *regs) -{ - hw_bp_addr = regs->pc; - regs->pstate |= SPSR_D; -} - -static void guest_wp_handler(struct ex_regs *regs) -{ - wp_data_addr = read_sysreg(far_el1); - wp_addr = regs->pc; - regs->pstate |= SPSR_D; -} - -static void guest_ss_handler(struct ex_regs *regs) -{ - __GUEST_ASSERT(ss_idx < 4, "Expected index < 4, got '%lu'", ss_idx); - ss_addr[ss_idx++] = regs->pc; - regs->pstate |= SPSR_SS; -} - -static void guest_svc_handler(struct ex_regs *regs) -{ - svc_addr = regs->pc; -} - -static void guest_code_ss(int test_cnt) -{ - uint64_t i; - uint64_t bvr, wvr, w_bvr, w_wvr; - - for (i = 0; i < test_cnt; i++) { - /* Bits [1:0] of dbg{b,w}vr are RES0 */ - w_bvr = i << 2; - w_wvr = i << 2; - - /* - * Enable Single Step execution. Note! This _must_ be a bare - * ucall as the ucall() path uses atomic operations to manage - * the ucall structures, and the built-in "atomics" are usually - * implemented via exclusive access instructions. The exlusive - * monitor is cleared on ERET, and so taking debug exceptions - * during a LDREX=>STREX sequence will prevent forward progress - * and hang the guest/test. - */ - GUEST_UCALL_NONE(); - - /* - * The userspace will verify that the pc is as expected during - * single step execution between iter_ss_begin and iter_ss_end. - */ - asm volatile("iter_ss_begin:nop\n"); - - write_sysreg(w_bvr, dbgbvr0_el1); - write_sysreg(w_wvr, dbgwvr0_el1); - bvr = read_sysreg(dbgbvr0_el1); - wvr = read_sysreg(dbgwvr0_el1); - - /* Userspace disables Single Step when the end is nigh. 
*/ - asm volatile("iter_ss_end:\n"); - - GUEST_ASSERT_EQ(bvr, w_bvr); - GUEST_ASSERT_EQ(wvr, w_wvr); - } - GUEST_DONE(); -} - -static int debug_version(uint64_t id_aa64dfr0) -{ - return FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_DebugVer), id_aa64dfr0); -} - -static void test_guest_debug_exceptions(uint8_t bpn, uint8_t wpn, uint8_t ctx_bpn) -{ - struct kvm_vcpu *vcpu; - struct kvm_vm *vm; - struct ucall uc; - - vm = vm_create_with_one_vcpu(&vcpu, guest_code); - - vm_init_descriptor_tables(vm); - vcpu_init_descriptor_tables(vcpu); - - vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT, - ESR_ELx_EC_BRK64, guest_sw_bp_handler); - vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT, - ESR_ELx_EC_BREAKPT_CUR, guest_hw_bp_handler); - vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT, - ESR_ELx_EC_WATCHPT_CUR, guest_wp_handler); - vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT, - ESR_ELx_EC_SOFTSTP_CUR, guest_ss_handler); - vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT, - ESR_ELx_EC_SVC64, guest_svc_handler); - - /* Specify bpn/wpn/ctx_bpn to be tested */ - vcpu_args_set(vcpu, 3, bpn, wpn, ctx_bpn); - pr_debug("Use bpn#%d, wpn#%d and ctx_bpn#%d\n", bpn, wpn, ctx_bpn); - - vcpu_run(vcpu); - switch (get_ucall(vcpu, &uc)) { - case UCALL_ABORT: - REPORT_GUEST_ASSERT(uc); - break; - case UCALL_DONE: - goto done; - default: - TEST_FAIL("Unknown ucall %lu", uc.cmd); - } - -done: - kvm_vm_free(vm); -} - -void test_single_step_from_userspace(int test_cnt) -{ - struct kvm_vcpu *vcpu; - struct kvm_vm *vm; - struct ucall uc; - struct kvm_run *run; - uint64_t pc, cmd; - uint64_t test_pc = 0; - bool ss_enable = false; - struct kvm_guest_debug debug = {}; - - vm = vm_create_with_one_vcpu(&vcpu, guest_code_ss); - run = vcpu->run; - vcpu_args_set(vcpu, 1, test_cnt); - - while (1) { - vcpu_run(vcpu); - if (run->exit_reason != KVM_EXIT_DEBUG) { - cmd = get_ucall(vcpu, &uc); - if (cmd == UCALL_ABORT) { - REPORT_GUEST_ASSERT(uc); - /* NOT REACHED */ - } else if (cmd == UCALL_DONE) { - break; - } - - TEST_ASSERT(cmd == UCALL_NONE, - "Unexpected ucall cmd 0x%lx", cmd); - - debug.control = KVM_GUESTDBG_ENABLE | - KVM_GUESTDBG_SINGLESTEP; - ss_enable = true; - vcpu_guest_debug_set(vcpu, &debug); - continue; - } - - TEST_ASSERT(ss_enable, "Unexpected KVM_EXIT_DEBUG"); - - /* Check if the current pc is expected. */ - pc = vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.pc)); - TEST_ASSERT(!test_pc || pc == test_pc, - "Unexpected pc 0x%lx (expected 0x%lx)", - pc, test_pc); - - if ((pc + 4) == (uint64_t)&iter_ss_end) { - test_pc = 0; - debug.control = KVM_GUESTDBG_ENABLE; - ss_enable = false; - vcpu_guest_debug_set(vcpu, &debug); - continue; - } - - /* - * If the current pc is between iter_ss_bgin and - * iter_ss_end, the pc for the next KVM_EXIT_DEBUG should - * be the current pc + 4. - */ - if ((pc >= (uint64_t)&iter_ss_begin) && - (pc < (uint64_t)&iter_ss_end)) - test_pc = pc + 4; - else - test_pc = 0; - } - - kvm_vm_free(vm); -} - -/* - * Run debug testing using the various breakpoint#, watchpoint# and - * context-aware breakpoint# with the given ID_AA64DFR0_EL1 configuration. 
- */ -void test_guest_debug_exceptions_all(uint64_t aa64dfr0) -{ - uint8_t brp_num, wrp_num, ctx_brp_num, normal_brp_num, ctx_brp_base; - int b, w, c; - - /* Number of breakpoints */ - brp_num = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_BRPs), aa64dfr0) + 1; - __TEST_REQUIRE(brp_num >= 2, "At least two breakpoints are required"); - - /* Number of watchpoints */ - wrp_num = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_WRPs), aa64dfr0) + 1; - - /* Number of context aware breakpoints */ - ctx_brp_num = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_CTX_CMPs), aa64dfr0) + 1; - - pr_debug("%s brp_num:%d, wrp_num:%d, ctx_brp_num:%d\n", __func__, - brp_num, wrp_num, ctx_brp_num); - - /* Number of normal (non-context aware) breakpoints */ - normal_brp_num = brp_num - ctx_brp_num; - - /* Lowest context aware breakpoint number */ - ctx_brp_base = normal_brp_num; - - /* Run tests with all supported breakpoints/watchpoints */ - for (c = ctx_brp_base; c < ctx_brp_base + ctx_brp_num; c++) { - for (b = 0; b < normal_brp_num; b++) { - for (w = 0; w < wrp_num; w++) - test_guest_debug_exceptions(b, w, c); - } - } -} - -static void help(char *name) -{ - puts(""); - printf("Usage: %s [-h] [-i iterations of the single step test]\n", name); - puts(""); - exit(0); -} - -int main(int argc, char *argv[]) -{ - struct kvm_vcpu *vcpu; - struct kvm_vm *vm; - int opt; - int ss_iteration = 10000; - uint64_t aa64dfr0; - - vm = vm_create_with_one_vcpu(&vcpu, guest_code); - aa64dfr0 = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64DFR0_EL1)); - __TEST_REQUIRE(debug_version(aa64dfr0) >= 6, - "Armv8 debug architecture not supported."); - kvm_vm_free(vm); - - while ((opt = getopt(argc, argv, "i:")) != -1) { - switch (opt) { - case 'i': - ss_iteration = atoi_positive("Number of iterations", optarg); - break; - case 'h': - default: - help(argv[0]); - break; - } - } - - test_guest_debug_exceptions_all(aa64dfr0); - test_single_step_from_userspace(ss_iteration); - - return 0; -} diff --git a/tools/testing/selftests/kvm/aarch64/get-reg-list.c b/tools/testing/selftests/kvm/aarch64/get-reg-list.c deleted file mode 100644 index d43fb3f49050..000000000000 --- a/tools/testing/selftests/kvm/aarch64/get-reg-list.c +++ /dev/null @@ -1,771 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Check for KVM_GET_REG_LIST regressions. - * - * Copyright (C) 2020, Red Hat, Inc. - * - * While the blessed list should be created from the oldest possible - * kernel, we can't go older than v5.2, though, because that's the first - * release which includes df205b5c6328 ("KVM: arm64: Filter out invalid - * core register IDs in KVM_GET_REG_LIST"). Without that commit the core - * registers won't match expectations. 
- */ -#include -#include "kvm_util.h" -#include "test_util.h" -#include "processor.h" - -struct feature_id_reg { - __u64 reg; - __u64 id_reg; - __u64 feat_shift; - __u64 feat_min; -}; - -static struct feature_id_reg feat_id_regs[] = { - { - ARM64_SYS_REG(3, 0, 2, 0, 3), /* TCR2_EL1 */ - ARM64_SYS_REG(3, 0, 0, 7, 3), /* ID_AA64MMFR3_EL1 */ - 0, - 1 - }, - { - ARM64_SYS_REG(3, 0, 10, 2, 2), /* PIRE0_EL1 */ - ARM64_SYS_REG(3, 0, 0, 7, 3), /* ID_AA64MMFR3_EL1 */ - 8, - 1 - }, - { - ARM64_SYS_REG(3, 0, 10, 2, 3), /* PIR_EL1 */ - ARM64_SYS_REG(3, 0, 0, 7, 3), /* ID_AA64MMFR3_EL1 */ - 8, - 1 - }, - { - ARM64_SYS_REG(3, 0, 10, 2, 4), /* POR_EL1 */ - ARM64_SYS_REG(3, 0, 0, 7, 3), /* ID_AA64MMFR3_EL1 */ - 16, - 1 - }, - { - ARM64_SYS_REG(3, 3, 10, 2, 4), /* POR_EL0 */ - ARM64_SYS_REG(3, 0, 0, 7, 3), /* ID_AA64MMFR3_EL1 */ - 16, - 1 - } -}; - -bool filter_reg(__u64 reg) -{ - /* - * DEMUX register presence depends on the host's CLIDR_EL1. - * This means there's no set of them that we can bless. - */ - if ((reg & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_DEMUX) - return true; - - return false; -} - -static bool check_supported_feat_reg(struct kvm_vcpu *vcpu, __u64 reg) -{ - int i, ret; - __u64 data, feat_val; - - for (i = 0; i < ARRAY_SIZE(feat_id_regs); i++) { - if (feat_id_regs[i].reg == reg) { - ret = __vcpu_get_reg(vcpu, feat_id_regs[i].id_reg, &data); - if (ret < 0) - return false; - - feat_val = ((data >> feat_id_regs[i].feat_shift) & 0xf); - return feat_val >= feat_id_regs[i].feat_min; - } - } - - return true; -} - -bool check_supported_reg(struct kvm_vcpu *vcpu, __u64 reg) -{ - return check_supported_feat_reg(vcpu, reg); -} - -bool check_reject_set(int err) -{ - return err == EPERM; -} - -void finalize_vcpu(struct kvm_vcpu *vcpu, struct vcpu_reg_list *c) -{ - struct vcpu_reg_sublist *s; - int feature; - - for_each_sublist(c, s) { - if (s->finalize) { - feature = s->feature; - vcpu_ioctl(vcpu, KVM_ARM_VCPU_FINALIZE, &feature); - } - } -} - -#define REG_MASK (KVM_REG_ARCH_MASK | KVM_REG_SIZE_MASK | KVM_REG_ARM_COPROC_MASK) - -#define CORE_REGS_XX_NR_WORDS 2 -#define CORE_SPSR_XX_NR_WORDS 2 -#define CORE_FPREGS_XX_NR_WORDS 4 - -static const char *core_id_to_str(const char *prefix, __u64 id) -{ - __u64 core_off = id & ~REG_MASK, idx; - - /* - * core_off is the offset into struct kvm_regs - */ - switch (core_off) { - case KVM_REG_ARM_CORE_REG(regs.regs[0]) ... - KVM_REG_ARM_CORE_REG(regs.regs[30]): - idx = (core_off - KVM_REG_ARM_CORE_REG(regs.regs[0])) / CORE_REGS_XX_NR_WORDS; - TEST_ASSERT(idx < 31, "%s: Unexpected regs.regs index: %lld", prefix, idx); - return strdup_printf("KVM_REG_ARM_CORE_REG(regs.regs[%lld])", idx); - case KVM_REG_ARM_CORE_REG(regs.sp): - return "KVM_REG_ARM_CORE_REG(regs.sp)"; - case KVM_REG_ARM_CORE_REG(regs.pc): - return "KVM_REG_ARM_CORE_REG(regs.pc)"; - case KVM_REG_ARM_CORE_REG(regs.pstate): - return "KVM_REG_ARM_CORE_REG(regs.pstate)"; - case KVM_REG_ARM_CORE_REG(sp_el1): - return "KVM_REG_ARM_CORE_REG(sp_el1)"; - case KVM_REG_ARM_CORE_REG(elr_el1): - return "KVM_REG_ARM_CORE_REG(elr_el1)"; - case KVM_REG_ARM_CORE_REG(spsr[0]) ... - KVM_REG_ARM_CORE_REG(spsr[KVM_NR_SPSR - 1]): - idx = (core_off - KVM_REG_ARM_CORE_REG(spsr[0])) / CORE_SPSR_XX_NR_WORDS; - TEST_ASSERT(idx < KVM_NR_SPSR, "%s: Unexpected spsr index: %lld", prefix, idx); - return strdup_printf("KVM_REG_ARM_CORE_REG(spsr[%lld])", idx); - case KVM_REG_ARM_CORE_REG(fp_regs.vregs[0]) ... 
- KVM_REG_ARM_CORE_REG(fp_regs.vregs[31]): - idx = (core_off - KVM_REG_ARM_CORE_REG(fp_regs.vregs[0])) / CORE_FPREGS_XX_NR_WORDS; - TEST_ASSERT(idx < 32, "%s: Unexpected fp_regs.vregs index: %lld", prefix, idx); - return strdup_printf("KVM_REG_ARM_CORE_REG(fp_regs.vregs[%lld])", idx); - case KVM_REG_ARM_CORE_REG(fp_regs.fpsr): - return "KVM_REG_ARM_CORE_REG(fp_regs.fpsr)"; - case KVM_REG_ARM_CORE_REG(fp_regs.fpcr): - return "KVM_REG_ARM_CORE_REG(fp_regs.fpcr)"; - } - - TEST_FAIL("%s: Unknown core reg id: 0x%llx", prefix, id); - return NULL; -} - -static const char *sve_id_to_str(const char *prefix, __u64 id) -{ - __u64 sve_off, n, i; - - if (id == KVM_REG_ARM64_SVE_VLS) - return "KVM_REG_ARM64_SVE_VLS"; - - sve_off = id & ~(REG_MASK | ((1ULL << 5) - 1)); - i = id & (KVM_ARM64_SVE_MAX_SLICES - 1); - - TEST_ASSERT(i == 0, "%s: Currently we don't expect slice > 0, reg id 0x%llx", prefix, id); - - switch (sve_off) { - case KVM_REG_ARM64_SVE_ZREG_BASE ... - KVM_REG_ARM64_SVE_ZREG_BASE + (1ULL << 5) * KVM_ARM64_SVE_NUM_ZREGS - 1: - n = (id >> 5) & (KVM_ARM64_SVE_NUM_ZREGS - 1); - TEST_ASSERT(id == KVM_REG_ARM64_SVE_ZREG(n, 0), - "%s: Unexpected bits set in SVE ZREG id: 0x%llx", prefix, id); - return strdup_printf("KVM_REG_ARM64_SVE_ZREG(%lld, 0)", n); - case KVM_REG_ARM64_SVE_PREG_BASE ... - KVM_REG_ARM64_SVE_PREG_BASE + (1ULL << 5) * KVM_ARM64_SVE_NUM_PREGS - 1: - n = (id >> 5) & (KVM_ARM64_SVE_NUM_PREGS - 1); - TEST_ASSERT(id == KVM_REG_ARM64_SVE_PREG(n, 0), - "%s: Unexpected bits set in SVE PREG id: 0x%llx", prefix, id); - return strdup_printf("KVM_REG_ARM64_SVE_PREG(%lld, 0)", n); - case KVM_REG_ARM64_SVE_FFR_BASE: - TEST_ASSERT(id == KVM_REG_ARM64_SVE_FFR(0), - "%s: Unexpected bits set in SVE FFR id: 0x%llx", prefix, id); - return "KVM_REG_ARM64_SVE_FFR(0)"; - } - - return NULL; -} - -void print_reg(const char *prefix, __u64 id) -{ - unsigned op0, op1, crn, crm, op2; - const char *reg_size = NULL; - - TEST_ASSERT((id & KVM_REG_ARCH_MASK) == KVM_REG_ARM64, - "%s: KVM_REG_ARM64 missing in reg id: 0x%llx", prefix, id); - - switch (id & KVM_REG_SIZE_MASK) { - case KVM_REG_SIZE_U8: - reg_size = "KVM_REG_SIZE_U8"; - break; - case KVM_REG_SIZE_U16: - reg_size = "KVM_REG_SIZE_U16"; - break; - case KVM_REG_SIZE_U32: - reg_size = "KVM_REG_SIZE_U32"; - break; - case KVM_REG_SIZE_U64: - reg_size = "KVM_REG_SIZE_U64"; - break; - case KVM_REG_SIZE_U128: - reg_size = "KVM_REG_SIZE_U128"; - break; - case KVM_REG_SIZE_U256: - reg_size = "KVM_REG_SIZE_U256"; - break; - case KVM_REG_SIZE_U512: - reg_size = "KVM_REG_SIZE_U512"; - break; - case KVM_REG_SIZE_U1024: - reg_size = "KVM_REG_SIZE_U1024"; - break; - case KVM_REG_SIZE_U2048: - reg_size = "KVM_REG_SIZE_U2048"; - break; - default: - TEST_FAIL("%s: Unexpected reg size: 0x%llx in reg id: 0x%llx", - prefix, (id & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT, id); - } - - switch (id & KVM_REG_ARM_COPROC_MASK) { - case KVM_REG_ARM_CORE: - printf("\tKVM_REG_ARM64 | %s | KVM_REG_ARM_CORE | %s,\n", reg_size, core_id_to_str(prefix, id)); - break; - case KVM_REG_ARM_DEMUX: - TEST_ASSERT(!(id & ~(REG_MASK | KVM_REG_ARM_DEMUX_ID_MASK | KVM_REG_ARM_DEMUX_VAL_MASK)), - "%s: Unexpected bits set in DEMUX reg id: 0x%llx", prefix, id); - printf("\tKVM_REG_ARM64 | %s | KVM_REG_ARM_DEMUX | KVM_REG_ARM_DEMUX_ID_CCSIDR | %lld,\n", - reg_size, id & KVM_REG_ARM_DEMUX_VAL_MASK); - break; - case KVM_REG_ARM64_SYSREG: - op0 = (id & KVM_REG_ARM64_SYSREG_OP0_MASK) >> KVM_REG_ARM64_SYSREG_OP0_SHIFT; - op1 = (id & KVM_REG_ARM64_SYSREG_OP1_MASK) >> KVM_REG_ARM64_SYSREG_OP1_SHIFT; - crn = 
(id & KVM_REG_ARM64_SYSREG_CRN_MASK) >> KVM_REG_ARM64_SYSREG_CRN_SHIFT; - crm = (id & KVM_REG_ARM64_SYSREG_CRM_MASK) >> KVM_REG_ARM64_SYSREG_CRM_SHIFT; - op2 = (id & KVM_REG_ARM64_SYSREG_OP2_MASK) >> KVM_REG_ARM64_SYSREG_OP2_SHIFT; - TEST_ASSERT(id == ARM64_SYS_REG(op0, op1, crn, crm, op2), - "%s: Unexpected bits set in SYSREG reg id: 0x%llx", prefix, id); - printf("\tARM64_SYS_REG(%d, %d, %d, %d, %d),\n", op0, op1, crn, crm, op2); - break; - case KVM_REG_ARM_FW: - TEST_ASSERT(id == KVM_REG_ARM_FW_REG(id & 0xffff), - "%s: Unexpected bits set in FW reg id: 0x%llx", prefix, id); - printf("\tKVM_REG_ARM_FW_REG(%lld),\n", id & 0xffff); - break; - case KVM_REG_ARM_FW_FEAT_BMAP: - TEST_ASSERT(id == KVM_REG_ARM_FW_FEAT_BMAP_REG(id & 0xffff), - "%s: Unexpected bits set in the bitmap feature FW reg id: 0x%llx", prefix, id); - printf("\tKVM_REG_ARM_FW_FEAT_BMAP_REG(%lld),\n", id & 0xffff); - break; - case KVM_REG_ARM64_SVE: - printf("\t%s,\n", sve_id_to_str(prefix, id)); - break; - default: - TEST_FAIL("%s: Unexpected coproc type: 0x%llx in reg id: 0x%llx", - prefix, (id & KVM_REG_ARM_COPROC_MASK) >> KVM_REG_ARM_COPROC_SHIFT, id); - } -} - -/* - * The original blessed list was primed with the output of kernel version - * v4.15 with --core-reg-fixup and then later updated with new registers. - * (The --core-reg-fixup option and it's fixup function have been removed - * from the test, as it's unlikely to use this type of test on a kernel - * older than v5.2.) - * - * The blessed list is up to date with kernel version v6.4 (or so we hope) - */ -static __u64 base_regs[] = { - KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[0]), - KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[1]), - KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[2]), - KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[3]), - KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[4]), - KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[5]), - KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[6]), - KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[7]), - KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[8]), - KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[9]), - KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[10]), - KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[11]), - KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[12]), - KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[13]), - KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[14]), - KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[15]), - KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[16]), - KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[17]), - KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[18]), - KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[19]), - KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[20]), - KVM_REG_ARM64 | 
KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[21]), - KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[22]), - KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[23]), - KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[24]), - KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[25]), - KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[26]), - KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[27]), - KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[28]), - KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[29]), - KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[30]), - KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.sp), - KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.pc), - KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.pstate), - KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(sp_el1), - KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(elr_el1), - KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(spsr[0]), - KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(spsr[1]), - KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(spsr[2]), - KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(spsr[3]), - KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(spsr[4]), - KVM_REG_ARM64 | KVM_REG_SIZE_U32 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.fpsr), - KVM_REG_ARM64 | KVM_REG_SIZE_U32 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.fpcr), - KVM_REG_ARM_FW_REG(0), /* KVM_REG_ARM_PSCI_VERSION */ - KVM_REG_ARM_FW_REG(1), /* KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1 */ - KVM_REG_ARM_FW_REG(2), /* KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2 */ - KVM_REG_ARM_FW_REG(3), /* KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3 */ - KVM_REG_ARM_FW_FEAT_BMAP_REG(0), /* KVM_REG_ARM_STD_BMAP */ - KVM_REG_ARM_FW_FEAT_BMAP_REG(1), /* KVM_REG_ARM_STD_HYP_BMAP */ - KVM_REG_ARM_FW_FEAT_BMAP_REG(2), /* KVM_REG_ARM_VENDOR_HYP_BMAP */ - ARM64_SYS_REG(3, 3, 14, 3, 1), /* CNTV_CTL_EL0 */ - ARM64_SYS_REG(3, 3, 14, 3, 2), /* CNTV_CVAL_EL0 */ - ARM64_SYS_REG(3, 3, 14, 0, 2), - ARM64_SYS_REG(3, 0, 0, 0, 0), /* MIDR_EL1 */ - ARM64_SYS_REG(3, 0, 0, 0, 6), /* REVIDR_EL1 */ - ARM64_SYS_REG(3, 1, 0, 0, 1), /* CLIDR_EL1 */ - ARM64_SYS_REG(3, 1, 0, 0, 7), /* AIDR_EL1 */ - ARM64_SYS_REG(3, 3, 0, 0, 1), /* CTR_EL0 */ - ARM64_SYS_REG(2, 0, 0, 0, 4), - ARM64_SYS_REG(2, 0, 0, 0, 5), - ARM64_SYS_REG(2, 0, 0, 0, 6), - ARM64_SYS_REG(2, 0, 0, 0, 7), - ARM64_SYS_REG(2, 0, 0, 1, 4), - ARM64_SYS_REG(2, 0, 0, 1, 5), - ARM64_SYS_REG(2, 0, 0, 1, 6), - ARM64_SYS_REG(2, 0, 0, 1, 7), - ARM64_SYS_REG(2, 0, 0, 2, 0), /* MDCCINT_EL1 */ - ARM64_SYS_REG(2, 0, 0, 2, 2), /* MDSCR_EL1 */ - ARM64_SYS_REG(2, 0, 0, 2, 4), - ARM64_SYS_REG(2, 0, 0, 2, 5), - ARM64_SYS_REG(2, 0, 0, 2, 6), - ARM64_SYS_REG(2, 0, 0, 2, 7), - ARM64_SYS_REG(2, 0, 0, 3, 4), - ARM64_SYS_REG(2, 0, 0, 3, 5), - ARM64_SYS_REG(2, 0, 0, 3, 6), - ARM64_SYS_REG(2, 0, 0, 3, 7), - ARM64_SYS_REG(2, 0, 0, 4, 4), - ARM64_SYS_REG(2, 0, 0, 4, 5), - ARM64_SYS_REG(2, 0, 0, 4, 6), - ARM64_SYS_REG(2, 0, 0, 4, 7), - ARM64_SYS_REG(2, 
0, 0, 5, 4), - ARM64_SYS_REG(2, 0, 0, 5, 5), - ARM64_SYS_REG(2, 0, 0, 5, 6), - ARM64_SYS_REG(2, 0, 0, 5, 7), - ARM64_SYS_REG(2, 0, 0, 6, 4), - ARM64_SYS_REG(2, 0, 0, 6, 5), - ARM64_SYS_REG(2, 0, 0, 6, 6), - ARM64_SYS_REG(2, 0, 0, 6, 7), - ARM64_SYS_REG(2, 0, 0, 7, 4), - ARM64_SYS_REG(2, 0, 0, 7, 5), - ARM64_SYS_REG(2, 0, 0, 7, 6), - ARM64_SYS_REG(2, 0, 0, 7, 7), - ARM64_SYS_REG(2, 0, 0, 8, 4), - ARM64_SYS_REG(2, 0, 0, 8, 5), - ARM64_SYS_REG(2, 0, 0, 8, 6), - ARM64_SYS_REG(2, 0, 0, 8, 7), - ARM64_SYS_REG(2, 0, 0, 9, 4), - ARM64_SYS_REG(2, 0, 0, 9, 5), - ARM64_SYS_REG(2, 0, 0, 9, 6), - ARM64_SYS_REG(2, 0, 0, 9, 7), - ARM64_SYS_REG(2, 0, 0, 10, 4), - ARM64_SYS_REG(2, 0, 0, 10, 5), - ARM64_SYS_REG(2, 0, 0, 10, 6), - ARM64_SYS_REG(2, 0, 0, 10, 7), - ARM64_SYS_REG(2, 0, 0, 11, 4), - ARM64_SYS_REG(2, 0, 0, 11, 5), - ARM64_SYS_REG(2, 0, 0, 11, 6), - ARM64_SYS_REG(2, 0, 0, 11, 7), - ARM64_SYS_REG(2, 0, 0, 12, 4), - ARM64_SYS_REG(2, 0, 0, 12, 5), - ARM64_SYS_REG(2, 0, 0, 12, 6), - ARM64_SYS_REG(2, 0, 0, 12, 7), - ARM64_SYS_REG(2, 0, 0, 13, 4), - ARM64_SYS_REG(2, 0, 0, 13, 5), - ARM64_SYS_REG(2, 0, 0, 13, 6), - ARM64_SYS_REG(2, 0, 0, 13, 7), - ARM64_SYS_REG(2, 0, 0, 14, 4), - ARM64_SYS_REG(2, 0, 0, 14, 5), - ARM64_SYS_REG(2, 0, 0, 14, 6), - ARM64_SYS_REG(2, 0, 0, 14, 7), - ARM64_SYS_REG(2, 0, 0, 15, 4), - ARM64_SYS_REG(2, 0, 0, 15, 5), - ARM64_SYS_REG(2, 0, 0, 15, 6), - ARM64_SYS_REG(2, 0, 0, 15, 7), - ARM64_SYS_REG(2, 0, 1, 1, 4), /* OSLSR_EL1 */ - ARM64_SYS_REG(2, 4, 0, 7, 0), /* DBGVCR32_EL2 */ - ARM64_SYS_REG(3, 0, 0, 0, 5), /* MPIDR_EL1 */ - ARM64_SYS_REG(3, 0, 0, 1, 0), /* ID_PFR0_EL1 */ - ARM64_SYS_REG(3, 0, 0, 1, 1), /* ID_PFR1_EL1 */ - ARM64_SYS_REG(3, 0, 0, 1, 2), /* ID_DFR0_EL1 */ - ARM64_SYS_REG(3, 0, 0, 1, 3), /* ID_AFR0_EL1 */ - ARM64_SYS_REG(3, 0, 0, 1, 4), /* ID_MMFR0_EL1 */ - ARM64_SYS_REG(3, 0, 0, 1, 5), /* ID_MMFR1_EL1 */ - ARM64_SYS_REG(3, 0, 0, 1, 6), /* ID_MMFR2_EL1 */ - ARM64_SYS_REG(3, 0, 0, 1, 7), /* ID_MMFR3_EL1 */ - ARM64_SYS_REG(3, 0, 0, 2, 0), /* ID_ISAR0_EL1 */ - ARM64_SYS_REG(3, 0, 0, 2, 1), /* ID_ISAR1_EL1 */ - ARM64_SYS_REG(3, 0, 0, 2, 2), /* ID_ISAR2_EL1 */ - ARM64_SYS_REG(3, 0, 0, 2, 3), /* ID_ISAR3_EL1 */ - ARM64_SYS_REG(3, 0, 0, 2, 4), /* ID_ISAR4_EL1 */ - ARM64_SYS_REG(3, 0, 0, 2, 5), /* ID_ISAR5_EL1 */ - ARM64_SYS_REG(3, 0, 0, 2, 6), /* ID_MMFR4_EL1 */ - ARM64_SYS_REG(3, 0, 0, 2, 7), /* ID_ISAR6_EL1 */ - ARM64_SYS_REG(3, 0, 0, 3, 0), /* MVFR0_EL1 */ - ARM64_SYS_REG(3, 0, 0, 3, 1), /* MVFR1_EL1 */ - ARM64_SYS_REG(3, 0, 0, 3, 2), /* MVFR2_EL1 */ - ARM64_SYS_REG(3, 0, 0, 3, 3), - ARM64_SYS_REG(3, 0, 0, 3, 4), /* ID_PFR2_EL1 */ - ARM64_SYS_REG(3, 0, 0, 3, 5), /* ID_DFR1_EL1 */ - ARM64_SYS_REG(3, 0, 0, 3, 6), /* ID_MMFR5_EL1 */ - ARM64_SYS_REG(3, 0, 0, 3, 7), - ARM64_SYS_REG(3, 0, 0, 4, 0), /* ID_AA64PFR0_EL1 */ - ARM64_SYS_REG(3, 0, 0, 4, 1), /* ID_AA64PFR1_EL1 */ - ARM64_SYS_REG(3, 0, 0, 4, 2), /* ID_AA64PFR2_EL1 */ - ARM64_SYS_REG(3, 0, 0, 4, 3), - ARM64_SYS_REG(3, 0, 0, 4, 4), /* ID_AA64ZFR0_EL1 */ - ARM64_SYS_REG(3, 0, 0, 4, 5), /* ID_AA64SMFR0_EL1 */ - ARM64_SYS_REG(3, 0, 0, 4, 6), - ARM64_SYS_REG(3, 0, 0, 4, 7), - ARM64_SYS_REG(3, 0, 0, 5, 0), /* ID_AA64DFR0_EL1 */ - ARM64_SYS_REG(3, 0, 0, 5, 1), /* ID_AA64DFR1_EL1 */ - ARM64_SYS_REG(3, 0, 0, 5, 2), - ARM64_SYS_REG(3, 0, 0, 5, 3), - ARM64_SYS_REG(3, 0, 0, 5, 4), /* ID_AA64AFR0_EL1 */ - ARM64_SYS_REG(3, 0, 0, 5, 5), /* ID_AA64AFR1_EL1 */ - ARM64_SYS_REG(3, 0, 0, 5, 6), - ARM64_SYS_REG(3, 0, 0, 5, 7), - ARM64_SYS_REG(3, 0, 0, 6, 0), /* ID_AA64ISAR0_EL1 */ - ARM64_SYS_REG(3, 0, 0, 6, 1), /* ID_AA64ISAR1_EL1 */ - 
ARM64_SYS_REG(3, 0, 0, 6, 2), /* ID_AA64ISAR2_EL1 */ - ARM64_SYS_REG(3, 0, 0, 6, 3), - ARM64_SYS_REG(3, 0, 0, 6, 4), - ARM64_SYS_REG(3, 0, 0, 6, 5), - ARM64_SYS_REG(3, 0, 0, 6, 6), - ARM64_SYS_REG(3, 0, 0, 6, 7), - ARM64_SYS_REG(3, 0, 0, 7, 0), /* ID_AA64MMFR0_EL1 */ - ARM64_SYS_REG(3, 0, 0, 7, 1), /* ID_AA64MMFR1_EL1 */ - ARM64_SYS_REG(3, 0, 0, 7, 2), /* ID_AA64MMFR2_EL1 */ - ARM64_SYS_REG(3, 0, 0, 7, 3), /* ID_AA64MMFR3_EL1 */ - ARM64_SYS_REG(3, 0, 0, 7, 4), /* ID_AA64MMFR4_EL1 */ - ARM64_SYS_REG(3, 0, 0, 7, 5), - ARM64_SYS_REG(3, 0, 0, 7, 6), - ARM64_SYS_REG(3, 0, 0, 7, 7), - ARM64_SYS_REG(3, 0, 1, 0, 0), /* SCTLR_EL1 */ - ARM64_SYS_REG(3, 0, 1, 0, 1), /* ACTLR_EL1 */ - ARM64_SYS_REG(3, 0, 1, 0, 2), /* CPACR_EL1 */ - ARM64_SYS_REG(3, 0, 2, 0, 0), /* TTBR0_EL1 */ - ARM64_SYS_REG(3, 0, 2, 0, 1), /* TTBR1_EL1 */ - ARM64_SYS_REG(3, 0, 2, 0, 2), /* TCR_EL1 */ - ARM64_SYS_REG(3, 0, 2, 0, 3), /* TCR2_EL1 */ - ARM64_SYS_REG(3, 0, 5, 1, 0), /* AFSR0_EL1 */ - ARM64_SYS_REG(3, 0, 5, 1, 1), /* AFSR1_EL1 */ - ARM64_SYS_REG(3, 0, 5, 2, 0), /* ESR_EL1 */ - ARM64_SYS_REG(3, 0, 6, 0, 0), /* FAR_EL1 */ - ARM64_SYS_REG(3, 0, 7, 4, 0), /* PAR_EL1 */ - ARM64_SYS_REG(3, 0, 10, 2, 0), /* MAIR_EL1 */ - ARM64_SYS_REG(3, 0, 10, 2, 2), /* PIRE0_EL1 */ - ARM64_SYS_REG(3, 0, 10, 2, 3), /* PIR_EL1 */ - ARM64_SYS_REG(3, 0, 10, 2, 4), /* POR_EL1 */ - ARM64_SYS_REG(3, 0, 10, 3, 0), /* AMAIR_EL1 */ - ARM64_SYS_REG(3, 0, 12, 0, 0), /* VBAR_EL1 */ - ARM64_SYS_REG(3, 0, 12, 1, 1), /* DISR_EL1 */ - ARM64_SYS_REG(3, 0, 13, 0, 1), /* CONTEXTIDR_EL1 */ - ARM64_SYS_REG(3, 0, 13, 0, 4), /* TPIDR_EL1 */ - ARM64_SYS_REG(3, 0, 14, 1, 0), /* CNTKCTL_EL1 */ - ARM64_SYS_REG(3, 2, 0, 0, 0), /* CSSELR_EL1 */ - ARM64_SYS_REG(3, 3, 10, 2, 4), /* POR_EL0 */ - ARM64_SYS_REG(3, 3, 13, 0, 2), /* TPIDR_EL0 */ - ARM64_SYS_REG(3, 3, 13, 0, 3), /* TPIDRRO_EL0 */ - ARM64_SYS_REG(3, 3, 14, 0, 1), /* CNTPCT_EL0 */ - ARM64_SYS_REG(3, 3, 14, 2, 1), /* CNTP_CTL_EL0 */ - ARM64_SYS_REG(3, 3, 14, 2, 2), /* CNTP_CVAL_EL0 */ - ARM64_SYS_REG(3, 4, 3, 0, 0), /* DACR32_EL2 */ - ARM64_SYS_REG(3, 4, 5, 0, 1), /* IFSR32_EL2 */ - ARM64_SYS_REG(3, 4, 5, 3, 0), /* FPEXC32_EL2 */ -}; - -static __u64 pmu_regs[] = { - ARM64_SYS_REG(3, 0, 9, 14, 1), /* PMINTENSET_EL1 */ - ARM64_SYS_REG(3, 0, 9, 14, 2), /* PMINTENCLR_EL1 */ - ARM64_SYS_REG(3, 3, 9, 12, 0), /* PMCR_EL0 */ - ARM64_SYS_REG(3, 3, 9, 12, 1), /* PMCNTENSET_EL0 */ - ARM64_SYS_REG(3, 3, 9, 12, 2), /* PMCNTENCLR_EL0 */ - ARM64_SYS_REG(3, 3, 9, 12, 3), /* PMOVSCLR_EL0 */ - ARM64_SYS_REG(3, 3, 9, 12, 4), /* PMSWINC_EL0 */ - ARM64_SYS_REG(3, 3, 9, 12, 5), /* PMSELR_EL0 */ - ARM64_SYS_REG(3, 3, 9, 13, 0), /* PMCCNTR_EL0 */ - ARM64_SYS_REG(3, 3, 9, 14, 0), /* PMUSERENR_EL0 */ - ARM64_SYS_REG(3, 3, 9, 14, 3), /* PMOVSSET_EL0 */ - ARM64_SYS_REG(3, 3, 14, 8, 0), - ARM64_SYS_REG(3, 3, 14, 8, 1), - ARM64_SYS_REG(3, 3, 14, 8, 2), - ARM64_SYS_REG(3, 3, 14, 8, 3), - ARM64_SYS_REG(3, 3, 14, 8, 4), - ARM64_SYS_REG(3, 3, 14, 8, 5), - ARM64_SYS_REG(3, 3, 14, 8, 6), - ARM64_SYS_REG(3, 3, 14, 8, 7), - ARM64_SYS_REG(3, 3, 14, 9, 0), - ARM64_SYS_REG(3, 3, 14, 9, 1), - ARM64_SYS_REG(3, 3, 14, 9, 2), - ARM64_SYS_REG(3, 3, 14, 9, 3), - ARM64_SYS_REG(3, 3, 14, 9, 4), - ARM64_SYS_REG(3, 3, 14, 9, 5), - ARM64_SYS_REG(3, 3, 14, 9, 6), - ARM64_SYS_REG(3, 3, 14, 9, 7), - ARM64_SYS_REG(3, 3, 14, 10, 0), - ARM64_SYS_REG(3, 3, 14, 10, 1), - ARM64_SYS_REG(3, 3, 14, 10, 2), - ARM64_SYS_REG(3, 3, 14, 10, 3), - ARM64_SYS_REG(3, 3, 14, 10, 4), - ARM64_SYS_REG(3, 3, 14, 10, 5), - ARM64_SYS_REG(3, 3, 14, 10, 6), - ARM64_SYS_REG(3, 3, 14, 10, 7), - 
ARM64_SYS_REG(3, 3, 14, 11, 0), - ARM64_SYS_REG(3, 3, 14, 11, 1), - ARM64_SYS_REG(3, 3, 14, 11, 2), - ARM64_SYS_REG(3, 3, 14, 11, 3), - ARM64_SYS_REG(3, 3, 14, 11, 4), - ARM64_SYS_REG(3, 3, 14, 11, 5), - ARM64_SYS_REG(3, 3, 14, 11, 6), - ARM64_SYS_REG(3, 3, 14, 12, 0), - ARM64_SYS_REG(3, 3, 14, 12, 1), - ARM64_SYS_REG(3, 3, 14, 12, 2), - ARM64_SYS_REG(3, 3, 14, 12, 3), - ARM64_SYS_REG(3, 3, 14, 12, 4), - ARM64_SYS_REG(3, 3, 14, 12, 5), - ARM64_SYS_REG(3, 3, 14, 12, 6), - ARM64_SYS_REG(3, 3, 14, 12, 7), - ARM64_SYS_REG(3, 3, 14, 13, 0), - ARM64_SYS_REG(3, 3, 14, 13, 1), - ARM64_SYS_REG(3, 3, 14, 13, 2), - ARM64_SYS_REG(3, 3, 14, 13, 3), - ARM64_SYS_REG(3, 3, 14, 13, 4), - ARM64_SYS_REG(3, 3, 14, 13, 5), - ARM64_SYS_REG(3, 3, 14, 13, 6), - ARM64_SYS_REG(3, 3, 14, 13, 7), - ARM64_SYS_REG(3, 3, 14, 14, 0), - ARM64_SYS_REG(3, 3, 14, 14, 1), - ARM64_SYS_REG(3, 3, 14, 14, 2), - ARM64_SYS_REG(3, 3, 14, 14, 3), - ARM64_SYS_REG(3, 3, 14, 14, 4), - ARM64_SYS_REG(3, 3, 14, 14, 5), - ARM64_SYS_REG(3, 3, 14, 14, 6), - ARM64_SYS_REG(3, 3, 14, 14, 7), - ARM64_SYS_REG(3, 3, 14, 15, 0), - ARM64_SYS_REG(3, 3, 14, 15, 1), - ARM64_SYS_REG(3, 3, 14, 15, 2), - ARM64_SYS_REG(3, 3, 14, 15, 3), - ARM64_SYS_REG(3, 3, 14, 15, 4), - ARM64_SYS_REG(3, 3, 14, 15, 5), - ARM64_SYS_REG(3, 3, 14, 15, 6), - ARM64_SYS_REG(3, 3, 14, 15, 7), /* PMCCFILTR_EL0 */ -}; - -static __u64 vregs[] = { - KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[0]), - KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[1]), - KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[2]), - KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[3]), - KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[4]), - KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[5]), - KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[6]), - KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[7]), - KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[8]), - KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[9]), - KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[10]), - KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[11]), - KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[12]), - KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[13]), - KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[14]), - KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[15]), - KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[16]), - KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[17]), - KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[18]), - KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[19]), - KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[20]), - KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[21]), - KVM_REG_ARM64 | 
KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[22]), - KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[23]), - KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[24]), - KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[25]), - KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[26]), - KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[27]), - KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[28]), - KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[29]), - KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[30]), - KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[31]), -}; - -static __u64 sve_regs[] = { - KVM_REG_ARM64_SVE_VLS, - KVM_REG_ARM64_SVE_ZREG(0, 0), - KVM_REG_ARM64_SVE_ZREG(1, 0), - KVM_REG_ARM64_SVE_ZREG(2, 0), - KVM_REG_ARM64_SVE_ZREG(3, 0), - KVM_REG_ARM64_SVE_ZREG(4, 0), - KVM_REG_ARM64_SVE_ZREG(5, 0), - KVM_REG_ARM64_SVE_ZREG(6, 0), - KVM_REG_ARM64_SVE_ZREG(7, 0), - KVM_REG_ARM64_SVE_ZREG(8, 0), - KVM_REG_ARM64_SVE_ZREG(9, 0), - KVM_REG_ARM64_SVE_ZREG(10, 0), - KVM_REG_ARM64_SVE_ZREG(11, 0), - KVM_REG_ARM64_SVE_ZREG(12, 0), - KVM_REG_ARM64_SVE_ZREG(13, 0), - KVM_REG_ARM64_SVE_ZREG(14, 0), - KVM_REG_ARM64_SVE_ZREG(15, 0), - KVM_REG_ARM64_SVE_ZREG(16, 0), - KVM_REG_ARM64_SVE_ZREG(17, 0), - KVM_REG_ARM64_SVE_ZREG(18, 0), - KVM_REG_ARM64_SVE_ZREG(19, 0), - KVM_REG_ARM64_SVE_ZREG(20, 0), - KVM_REG_ARM64_SVE_ZREG(21, 0), - KVM_REG_ARM64_SVE_ZREG(22, 0), - KVM_REG_ARM64_SVE_ZREG(23, 0), - KVM_REG_ARM64_SVE_ZREG(24, 0), - KVM_REG_ARM64_SVE_ZREG(25, 0), - KVM_REG_ARM64_SVE_ZREG(26, 0), - KVM_REG_ARM64_SVE_ZREG(27, 0), - KVM_REG_ARM64_SVE_ZREG(28, 0), - KVM_REG_ARM64_SVE_ZREG(29, 0), - KVM_REG_ARM64_SVE_ZREG(30, 0), - KVM_REG_ARM64_SVE_ZREG(31, 0), - KVM_REG_ARM64_SVE_PREG(0, 0), - KVM_REG_ARM64_SVE_PREG(1, 0), - KVM_REG_ARM64_SVE_PREG(2, 0), - KVM_REG_ARM64_SVE_PREG(3, 0), - KVM_REG_ARM64_SVE_PREG(4, 0), - KVM_REG_ARM64_SVE_PREG(5, 0), - KVM_REG_ARM64_SVE_PREG(6, 0), - KVM_REG_ARM64_SVE_PREG(7, 0), - KVM_REG_ARM64_SVE_PREG(8, 0), - KVM_REG_ARM64_SVE_PREG(9, 0), - KVM_REG_ARM64_SVE_PREG(10, 0), - KVM_REG_ARM64_SVE_PREG(11, 0), - KVM_REG_ARM64_SVE_PREG(12, 0), - KVM_REG_ARM64_SVE_PREG(13, 0), - KVM_REG_ARM64_SVE_PREG(14, 0), - KVM_REG_ARM64_SVE_PREG(15, 0), - KVM_REG_ARM64_SVE_FFR(0), - ARM64_SYS_REG(3, 0, 1, 2, 0), /* ZCR_EL1 */ -}; - -static __u64 sve_rejects_set[] = { - KVM_REG_ARM64_SVE_VLS, -}; - -static __u64 pauth_addr_regs[] = { - ARM64_SYS_REG(3, 0, 2, 1, 0), /* APIAKEYLO_EL1 */ - ARM64_SYS_REG(3, 0, 2, 1, 1), /* APIAKEYHI_EL1 */ - ARM64_SYS_REG(3, 0, 2, 1, 2), /* APIBKEYLO_EL1 */ - ARM64_SYS_REG(3, 0, 2, 1, 3), /* APIBKEYHI_EL1 */ - ARM64_SYS_REG(3, 0, 2, 2, 0), /* APDAKEYLO_EL1 */ - ARM64_SYS_REG(3, 0, 2, 2, 1), /* APDAKEYHI_EL1 */ - ARM64_SYS_REG(3, 0, 2, 2, 2), /* APDBKEYLO_EL1 */ - ARM64_SYS_REG(3, 0, 2, 2, 3) /* APDBKEYHI_EL1 */ -}; - -static __u64 pauth_generic_regs[] = { - ARM64_SYS_REG(3, 0, 2, 3, 0), /* APGAKEYLO_EL1 */ - ARM64_SYS_REG(3, 0, 2, 3, 1), /* APGAKEYHI_EL1 */ -}; - -#define BASE_SUBLIST \ - { "base", .regs = base_regs, .regs_n = ARRAY_SIZE(base_regs), } -#define VREGS_SUBLIST \ - { "vregs", .regs = vregs, .regs_n = ARRAY_SIZE(vregs), } -#define PMU_SUBLIST \ - { "pmu", 
.capability = KVM_CAP_ARM_PMU_V3, .feature = KVM_ARM_VCPU_PMU_V3, \ - .regs = pmu_regs, .regs_n = ARRAY_SIZE(pmu_regs), } -#define SVE_SUBLIST \ - { "sve", .capability = KVM_CAP_ARM_SVE, .feature = KVM_ARM_VCPU_SVE, .finalize = true, \ - .regs = sve_regs, .regs_n = ARRAY_SIZE(sve_regs), \ - .rejects_set = sve_rejects_set, .rejects_set_n = ARRAY_SIZE(sve_rejects_set), } -#define PAUTH_SUBLIST \ - { \ - .name = "pauth_address", \ - .capability = KVM_CAP_ARM_PTRAUTH_ADDRESS, \ - .feature = KVM_ARM_VCPU_PTRAUTH_ADDRESS, \ - .regs = pauth_addr_regs, \ - .regs_n = ARRAY_SIZE(pauth_addr_regs), \ - }, \ - { \ - .name = "pauth_generic", \ - .capability = KVM_CAP_ARM_PTRAUTH_GENERIC, \ - .feature = KVM_ARM_VCPU_PTRAUTH_GENERIC, \ - .regs = pauth_generic_regs, \ - .regs_n = ARRAY_SIZE(pauth_generic_regs), \ - } - -static struct vcpu_reg_list vregs_config = { - .sublists = { - BASE_SUBLIST, - VREGS_SUBLIST, - {0}, - }, -}; -static struct vcpu_reg_list vregs_pmu_config = { - .sublists = { - BASE_SUBLIST, - VREGS_SUBLIST, - PMU_SUBLIST, - {0}, - }, -}; -static struct vcpu_reg_list sve_config = { - .sublists = { - BASE_SUBLIST, - SVE_SUBLIST, - {0}, - }, -}; -static struct vcpu_reg_list sve_pmu_config = { - .sublists = { - BASE_SUBLIST, - SVE_SUBLIST, - PMU_SUBLIST, - {0}, - }, -}; -static struct vcpu_reg_list pauth_config = { - .sublists = { - BASE_SUBLIST, - VREGS_SUBLIST, - PAUTH_SUBLIST, - {0}, - }, -}; -static struct vcpu_reg_list pauth_pmu_config = { - .sublists = { - BASE_SUBLIST, - VREGS_SUBLIST, - PAUTH_SUBLIST, - PMU_SUBLIST, - {0}, - }, -}; - -struct vcpu_reg_list *vcpu_configs[] = { - &vregs_config, - &vregs_pmu_config, - &sve_config, - &sve_pmu_config, - &pauth_config, - &pauth_pmu_config, -}; -int vcpu_configs_n = ARRAY_SIZE(vcpu_configs); diff --git a/tools/testing/selftests/kvm/aarch64/hypercalls.c b/tools/testing/selftests/kvm/aarch64/hypercalls.c deleted file mode 100644 index ec54ec7726e9..000000000000 --- a/tools/testing/selftests/kvm/aarch64/hypercalls.c +++ /dev/null @@ -1,308 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only - -/* hypercalls: Check the ARM64's psuedo-firmware bitmap register interface. - * - * The test validates the basic hypercall functionalities that are exposed - * via the psuedo-firmware bitmap register. This includes the registers' - * read/write behavior before and after the VM has started, and if the - * hypercalls are properly masked or unmasked to the guest when disabled or - * enabled from the KVM userspace, respectively. 
- */ -#include -#include -#include -#include - -#include "processor.h" - -#define FW_REG_ULIMIT_VAL(max_feat_bit) (GENMASK(max_feat_bit, 0)) - -/* Last valid bits of the bitmapped firmware registers */ -#define KVM_REG_ARM_STD_BMAP_BIT_MAX 0 -#define KVM_REG_ARM_STD_HYP_BMAP_BIT_MAX 0 -#define KVM_REG_ARM_VENDOR_HYP_BMAP_BIT_MAX 1 - -struct kvm_fw_reg_info { - uint64_t reg; /* Register definition */ - uint64_t max_feat_bit; /* Bit that represents the upper limit of the feature-map */ -}; - -#define FW_REG_INFO(r) \ - { \ - .reg = r, \ - .max_feat_bit = r##_BIT_MAX, \ - } - -static const struct kvm_fw_reg_info fw_reg_info[] = { - FW_REG_INFO(KVM_REG_ARM_STD_BMAP), - FW_REG_INFO(KVM_REG_ARM_STD_HYP_BMAP), - FW_REG_INFO(KVM_REG_ARM_VENDOR_HYP_BMAP), -}; - -enum test_stage { - TEST_STAGE_REG_IFACE, - TEST_STAGE_HVC_IFACE_FEAT_DISABLED, - TEST_STAGE_HVC_IFACE_FEAT_ENABLED, - TEST_STAGE_HVC_IFACE_FALSE_INFO, - TEST_STAGE_END, -}; - -static int stage = TEST_STAGE_REG_IFACE; - -struct test_hvc_info { - uint32_t func_id; - uint64_t arg1; -}; - -#define TEST_HVC_INFO(f, a1) \ - { \ - .func_id = f, \ - .arg1 = a1, \ - } - -static const struct test_hvc_info hvc_info[] = { - /* KVM_REG_ARM_STD_BMAP */ - TEST_HVC_INFO(ARM_SMCCC_TRNG_VERSION, 0), - TEST_HVC_INFO(ARM_SMCCC_TRNG_FEATURES, ARM_SMCCC_TRNG_RND64), - TEST_HVC_INFO(ARM_SMCCC_TRNG_GET_UUID, 0), - TEST_HVC_INFO(ARM_SMCCC_TRNG_RND32, 0), - TEST_HVC_INFO(ARM_SMCCC_TRNG_RND64, 0), - - /* KVM_REG_ARM_STD_HYP_BMAP */ - TEST_HVC_INFO(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, ARM_SMCCC_HV_PV_TIME_FEATURES), - TEST_HVC_INFO(ARM_SMCCC_HV_PV_TIME_FEATURES, ARM_SMCCC_HV_PV_TIME_ST), - TEST_HVC_INFO(ARM_SMCCC_HV_PV_TIME_ST, 0), - - /* KVM_REG_ARM_VENDOR_HYP_BMAP */ - TEST_HVC_INFO(ARM_SMCCC_VENDOR_HYP_KVM_FEATURES_FUNC_ID, - ARM_SMCCC_VENDOR_HYP_KVM_PTP_FUNC_ID), - TEST_HVC_INFO(ARM_SMCCC_VENDOR_HYP_CALL_UID_FUNC_ID, 0), - TEST_HVC_INFO(ARM_SMCCC_VENDOR_HYP_KVM_PTP_FUNC_ID, KVM_PTP_VIRT_COUNTER), -}; - -/* Feed false hypercall info to test the KVM behavior */ -static const struct test_hvc_info false_hvc_info[] = { - /* Feature support check against a different family of hypercalls */ - TEST_HVC_INFO(ARM_SMCCC_TRNG_FEATURES, ARM_SMCCC_VENDOR_HYP_KVM_PTP_FUNC_ID), - TEST_HVC_INFO(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, ARM_SMCCC_TRNG_RND64), - TEST_HVC_INFO(ARM_SMCCC_HV_PV_TIME_FEATURES, ARM_SMCCC_TRNG_RND64), -}; - -static void guest_test_hvc(const struct test_hvc_info *hc_info) -{ - unsigned int i; - struct arm_smccc_res res; - unsigned int hvc_info_arr_sz; - - hvc_info_arr_sz = - hc_info == hvc_info ? 
ARRAY_SIZE(hvc_info) : ARRAY_SIZE(false_hvc_info); - - for (i = 0; i < hvc_info_arr_sz; i++, hc_info++) { - memset(&res, 0, sizeof(res)); - smccc_hvc(hc_info->func_id, hc_info->arg1, 0, 0, 0, 0, 0, 0, &res); - - switch (stage) { - case TEST_STAGE_HVC_IFACE_FEAT_DISABLED: - case TEST_STAGE_HVC_IFACE_FALSE_INFO: - __GUEST_ASSERT(res.a0 == SMCCC_RET_NOT_SUPPORTED, - "a0 = 0x%lx, func_id = 0x%x, arg1 = 0x%lx, stage = %u", - res.a0, hc_info->func_id, hc_info->arg1, stage); - break; - case TEST_STAGE_HVC_IFACE_FEAT_ENABLED: - __GUEST_ASSERT(res.a0 != SMCCC_RET_NOT_SUPPORTED, - "a0 = 0x%lx, func_id = 0x%x, arg1 = 0x%lx, stage = %u", - res.a0, hc_info->func_id, hc_info->arg1, stage); - break; - default: - GUEST_FAIL("Unexpected stage = %u", stage); - } - } -} - -static void guest_code(void) -{ - while (stage != TEST_STAGE_END) { - switch (stage) { - case TEST_STAGE_REG_IFACE: - break; - case TEST_STAGE_HVC_IFACE_FEAT_DISABLED: - case TEST_STAGE_HVC_IFACE_FEAT_ENABLED: - guest_test_hvc(hvc_info); - break; - case TEST_STAGE_HVC_IFACE_FALSE_INFO: - guest_test_hvc(false_hvc_info); - break; - default: - GUEST_FAIL("Unexpected stage = %u", stage); - } - - GUEST_SYNC(stage); - } - - GUEST_DONE(); -} - -struct st_time { - uint32_t rev; - uint32_t attr; - uint64_t st_time; -}; - -#define STEAL_TIME_SIZE ((sizeof(struct st_time) + 63) & ~63) -#define ST_GPA_BASE (1 << 30) - -static void steal_time_init(struct kvm_vcpu *vcpu) -{ - uint64_t st_ipa = (ulong)ST_GPA_BASE; - unsigned int gpages; - - gpages = vm_calc_num_guest_pages(VM_MODE_DEFAULT, STEAL_TIME_SIZE); - vm_userspace_mem_region_add(vcpu->vm, VM_MEM_SRC_ANONYMOUS, ST_GPA_BASE, 1, gpages, 0); - - vcpu_device_attr_set(vcpu, KVM_ARM_VCPU_PVTIME_CTRL, - KVM_ARM_VCPU_PVTIME_IPA, &st_ipa); -} - -static void test_fw_regs_before_vm_start(struct kvm_vcpu *vcpu) -{ - uint64_t val; - unsigned int i; - int ret; - - for (i = 0; i < ARRAY_SIZE(fw_reg_info); i++) { - const struct kvm_fw_reg_info *reg_info = &fw_reg_info[i]; - - /* First 'read' should be an upper limit of the features supported */ - val = vcpu_get_reg(vcpu, reg_info->reg); - TEST_ASSERT(val == FW_REG_ULIMIT_VAL(reg_info->max_feat_bit), - "Expected all the features to be set for reg: 0x%lx; expected: 0x%lx; read: 0x%lx", - reg_info->reg, FW_REG_ULIMIT_VAL(reg_info->max_feat_bit), val); - - /* Test a 'write' by disabling all the features of the register map */ - ret = __vcpu_set_reg(vcpu, reg_info->reg, 0); - TEST_ASSERT(ret == 0, - "Failed to clear all the features of reg: 0x%lx; ret: %d", - reg_info->reg, errno); - - val = vcpu_get_reg(vcpu, reg_info->reg); - TEST_ASSERT(val == 0, - "Expected all the features to be cleared for reg: 0x%lx", reg_info->reg); - - /* - * Test enabling a feature that's not supported. - * Avoid this check if all the bits are occupied. - */ - if (reg_info->max_feat_bit < 63) { - ret = __vcpu_set_reg(vcpu, reg_info->reg, BIT(reg_info->max_feat_bit + 1)); - TEST_ASSERT(ret != 0 && errno == EINVAL, - "Unexpected behavior or return value (%d) while setting an unsupported feature for reg: 0x%lx", - errno, reg_info->reg); - } - } -} - -static void test_fw_regs_after_vm_start(struct kvm_vcpu *vcpu) -{ - uint64_t val; - unsigned int i; - int ret; - - for (i = 0; i < ARRAY_SIZE(fw_reg_info); i++) { - const struct kvm_fw_reg_info *reg_info = &fw_reg_info[i]; - - /* - * Before starting the VM, the test clears all the bits. - * Check if that's still the case. 
- */ - val = vcpu_get_reg(vcpu, reg_info->reg); - TEST_ASSERT(val == 0, - "Expected all the features to be cleared for reg: 0x%lx", - reg_info->reg); - - /* - * Since the VM has run at least once, KVM shouldn't allow modification of - * the registers and should return EBUSY. Set the registers and check for - * the expected errno. - */ - ret = __vcpu_set_reg(vcpu, reg_info->reg, FW_REG_ULIMIT_VAL(reg_info->max_feat_bit)); - TEST_ASSERT(ret != 0 && errno == EBUSY, - "Unexpected behavior or return value (%d) while setting a feature while VM is running for reg: 0x%lx", - errno, reg_info->reg); - } -} - -static struct kvm_vm *test_vm_create(struct kvm_vcpu **vcpu) -{ - struct kvm_vm *vm; - - vm = vm_create_with_one_vcpu(vcpu, guest_code); - - steal_time_init(*vcpu); - - return vm; -} - -static void test_guest_stage(struct kvm_vm **vm, struct kvm_vcpu **vcpu) -{ - int prev_stage = stage; - - pr_debug("Stage: %d\n", prev_stage); - - /* Sync the stage early, the VM might be freed below. */ - stage++; - sync_global_to_guest(*vm, stage); - - switch (prev_stage) { - case TEST_STAGE_REG_IFACE: - test_fw_regs_after_vm_start(*vcpu); - break; - case TEST_STAGE_HVC_IFACE_FEAT_DISABLED: - /* Start a new VM so that all the features are now enabled by default */ - kvm_vm_free(*vm); - *vm = test_vm_create(vcpu); - break; - case TEST_STAGE_HVC_IFACE_FEAT_ENABLED: - case TEST_STAGE_HVC_IFACE_FALSE_INFO: - break; - default: - TEST_FAIL("Unknown test stage: %d", prev_stage); - } -} - -static void test_run(void) -{ - struct kvm_vcpu *vcpu; - struct kvm_vm *vm; - struct ucall uc; - bool guest_done = false; - - vm = test_vm_create(&vcpu); - - test_fw_regs_before_vm_start(vcpu); - - while (!guest_done) { - vcpu_run(vcpu); - - switch (get_ucall(vcpu, &uc)) { - case UCALL_SYNC: - test_guest_stage(&vm, &vcpu); - break; - case UCALL_DONE: - guest_done = true; - break; - case UCALL_ABORT: - REPORT_GUEST_ASSERT(uc); - break; - default: - TEST_FAIL("Unexpected guest exit"); - } - } - - kvm_vm_free(vm); -} - -int main(void) -{ - test_run(); - return 0; -} diff --git a/tools/testing/selftests/kvm/aarch64/mmio_abort.c b/tools/testing/selftests/kvm/aarch64/mmio_abort.c deleted file mode 100644 index 8b7a80a51b1c..000000000000 --- a/tools/testing/selftests/kvm/aarch64/mmio_abort.c +++ /dev/null @@ -1,159 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * mmio_abort - Tests for userspace MMIO abort injection - * - * Copyright (c) 2024 Google LLC - */ -#include "processor.h" -#include "test_util.h" - -#define MMIO_ADDR 0x8000000ULL - -static u64 expected_abort_pc; - -static void expect_sea_handler(struct ex_regs *regs) -{ - u64 esr = read_sysreg(esr_el1); - - GUEST_ASSERT_EQ(regs->pc, expected_abort_pc); - GUEST_ASSERT_EQ(ESR_ELx_EC(esr), ESR_ELx_EC_DABT_CUR); - GUEST_ASSERT_EQ(esr & ESR_ELx_FSC_TYPE, ESR_ELx_FSC_EXTABT); - - GUEST_DONE(); -} - -static void unexpected_dabt_handler(struct ex_regs *regs) -{ - GUEST_FAIL("Unexpected data abort at PC: %lx\n", regs->pc); -} - -static struct kvm_vm *vm_create_with_dabt_handler(struct kvm_vcpu **vcpu, void *guest_code, - handler_fn dabt_handler) -{ - struct kvm_vm *vm = vm_create_with_one_vcpu(vcpu, guest_code); - - vm_init_descriptor_tables(vm); - vcpu_init_descriptor_tables(*vcpu); - vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT, ESR_ELx_EC_DABT_CUR, dabt_handler); - - virt_map(vm, MMIO_ADDR, MMIO_ADDR, 1); - - return vm; -} - -static void vcpu_inject_extabt(struct kvm_vcpu *vcpu) -{ - struct kvm_vcpu_events events = {}; - - events.exception.ext_dabt_pending = true; - 
vcpu_events_set(vcpu, &events); -} - -static void vcpu_run_expect_done(struct kvm_vcpu *vcpu) -{ - struct ucall uc; - - vcpu_run(vcpu); - switch (get_ucall(vcpu, &uc)) { - case UCALL_ABORT: - REPORT_GUEST_ASSERT(uc); - break; - case UCALL_DONE: - break; - default: - TEST_FAIL("Unexpected ucall: %lu", uc.cmd); - } -} - -extern char test_mmio_abort_insn; - -static void test_mmio_abort_guest(void) -{ - WRITE_ONCE(expected_abort_pc, (u64)&test_mmio_abort_insn); - - asm volatile("test_mmio_abort_insn:\n\t" - "ldr x0, [%0]\n\t" - : : "r" (MMIO_ADDR) : "x0", "memory"); - - GUEST_FAIL("MMIO instruction should not retire"); -} - -/* - * Test that KVM doesn't complete MMIO emulation when userspace has made an - * external abort pending for the instruction. - */ -static void test_mmio_abort(void) -{ - struct kvm_vcpu *vcpu; - struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_mmio_abort_guest, - expect_sea_handler); - struct kvm_run *run = vcpu->run; - - vcpu_run(vcpu); - TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_MMIO); - TEST_ASSERT_EQ(run->mmio.phys_addr, MMIO_ADDR); - TEST_ASSERT_EQ(run->mmio.len, sizeof(unsigned long)); - TEST_ASSERT(!run->mmio.is_write, "Expected MMIO read"); - - vcpu_inject_extabt(vcpu); - vcpu_run_expect_done(vcpu); - kvm_vm_free(vm); -} - -extern char test_mmio_nisv_insn; - -static void test_mmio_nisv_guest(void) -{ - WRITE_ONCE(expected_abort_pc, (u64)&test_mmio_nisv_insn); - - asm volatile("test_mmio_nisv_insn:\n\t" - "ldr x0, [%0], #8\n\t" - : : "r" (MMIO_ADDR) : "x0", "memory"); - - GUEST_FAIL("MMIO instruction should not retire"); -} - -/* - * Test that the KVM_RUN ioctl fails for ESR_EL2.ISV=0 MMIO aborts if userspace - * hasn't enabled KVM_CAP_ARM_NISV_TO_USER. - */ -static void test_mmio_nisv(void) -{ - struct kvm_vcpu *vcpu; - struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_mmio_nisv_guest, - unexpected_dabt_handler); - - TEST_ASSERT(_vcpu_run(vcpu), "Expected nonzero return code from KVM_RUN"); - TEST_ASSERT_EQ(errno, ENOSYS); - - kvm_vm_free(vm); -} - -/* - * Test that ESR_EL2.ISV=0 MMIO aborts reach userspace and that an injected SEA - * reaches the guest. - */ -static void test_mmio_nisv_abort(void) -{ - struct kvm_vcpu *vcpu; - struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_mmio_nisv_guest, - expect_sea_handler); - struct kvm_run *run = vcpu->run; - - vm_enable_cap(vm, KVM_CAP_ARM_NISV_TO_USER, 1); - - vcpu_run(vcpu); - TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_ARM_NISV); - TEST_ASSERT_EQ(run->arm_nisv.fault_ipa, MMIO_ADDR); - - vcpu_inject_extabt(vcpu); - vcpu_run_expect_done(vcpu); - kvm_vm_free(vm); -} - -int main(void) -{ - test_mmio_abort(); - test_mmio_nisv(); - test_mmio_nisv_abort(); -} diff --git a/tools/testing/selftests/kvm/aarch64/no-vgic-v3.c b/tools/testing/selftests/kvm/aarch64/no-vgic-v3.c deleted file mode 100644 index ebd70430c89d..000000000000 --- a/tools/testing/selftests/kvm/aarch64/no-vgic-v3.c +++ /dev/null @@ -1,175 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 - -// Check that, on a GICv3 system, not configuring GICv3 correctly -// results in all of the sysregs generating an UNDEF exception. 
- -#include -#include -#include - -static volatile bool handled; - -#define __check_sr_read(r) \ - ({ \ - uint64_t val; \ - \ - handled = false; \ - dsb(sy); \ - val = read_sysreg_s(SYS_ ## r); \ - val; \ - }) - -#define __check_sr_write(r) \ - do { \ - handled = false; \ - dsb(sy); \ - write_sysreg_s(0, SYS_ ## r); \ - isb(); \ - } while(0) - -/* Fatal checks */ -#define check_sr_read(r) \ - do { \ - __check_sr_read(r); \ - __GUEST_ASSERT(handled, #r " no read trap"); \ - } while(0) - -#define check_sr_write(r) \ - do { \ - __check_sr_write(r); \ - __GUEST_ASSERT(handled, #r " no write trap"); \ - } while(0) - -#define check_sr_rw(r) \ - do { \ - check_sr_read(r); \ - check_sr_write(r); \ - } while(0) - -static void guest_code(void) -{ - uint64_t val; - - /* - * Check that we advertise that ID_AA64PFR0_EL1.GIC == 0, having - * hidden the feature at runtime without any other userspace action. - */ - __GUEST_ASSERT(FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_GIC), - read_sysreg(id_aa64pfr0_el1)) == 0, - "GICv3 wrongly advertised"); - - /* - * Access all GICv3 registers, and fail if we don't get an UNDEF. - * Note that we happily access all the APxRn registers without - * checking their existance, as all we want to see is a failure. - */ - check_sr_rw(ICC_PMR_EL1); - check_sr_read(ICC_IAR0_EL1); - check_sr_write(ICC_EOIR0_EL1); - check_sr_rw(ICC_HPPIR0_EL1); - check_sr_rw(ICC_BPR0_EL1); - check_sr_rw(ICC_AP0R0_EL1); - check_sr_rw(ICC_AP0R1_EL1); - check_sr_rw(ICC_AP0R2_EL1); - check_sr_rw(ICC_AP0R3_EL1); - check_sr_rw(ICC_AP1R0_EL1); - check_sr_rw(ICC_AP1R1_EL1); - check_sr_rw(ICC_AP1R2_EL1); - check_sr_rw(ICC_AP1R3_EL1); - check_sr_write(ICC_DIR_EL1); - check_sr_read(ICC_RPR_EL1); - check_sr_write(ICC_SGI1R_EL1); - check_sr_write(ICC_ASGI1R_EL1); - check_sr_write(ICC_SGI0R_EL1); - check_sr_read(ICC_IAR1_EL1); - check_sr_write(ICC_EOIR1_EL1); - check_sr_rw(ICC_HPPIR1_EL1); - check_sr_rw(ICC_BPR1_EL1); - check_sr_rw(ICC_CTLR_EL1); - check_sr_rw(ICC_IGRPEN0_EL1); - check_sr_rw(ICC_IGRPEN1_EL1); - - /* - * ICC_SRE_EL1 may not be trappable, as ICC_SRE_EL2.Enable can - * be RAO/WI. Engage in non-fatal accesses, starting with a - * write of 0 to try and disable SRE, and let's see if it - * sticks. - */ - __check_sr_write(ICC_SRE_EL1); - if (!handled) - GUEST_PRINTF("ICC_SRE_EL1 write not trapping (OK)\n"); - - val = __check_sr_read(ICC_SRE_EL1); - if (!handled) { - __GUEST_ASSERT((val & BIT(0)), - "ICC_SRE_EL1 not trapped but ICC_SRE_EL1.SRE not set\n"); - GUEST_PRINTF("ICC_SRE_EL1 read not trapping (OK)\n"); - } - - GUEST_DONE(); -} - -static void guest_undef_handler(struct ex_regs *regs) -{ - /* Success, we've gracefully exploded! 
*/ - handled = true; - regs->pc += 4; -} - -static void test_run_vcpu(struct kvm_vcpu *vcpu) -{ - struct ucall uc; - - do { - vcpu_run(vcpu); - - switch (get_ucall(vcpu, &uc)) { - case UCALL_ABORT: - REPORT_GUEST_ASSERT(uc); - break; - case UCALL_PRINTF: - printf("%s", uc.buffer); - break; - case UCALL_DONE: - break; - default: - TEST_FAIL("Unknown ucall %lu", uc.cmd); - } - } while (uc.cmd != UCALL_DONE); -} - -static void test_guest_no_gicv3(void) -{ - struct kvm_vcpu *vcpu; - struct kvm_vm *vm; - - /* Create a VM without a GICv3 */ - vm = vm_create_with_one_vcpu(&vcpu, guest_code); - - vm_init_descriptor_tables(vm); - vcpu_init_descriptor_tables(vcpu); - - vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT, - ESR_ELx_EC_UNKNOWN, guest_undef_handler); - - test_run_vcpu(vcpu); - - kvm_vm_free(vm); -} - -int main(int argc, char *argv[]) -{ - struct kvm_vcpu *vcpu; - struct kvm_vm *vm; - uint64_t pfr0; - - vm = vm_create_with_one_vcpu(&vcpu, NULL); - pfr0 = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1)); - __TEST_REQUIRE(FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_GIC), pfr0), - "GICv3 not supported."); - kvm_vm_free(vm); - - test_guest_no_gicv3(); - - return 0; -} diff --git a/tools/testing/selftests/kvm/aarch64/page_fault_test.c b/tools/testing/selftests/kvm/aarch64/page_fault_test.c deleted file mode 100644 index ec33a8f9c908..000000000000 --- a/tools/testing/selftests/kvm/aarch64/page_fault_test.c +++ /dev/null @@ -1,1135 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * page_fault_test.c - Test stage 2 faults. - * - * This test tries different combinations of guest accesses (e.g., write, - * S1PTW), backing source type (e.g., anon) and types of faults (e.g., read on - * hugetlbfs with a hole). It checks that the expected handling method is - * called (e.g., uffd faults with the right address and write/read flag). - */ -#include -#include -#include -#include -#include -#include -#include -#include "guest_modes.h" -#include "userfaultfd_util.h" - -/* Guest virtual addresses that point to the test page and its PTE. */ -#define TEST_GVA 0xc0000000 -#define TEST_EXEC_GVA (TEST_GVA + 0x8) -#define TEST_PTE_GVA 0xb0000000 -#define TEST_DATA 0x0123456789ABCDEF - -static uint64_t *guest_test_memory = (uint64_t *)TEST_GVA; - -#define CMD_NONE (0) -#define CMD_SKIP_TEST (1ULL << 1) -#define CMD_HOLE_PT (1ULL << 2) -#define CMD_HOLE_DATA (1ULL << 3) -#define CMD_CHECK_WRITE_IN_DIRTY_LOG (1ULL << 4) -#define CMD_CHECK_S1PTW_WR_IN_DIRTY_LOG (1ULL << 5) -#define CMD_CHECK_NO_WRITE_IN_DIRTY_LOG (1ULL << 6) -#define CMD_CHECK_NO_S1PTW_WR_IN_DIRTY_LOG (1ULL << 7) -#define CMD_SET_PTE_AF (1ULL << 8) - -#define PREPARE_FN_NR 10 -#define CHECK_FN_NR 10 - -static struct event_cnt { - int mmio_exits; - int fail_vcpu_runs; - int uffd_faults; - /* uffd_faults is incremented from multiple threads. 
*/ - pthread_mutex_t uffd_faults_mutex; -} events; - -struct test_desc { - const char *name; - uint64_t mem_mark_cmd; - /* Skip the test if any prepare function returns false */ - bool (*guest_prepare[PREPARE_FN_NR])(void); - void (*guest_test)(void); - void (*guest_test_check[CHECK_FN_NR])(void); - uffd_handler_t uffd_pt_handler; - uffd_handler_t uffd_data_handler; - void (*dabt_handler)(struct ex_regs *regs); - void (*iabt_handler)(struct ex_regs *regs); - void (*mmio_handler)(struct kvm_vm *vm, struct kvm_run *run); - void (*fail_vcpu_run_handler)(int ret); - uint32_t pt_memslot_flags; - uint32_t data_memslot_flags; - bool skip; - struct event_cnt expected_events; -}; - -struct test_params { - enum vm_mem_backing_src_type src_type; - struct test_desc *test_desc; -}; - -static inline void flush_tlb_page(uint64_t vaddr) -{ - uint64_t page = vaddr >> 12; - - dsb(ishst); - asm volatile("tlbi vaae1is, %0" :: "r" (page)); - dsb(ish); - isb(); -} - -static void guest_write64(void) -{ - uint64_t val; - - WRITE_ONCE(*guest_test_memory, TEST_DATA); - val = READ_ONCE(*guest_test_memory); - GUEST_ASSERT_EQ(val, TEST_DATA); -} - -/* Check the system for atomic instructions. */ -static bool guest_check_lse(void) -{ - uint64_t isar0 = read_sysreg(id_aa64isar0_el1); - uint64_t atomic; - - atomic = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_ATOMIC), isar0); - return atomic >= 2; -} - -static bool guest_check_dc_zva(void) -{ - uint64_t dczid = read_sysreg(dczid_el0); - uint64_t dzp = FIELD_GET(ARM64_FEATURE_MASK(DCZID_EL0_DZP), dczid); - - return dzp == 0; -} - -/* Compare and swap instruction. */ -static void guest_cas(void) -{ - uint64_t val; - - GUEST_ASSERT(guest_check_lse()); - asm volatile(".arch_extension lse\n" - "casal %0, %1, [%2]\n" - :: "r" (0ul), "r" (TEST_DATA), "r" (guest_test_memory)); - val = READ_ONCE(*guest_test_memory); - GUEST_ASSERT_EQ(val, TEST_DATA); -} - -static void guest_read64(void) -{ - uint64_t val; - - val = READ_ONCE(*guest_test_memory); - GUEST_ASSERT_EQ(val, 0); -} - -/* Address translation instruction */ -static void guest_at(void) -{ - uint64_t par; - - asm volatile("at s1e1r, %0" :: "r" (guest_test_memory)); - isb(); - par = read_sysreg(par_el1); - - /* Bit 1 indicates whether the AT was successful */ - GUEST_ASSERT_EQ(par & 1, 0); -} - -/* - * The size of the block written by "dc zva" is guaranteed to be between (2 << - * 0) and (2 << 9), which is safe in our case as we need the write to happen - * for at least a word, and not more than a page. - */ -static void guest_dc_zva(void) -{ - uint16_t val; - - asm volatile("dc zva, %0" :: "r" (guest_test_memory)); - dsb(ish); - val = READ_ONCE(*guest_test_memory); - GUEST_ASSERT_EQ(val, 0); -} - -/* - * Pre-indexing loads and stores don't have a valid syndrome (ESR_EL2.ISV==0). - * And that's special because KVM must take special care with those: they - * should still count as accesses for dirty logging or user-faulting, but - * should be handled differently on mmio. - */ -static void guest_ld_preidx(void) -{ - uint64_t val; - uint64_t addr = TEST_GVA - 8; - - /* - * This ends up accessing "TEST_GVA + 8 - 8", where "TEST_GVA - 8" is - * in a gap between memslots not backing by anything. - */ - asm volatile("ldr %0, [%1, #8]!" - : "=r" (val), "+r" (addr)); - GUEST_ASSERT_EQ(val, 0); - GUEST_ASSERT_EQ(addr, TEST_GVA); -} - -static void guest_st_preidx(void) -{ - uint64_t val = TEST_DATA; - uint64_t addr = TEST_GVA - 8; - - asm volatile("str %0, [%1, #8]!" 
- : "+r" (val), "+r" (addr)); - - GUEST_ASSERT_EQ(addr, TEST_GVA); - val = READ_ONCE(*guest_test_memory); -} - -static bool guest_set_ha(void) -{ - uint64_t mmfr1 = read_sysreg(id_aa64mmfr1_el1); - uint64_t hadbs, tcr; - - /* Skip if HA is not supported. */ - hadbs = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_HAFDBS), mmfr1); - if (hadbs == 0) - return false; - - tcr = read_sysreg(tcr_el1) | TCR_EL1_HA; - write_sysreg(tcr, tcr_el1); - isb(); - - return true; -} - -static bool guest_clear_pte_af(void) -{ - *((uint64_t *)TEST_PTE_GVA) &= ~PTE_AF; - flush_tlb_page(TEST_GVA); - - return true; -} - -static void guest_check_pte_af(void) -{ - dsb(ish); - GUEST_ASSERT_EQ(*((uint64_t *)TEST_PTE_GVA) & PTE_AF, PTE_AF); -} - -static void guest_check_write_in_dirty_log(void) -{ - GUEST_SYNC(CMD_CHECK_WRITE_IN_DIRTY_LOG); -} - -static void guest_check_no_write_in_dirty_log(void) -{ - GUEST_SYNC(CMD_CHECK_NO_WRITE_IN_DIRTY_LOG); -} - -static void guest_check_s1ptw_wr_in_dirty_log(void) -{ - GUEST_SYNC(CMD_CHECK_S1PTW_WR_IN_DIRTY_LOG); -} - -static void guest_check_no_s1ptw_wr_in_dirty_log(void) -{ - GUEST_SYNC(CMD_CHECK_NO_S1PTW_WR_IN_DIRTY_LOG); -} - -static void guest_exec(void) -{ - int (*code)(void) = (int (*)(void))TEST_EXEC_GVA; - int ret; - - ret = code(); - GUEST_ASSERT_EQ(ret, 0x77); -} - -static bool guest_prepare(struct test_desc *test) -{ - bool (*prepare_fn)(void); - int i; - - for (i = 0; i < PREPARE_FN_NR; i++) { - prepare_fn = test->guest_prepare[i]; - if (prepare_fn && !prepare_fn()) - return false; - } - - return true; -} - -static void guest_test_check(struct test_desc *test) -{ - void (*check_fn)(void); - int i; - - for (i = 0; i < CHECK_FN_NR; i++) { - check_fn = test->guest_test_check[i]; - if (check_fn) - check_fn(); - } -} - -static void guest_code(struct test_desc *test) -{ - if (!guest_prepare(test)) - GUEST_SYNC(CMD_SKIP_TEST); - - GUEST_SYNC(test->mem_mark_cmd); - - if (test->guest_test) - test->guest_test(); - - guest_test_check(test); - GUEST_DONE(); -} - -static void no_dabt_handler(struct ex_regs *regs) -{ - GUEST_FAIL("Unexpected dabt, far_el1 = 0x%lx", read_sysreg(far_el1)); -} - -static void no_iabt_handler(struct ex_regs *regs) -{ - GUEST_FAIL("Unexpected iabt, pc = 0x%lx", regs->pc); -} - -static struct uffd_args { - char *copy; - void *hva; - uint64_t paging_size; -} pt_args, data_args; - -/* Returns true to continue the test, and false if it should be skipped. 
*/ -static int uffd_generic_handler(int uffd_mode, int uffd, struct uffd_msg *msg, - struct uffd_args *args) -{ - uint64_t addr = msg->arg.pagefault.address; - uint64_t flags = msg->arg.pagefault.flags; - struct uffdio_copy copy; - int ret; - - TEST_ASSERT(uffd_mode == UFFDIO_REGISTER_MODE_MISSING, - "The only expected UFFD mode is MISSING"); - TEST_ASSERT_EQ(addr, (uint64_t)args->hva); - - pr_debug("uffd fault: addr=%p write=%d\n", - (void *)addr, !!(flags & UFFD_PAGEFAULT_FLAG_WRITE)); - - copy.src = (uint64_t)args->copy; - copy.dst = addr; - copy.len = args->paging_size; - copy.mode = 0; - - ret = ioctl(uffd, UFFDIO_COPY, ©); - if (ret == -1) { - pr_info("Failed UFFDIO_COPY in 0x%lx with errno: %d\n", - addr, errno); - return ret; - } - - pthread_mutex_lock(&events.uffd_faults_mutex); - events.uffd_faults += 1; - pthread_mutex_unlock(&events.uffd_faults_mutex); - return 0; -} - -static int uffd_pt_handler(int mode, int uffd, struct uffd_msg *msg) -{ - return uffd_generic_handler(mode, uffd, msg, &pt_args); -} - -static int uffd_data_handler(int mode, int uffd, struct uffd_msg *msg) -{ - return uffd_generic_handler(mode, uffd, msg, &data_args); -} - -static void setup_uffd_args(struct userspace_mem_region *region, - struct uffd_args *args) -{ - args->hva = (void *)region->region.userspace_addr; - args->paging_size = region->region.memory_size; - - args->copy = malloc(args->paging_size); - TEST_ASSERT(args->copy, "Failed to allocate data copy."); - memcpy(args->copy, args->hva, args->paging_size); -} - -static void setup_uffd(struct kvm_vm *vm, struct test_params *p, - struct uffd_desc **pt_uffd, struct uffd_desc **data_uffd) -{ - struct test_desc *test = p->test_desc; - int uffd_mode = UFFDIO_REGISTER_MODE_MISSING; - - setup_uffd_args(vm_get_mem_region(vm, MEM_REGION_PT), &pt_args); - setup_uffd_args(vm_get_mem_region(vm, MEM_REGION_TEST_DATA), &data_args); - - *pt_uffd = NULL; - if (test->uffd_pt_handler) - *pt_uffd = uffd_setup_demand_paging(uffd_mode, 0, - pt_args.hva, - pt_args.paging_size, - 1, test->uffd_pt_handler); - - *data_uffd = NULL; - if (test->uffd_data_handler) - *data_uffd = uffd_setup_demand_paging(uffd_mode, 0, - data_args.hva, - data_args.paging_size, - 1, test->uffd_data_handler); -} - -static void free_uffd(struct test_desc *test, struct uffd_desc *pt_uffd, - struct uffd_desc *data_uffd) -{ - if (test->uffd_pt_handler) - uffd_stop_demand_paging(pt_uffd); - if (test->uffd_data_handler) - uffd_stop_demand_paging(data_uffd); - - free(pt_args.copy); - free(data_args.copy); -} - -static int uffd_no_handler(int mode, int uffd, struct uffd_msg *msg) -{ - TEST_FAIL("There was no UFFD fault expected."); - return -1; -} - -/* Returns false if the test should be skipped. 
*/ -static bool punch_hole_in_backing_store(struct kvm_vm *vm, - struct userspace_mem_region *region) -{ - void *hva = (void *)region->region.userspace_addr; - uint64_t paging_size = region->region.memory_size; - int ret, fd = region->fd; - - if (fd != -1) { - ret = fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, - 0, paging_size); - TEST_ASSERT(ret == 0, "fallocate failed"); - } else { - ret = madvise(hva, paging_size, MADV_DONTNEED); - TEST_ASSERT(ret == 0, "madvise failed"); - } - - return true; -} - -static void mmio_on_test_gpa_handler(struct kvm_vm *vm, struct kvm_run *run) -{ - struct userspace_mem_region *region; - void *hva; - - region = vm_get_mem_region(vm, MEM_REGION_TEST_DATA); - hva = (void *)region->region.userspace_addr; - - TEST_ASSERT_EQ(run->mmio.phys_addr, region->region.guest_phys_addr); - - memcpy(hva, run->mmio.data, run->mmio.len); - events.mmio_exits += 1; -} - -static void mmio_no_handler(struct kvm_vm *vm, struct kvm_run *run) -{ - uint64_t data; - - memcpy(&data, run->mmio.data, sizeof(data)); - pr_debug("addr=%lld len=%d w=%d data=%lx\n", - run->mmio.phys_addr, run->mmio.len, - run->mmio.is_write, data); - TEST_FAIL("There was no MMIO exit expected."); -} - -static bool check_write_in_dirty_log(struct kvm_vm *vm, - struct userspace_mem_region *region, - uint64_t host_pg_nr) -{ - unsigned long *bmap; - bool first_page_dirty; - uint64_t size = region->region.memory_size; - - /* getpage_size() is not always equal to vm->page_size */ - bmap = bitmap_zalloc(size / getpagesize()); - kvm_vm_get_dirty_log(vm, region->region.slot, bmap); - first_page_dirty = test_bit(host_pg_nr, bmap); - free(bmap); - return first_page_dirty; -} - -/* Returns true to continue the test, and false if it should be skipped. */ -static bool handle_cmd(struct kvm_vm *vm, int cmd) -{ - struct userspace_mem_region *data_region, *pt_region; - bool continue_test = true; - uint64_t pte_gpa, pte_pg; - - data_region = vm_get_mem_region(vm, MEM_REGION_TEST_DATA); - pt_region = vm_get_mem_region(vm, MEM_REGION_PT); - pte_gpa = addr_hva2gpa(vm, virt_get_pte_hva(vm, TEST_GVA)); - pte_pg = (pte_gpa - pt_region->region.guest_phys_addr) / getpagesize(); - - if (cmd == CMD_SKIP_TEST) - continue_test = false; - - if (cmd & CMD_HOLE_PT) - continue_test = punch_hole_in_backing_store(vm, pt_region); - if (cmd & CMD_HOLE_DATA) - continue_test = punch_hole_in_backing_store(vm, data_region); - if (cmd & CMD_CHECK_WRITE_IN_DIRTY_LOG) - TEST_ASSERT(check_write_in_dirty_log(vm, data_region, 0), - "Missing write in dirty log"); - if (cmd & CMD_CHECK_S1PTW_WR_IN_DIRTY_LOG) - TEST_ASSERT(check_write_in_dirty_log(vm, pt_region, pte_pg), - "Missing s1ptw write in dirty log"); - if (cmd & CMD_CHECK_NO_WRITE_IN_DIRTY_LOG) - TEST_ASSERT(!check_write_in_dirty_log(vm, data_region, 0), - "Unexpected write in dirty log"); - if (cmd & CMD_CHECK_NO_S1PTW_WR_IN_DIRTY_LOG) - TEST_ASSERT(!check_write_in_dirty_log(vm, pt_region, pte_pg), - "Unexpected s1ptw write in dirty log"); - - return continue_test; -} - -void fail_vcpu_run_no_handler(int ret) -{ - TEST_FAIL("Unexpected vcpu run failure"); -} - -void fail_vcpu_run_mmio_no_syndrome_handler(int ret) -{ - TEST_ASSERT(errno == ENOSYS, - "The mmio handler should have returned not implemented."); - events.fail_vcpu_runs += 1; -} - -typedef uint32_t aarch64_insn_t; -extern aarch64_insn_t __exec_test[2]; - -noinline void __return_0x77(void) -{ - asm volatile("__exec_test: mov x0, #0x77\n" - "ret\n"); -} - -/* - * Note that this function runs on the host before the test VM 
starts: there's - * no need to sync the D$ and I$ caches. - */ -static void load_exec_code_for_test(struct kvm_vm *vm) -{ - uint64_t *code; - struct userspace_mem_region *region; - void *hva; - - region = vm_get_mem_region(vm, MEM_REGION_TEST_DATA); - hva = (void *)region->region.userspace_addr; - - assert(TEST_EXEC_GVA > TEST_GVA); - code = hva + TEST_EXEC_GVA - TEST_GVA; - memcpy(code, __exec_test, sizeof(__exec_test)); -} - -static void setup_abort_handlers(struct kvm_vm *vm, struct kvm_vcpu *vcpu, - struct test_desc *test) -{ - vm_init_descriptor_tables(vm); - vcpu_init_descriptor_tables(vcpu); - - vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT, - ESR_ELx_EC_DABT_CUR, no_dabt_handler); - vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT, - ESR_ELx_EC_IABT_CUR, no_iabt_handler); -} - -static void setup_gva_maps(struct kvm_vm *vm) -{ - struct userspace_mem_region *region; - uint64_t pte_gpa; - - region = vm_get_mem_region(vm, MEM_REGION_TEST_DATA); - /* Map TEST_GVA first. This will install a new PTE. */ - virt_pg_map(vm, TEST_GVA, region->region.guest_phys_addr); - /* Then map TEST_PTE_GVA to the above PTE. */ - pte_gpa = addr_hva2gpa(vm, virt_get_pte_hva(vm, TEST_GVA)); - virt_pg_map(vm, TEST_PTE_GVA, pte_gpa); -} - -enum pf_test_memslots { - CODE_AND_DATA_MEMSLOT, - PAGE_TABLE_MEMSLOT, - TEST_DATA_MEMSLOT, -}; - -/* - * Create a memslot for code and data at pfn=0, and test-data and PT ones - * at max_gfn. - */ -static void setup_memslots(struct kvm_vm *vm, struct test_params *p) -{ - uint64_t backing_src_pagesz = get_backing_src_pagesz(p->src_type); - uint64_t guest_page_size = vm->page_size; - uint64_t max_gfn = vm_compute_max_gfn(vm); - /* Enough for 2M of code when using 4K guest pages. */ - uint64_t code_npages = 512; - uint64_t pt_size, data_size, data_gpa; - - /* - * This test requires 1 pgd, 2 pud, 4 pmd, and 6 pte pages when using - * VM_MODE_P48V48_4K. Note that the .text takes ~1.6MBs. That's 13 - * pages. VM_MODE_P48V48_4K is the mode with most PT pages; let's use - * twice that just in case. 
- */ - pt_size = 26 * guest_page_size; - - /* memslot sizes and gpa's must be aligned to the backing page size */ - pt_size = align_up(pt_size, backing_src_pagesz); - data_size = align_up(guest_page_size, backing_src_pagesz); - data_gpa = (max_gfn * guest_page_size) - data_size; - data_gpa = align_down(data_gpa, backing_src_pagesz); - - vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, 0, - CODE_AND_DATA_MEMSLOT, code_npages, 0); - vm->memslots[MEM_REGION_CODE] = CODE_AND_DATA_MEMSLOT; - vm->memslots[MEM_REGION_DATA] = CODE_AND_DATA_MEMSLOT; - - vm_userspace_mem_region_add(vm, p->src_type, data_gpa - pt_size, - PAGE_TABLE_MEMSLOT, pt_size / guest_page_size, - p->test_desc->pt_memslot_flags); - vm->memslots[MEM_REGION_PT] = PAGE_TABLE_MEMSLOT; - - vm_userspace_mem_region_add(vm, p->src_type, data_gpa, TEST_DATA_MEMSLOT, - data_size / guest_page_size, - p->test_desc->data_memslot_flags); - vm->memslots[MEM_REGION_TEST_DATA] = TEST_DATA_MEMSLOT; -} - -static void setup_ucall(struct kvm_vm *vm) -{ - struct userspace_mem_region *region = vm_get_mem_region(vm, MEM_REGION_TEST_DATA); - - ucall_init(vm, region->region.guest_phys_addr + region->region.memory_size); -} - -static void setup_default_handlers(struct test_desc *test) -{ - if (!test->mmio_handler) - test->mmio_handler = mmio_no_handler; - - if (!test->fail_vcpu_run_handler) - test->fail_vcpu_run_handler = fail_vcpu_run_no_handler; -} - -static void check_event_counts(struct test_desc *test) -{ - TEST_ASSERT_EQ(test->expected_events.uffd_faults, events.uffd_faults); - TEST_ASSERT_EQ(test->expected_events.mmio_exits, events.mmio_exits); - TEST_ASSERT_EQ(test->expected_events.fail_vcpu_runs, events.fail_vcpu_runs); -} - -static void print_test_banner(enum vm_guest_mode mode, struct test_params *p) -{ - struct test_desc *test = p->test_desc; - - pr_debug("Test: %s\n", test->name); - pr_debug("Testing guest mode: %s\n", vm_guest_mode_string(mode)); - pr_debug("Testing memory backing src type: %s\n", - vm_mem_backing_src_alias(p->src_type)->name); -} - -static void reset_event_counts(void) -{ - memset(&events, 0, sizeof(events)); -} - -/* - * This function either succeeds, skips the test (after setting test->skip), or - * fails with a TEST_FAIL that aborts all tests. - */ -static void vcpu_run_loop(struct kvm_vm *vm, struct kvm_vcpu *vcpu, - struct test_desc *test) -{ - struct kvm_run *run; - struct ucall uc; - int ret; - - run = vcpu->run; - - for (;;) { - ret = _vcpu_run(vcpu); - if (ret) { - test->fail_vcpu_run_handler(ret); - goto done; - } - - switch (get_ucall(vcpu, &uc)) { - case UCALL_SYNC: - if (!handle_cmd(vm, uc.args[1])) { - test->skip = true; - goto done; - } - break; - case UCALL_ABORT: - REPORT_GUEST_ASSERT(uc); - break; - case UCALL_DONE: - goto done; - case UCALL_NONE: - if (run->exit_reason == KVM_EXIT_MMIO) - test->mmio_handler(vm, run); - break; - default: - TEST_FAIL("Unknown ucall %lu", uc.cmd); - } - } - -done: - pr_debug(test->skip ? 
"Skipped.\n" : "Done.\n"); -} - -static void run_test(enum vm_guest_mode mode, void *arg) -{ - struct test_params *p = (struct test_params *)arg; - struct test_desc *test = p->test_desc; - struct kvm_vm *vm; - struct kvm_vcpu *vcpu; - struct uffd_desc *pt_uffd, *data_uffd; - - print_test_banner(mode, p); - - vm = ____vm_create(VM_SHAPE(mode)); - setup_memslots(vm, p); - kvm_vm_elf_load(vm, program_invocation_name); - setup_ucall(vm); - vcpu = vm_vcpu_add(vm, 0, guest_code); - - setup_gva_maps(vm); - - reset_event_counts(); - - /* - * Set some code in the data memslot for the guest to execute (only - * applicable to the EXEC tests). This has to be done before - * setup_uffd() as that function copies the memslot data for the uffd - * handler. - */ - load_exec_code_for_test(vm); - setup_uffd(vm, p, &pt_uffd, &data_uffd); - setup_abort_handlers(vm, vcpu, test); - setup_default_handlers(test); - vcpu_args_set(vcpu, 1, test); - - vcpu_run_loop(vm, vcpu, test); - - kvm_vm_free(vm); - free_uffd(test, pt_uffd, data_uffd); - - /* - * Make sure we check the events after the uffd threads have exited, - * which means they updated their respective event counters. - */ - if (!test->skip) - check_event_counts(test); -} - -static void help(char *name) -{ - puts(""); - printf("usage: %s [-h] [-s mem-type]\n", name); - puts(""); - guest_modes_help(); - backing_src_help("-s"); - puts(""); -} - -#define SNAME(s) #s -#define SCAT2(a, b) SNAME(a ## _ ## b) -#define SCAT3(a, b, c) SCAT2(a, SCAT2(b, c)) -#define SCAT4(a, b, c, d) SCAT2(a, SCAT3(b, c, d)) - -#define _CHECK(_test) _CHECK_##_test -#define _PREPARE(_test) _PREPARE_##_test -#define _PREPARE_guest_read64 NULL -#define _PREPARE_guest_ld_preidx NULL -#define _PREPARE_guest_write64 NULL -#define _PREPARE_guest_st_preidx NULL -#define _PREPARE_guest_exec NULL -#define _PREPARE_guest_at NULL -#define _PREPARE_guest_dc_zva guest_check_dc_zva -#define _PREPARE_guest_cas guest_check_lse - -/* With or without access flag checks */ -#define _PREPARE_with_af guest_set_ha, guest_clear_pte_af -#define _PREPARE_no_af NULL -#define _CHECK_with_af guest_check_pte_af -#define _CHECK_no_af NULL - -/* Performs an access and checks that no faults were triggered. 
*/ -#define TEST_ACCESS(_access, _with_af, _mark_cmd) \ -{ \ - .name = SCAT3(_access, _with_af, #_mark_cmd), \ - .guest_prepare = { _PREPARE(_with_af), \ - _PREPARE(_access) }, \ - .mem_mark_cmd = _mark_cmd, \ - .guest_test = _access, \ - .guest_test_check = { _CHECK(_with_af) }, \ - .expected_events = { 0 }, \ -} - -#define TEST_UFFD(_access, _with_af, _mark_cmd, \ - _uffd_data_handler, _uffd_pt_handler, _uffd_faults) \ -{ \ - .name = SCAT4(uffd, _access, _with_af, #_mark_cmd), \ - .guest_prepare = { _PREPARE(_with_af), \ - _PREPARE(_access) }, \ - .guest_test = _access, \ - .mem_mark_cmd = _mark_cmd, \ - .guest_test_check = { _CHECK(_with_af) }, \ - .uffd_data_handler = _uffd_data_handler, \ - .uffd_pt_handler = _uffd_pt_handler, \ - .expected_events = { .uffd_faults = _uffd_faults, }, \ -} - -#define TEST_DIRTY_LOG(_access, _with_af, _test_check, _pt_check) \ -{ \ - .name = SCAT3(dirty_log, _access, _with_af), \ - .data_memslot_flags = KVM_MEM_LOG_DIRTY_PAGES, \ - .pt_memslot_flags = KVM_MEM_LOG_DIRTY_PAGES, \ - .guest_prepare = { _PREPARE(_with_af), \ - _PREPARE(_access) }, \ - .guest_test = _access, \ - .guest_test_check = { _CHECK(_with_af), _test_check, _pt_check }, \ - .expected_events = { 0 }, \ -} - -#define TEST_UFFD_AND_DIRTY_LOG(_access, _with_af, _uffd_data_handler, \ - _uffd_faults, _test_check, _pt_check) \ -{ \ - .name = SCAT3(uffd_and_dirty_log, _access, _with_af), \ - .data_memslot_flags = KVM_MEM_LOG_DIRTY_PAGES, \ - .pt_memslot_flags = KVM_MEM_LOG_DIRTY_PAGES, \ - .guest_prepare = { _PREPARE(_with_af), \ - _PREPARE(_access) }, \ - .guest_test = _access, \ - .mem_mark_cmd = CMD_HOLE_DATA | CMD_HOLE_PT, \ - .guest_test_check = { _CHECK(_with_af), _test_check, _pt_check }, \ - .uffd_data_handler = _uffd_data_handler, \ - .uffd_pt_handler = uffd_pt_handler, \ - .expected_events = { .uffd_faults = _uffd_faults, }, \ -} - -#define TEST_RO_MEMSLOT(_access, _mmio_handler, _mmio_exits) \ -{ \ - .name = SCAT2(ro_memslot, _access), \ - .data_memslot_flags = KVM_MEM_READONLY, \ - .pt_memslot_flags = KVM_MEM_READONLY, \ - .guest_prepare = { _PREPARE(_access) }, \ - .guest_test = _access, \ - .mmio_handler = _mmio_handler, \ - .expected_events = { .mmio_exits = _mmio_exits }, \ -} - -#define TEST_RO_MEMSLOT_NO_SYNDROME(_access) \ -{ \ - .name = SCAT2(ro_memslot_no_syndrome, _access), \ - .data_memslot_flags = KVM_MEM_READONLY, \ - .pt_memslot_flags = KVM_MEM_READONLY, \ - .guest_prepare = { _PREPARE(_access) }, \ - .guest_test = _access, \ - .fail_vcpu_run_handler = fail_vcpu_run_mmio_no_syndrome_handler, \ - .expected_events = { .fail_vcpu_runs = 1 }, \ -} - -#define TEST_RO_MEMSLOT_AND_DIRTY_LOG(_access, _mmio_handler, _mmio_exits, \ - _test_check) \ -{ \ - .name = SCAT2(ro_memslot, _access), \ - .data_memslot_flags = KVM_MEM_READONLY | KVM_MEM_LOG_DIRTY_PAGES, \ - .pt_memslot_flags = KVM_MEM_READONLY | KVM_MEM_LOG_DIRTY_PAGES, \ - .guest_prepare = { _PREPARE(_access) }, \ - .guest_test = _access, \ - .guest_test_check = { _test_check }, \ - .mmio_handler = _mmio_handler, \ - .expected_events = { .mmio_exits = _mmio_exits}, \ -} - -#define TEST_RO_MEMSLOT_NO_SYNDROME_AND_DIRTY_LOG(_access, _test_check) \ -{ \ - .name = SCAT2(ro_memslot_no_syn_and_dlog, _access), \ - .data_memslot_flags = KVM_MEM_READONLY | KVM_MEM_LOG_DIRTY_PAGES, \ - .pt_memslot_flags = KVM_MEM_READONLY | KVM_MEM_LOG_DIRTY_PAGES, \ - .guest_prepare = { _PREPARE(_access) }, \ - .guest_test = _access, \ - .guest_test_check = { _test_check }, \ - .fail_vcpu_run_handler = fail_vcpu_run_mmio_no_syndrome_handler, \ - 
.expected_events = { .fail_vcpu_runs = 1 }, \ -} - -#define TEST_RO_MEMSLOT_AND_UFFD(_access, _mmio_handler, _mmio_exits, \ - _uffd_data_handler, _uffd_faults) \ -{ \ - .name = SCAT2(ro_memslot_uffd, _access), \ - .data_memslot_flags = KVM_MEM_READONLY, \ - .pt_memslot_flags = KVM_MEM_READONLY, \ - .mem_mark_cmd = CMD_HOLE_DATA | CMD_HOLE_PT, \ - .guest_prepare = { _PREPARE(_access) }, \ - .guest_test = _access, \ - .uffd_data_handler = _uffd_data_handler, \ - .uffd_pt_handler = uffd_pt_handler, \ - .mmio_handler = _mmio_handler, \ - .expected_events = { .mmio_exits = _mmio_exits, \ - .uffd_faults = _uffd_faults }, \ -} - -#define TEST_RO_MEMSLOT_NO_SYNDROME_AND_UFFD(_access, _uffd_data_handler, \ - _uffd_faults) \ -{ \ - .name = SCAT2(ro_memslot_no_syndrome, _access), \ - .data_memslot_flags = KVM_MEM_READONLY, \ - .pt_memslot_flags = KVM_MEM_READONLY, \ - .mem_mark_cmd = CMD_HOLE_DATA | CMD_HOLE_PT, \ - .guest_prepare = { _PREPARE(_access) }, \ - .guest_test = _access, \ - .uffd_data_handler = _uffd_data_handler, \ - .uffd_pt_handler = uffd_pt_handler, \ - .fail_vcpu_run_handler = fail_vcpu_run_mmio_no_syndrome_handler, \ - .expected_events = { .fail_vcpu_runs = 1, \ - .uffd_faults = _uffd_faults }, \ -} - -static struct test_desc tests[] = { - - /* Check that HW is setting the Access Flag (AF) (sanity checks). */ - TEST_ACCESS(guest_read64, with_af, CMD_NONE), - TEST_ACCESS(guest_ld_preidx, with_af, CMD_NONE), - TEST_ACCESS(guest_cas, with_af, CMD_NONE), - TEST_ACCESS(guest_write64, with_af, CMD_NONE), - TEST_ACCESS(guest_st_preidx, with_af, CMD_NONE), - TEST_ACCESS(guest_dc_zva, with_af, CMD_NONE), - TEST_ACCESS(guest_exec, with_af, CMD_NONE), - - /* - * Punch a hole in the data backing store, and then try multiple - * accesses: reads should rturn zeroes, and writes should - * re-populate the page. Moreover, the test also check that no - * exception was generated in the guest. Note that this - * reading/writing behavior is the same as reading/writing a - * punched page (with fallocate(FALLOC_FL_PUNCH_HOLE)) from - * userspace. - */ - TEST_ACCESS(guest_read64, no_af, CMD_HOLE_DATA), - TEST_ACCESS(guest_cas, no_af, CMD_HOLE_DATA), - TEST_ACCESS(guest_ld_preidx, no_af, CMD_HOLE_DATA), - TEST_ACCESS(guest_write64, no_af, CMD_HOLE_DATA), - TEST_ACCESS(guest_st_preidx, no_af, CMD_HOLE_DATA), - TEST_ACCESS(guest_at, no_af, CMD_HOLE_DATA), - TEST_ACCESS(guest_dc_zva, no_af, CMD_HOLE_DATA), - - /* - * Punch holes in the data and PT backing stores and mark them for - * userfaultfd handling. This should result in 2 faults: the access - * on the data backing store, and its respective S1 page table walk - * (S1PTW). - */ - TEST_UFFD(guest_read64, with_af, CMD_HOLE_DATA | CMD_HOLE_PT, - uffd_data_handler, uffd_pt_handler, 2), - TEST_UFFD(guest_read64, no_af, CMD_HOLE_DATA | CMD_HOLE_PT, - uffd_data_handler, uffd_pt_handler, 2), - TEST_UFFD(guest_cas, with_af, CMD_HOLE_DATA | CMD_HOLE_PT, - uffd_data_handler, uffd_pt_handler, 2), - /* - * Can't test guest_at with_af as it's IMPDEF whether the AF is set. - * The S1PTW fault should still be marked as a write. 
- */ - TEST_UFFD(guest_at, no_af, CMD_HOLE_DATA | CMD_HOLE_PT, - uffd_no_handler, uffd_pt_handler, 1), - TEST_UFFD(guest_ld_preidx, with_af, CMD_HOLE_DATA | CMD_HOLE_PT, - uffd_data_handler, uffd_pt_handler, 2), - TEST_UFFD(guest_write64, with_af, CMD_HOLE_DATA | CMD_HOLE_PT, - uffd_data_handler, uffd_pt_handler, 2), - TEST_UFFD(guest_dc_zva, with_af, CMD_HOLE_DATA | CMD_HOLE_PT, - uffd_data_handler, uffd_pt_handler, 2), - TEST_UFFD(guest_st_preidx, with_af, CMD_HOLE_DATA | CMD_HOLE_PT, - uffd_data_handler, uffd_pt_handler, 2), - TEST_UFFD(guest_exec, with_af, CMD_HOLE_DATA | CMD_HOLE_PT, - uffd_data_handler, uffd_pt_handler, 2), - - /* - * Try accesses when the data and PT memory regions are both - * tracked for dirty logging. - */ - TEST_DIRTY_LOG(guest_read64, with_af, guest_check_no_write_in_dirty_log, - guest_check_s1ptw_wr_in_dirty_log), - TEST_DIRTY_LOG(guest_read64, no_af, guest_check_no_write_in_dirty_log, - guest_check_no_s1ptw_wr_in_dirty_log), - TEST_DIRTY_LOG(guest_ld_preidx, with_af, - guest_check_no_write_in_dirty_log, - guest_check_s1ptw_wr_in_dirty_log), - TEST_DIRTY_LOG(guest_at, no_af, guest_check_no_write_in_dirty_log, - guest_check_no_s1ptw_wr_in_dirty_log), - TEST_DIRTY_LOG(guest_exec, with_af, guest_check_no_write_in_dirty_log, - guest_check_s1ptw_wr_in_dirty_log), - TEST_DIRTY_LOG(guest_write64, with_af, guest_check_write_in_dirty_log, - guest_check_s1ptw_wr_in_dirty_log), - TEST_DIRTY_LOG(guest_cas, with_af, guest_check_write_in_dirty_log, - guest_check_s1ptw_wr_in_dirty_log), - TEST_DIRTY_LOG(guest_dc_zva, with_af, guest_check_write_in_dirty_log, - guest_check_s1ptw_wr_in_dirty_log), - TEST_DIRTY_LOG(guest_st_preidx, with_af, guest_check_write_in_dirty_log, - guest_check_s1ptw_wr_in_dirty_log), - - /* - * Access when the data and PT memory regions are both marked for - * dirty logging and UFFD at the same time. The expected result is - * that writes should mark the dirty log and trigger a userfaultfd - * write fault. Reads/execs should result in a read userfaultfd - * fault, and nothing in the dirty log. Any S1PTW should result in - * a write in the dirty log and a userfaultfd write. 
- */ - TEST_UFFD_AND_DIRTY_LOG(guest_read64, with_af, - uffd_data_handler, 2, - guest_check_no_write_in_dirty_log, - guest_check_s1ptw_wr_in_dirty_log), - TEST_UFFD_AND_DIRTY_LOG(guest_read64, no_af, - uffd_data_handler, 2, - guest_check_no_write_in_dirty_log, - guest_check_no_s1ptw_wr_in_dirty_log), - TEST_UFFD_AND_DIRTY_LOG(guest_ld_preidx, with_af, - uffd_data_handler, - 2, guest_check_no_write_in_dirty_log, - guest_check_s1ptw_wr_in_dirty_log), - TEST_UFFD_AND_DIRTY_LOG(guest_at, with_af, uffd_no_handler, 1, - guest_check_no_write_in_dirty_log, - guest_check_s1ptw_wr_in_dirty_log), - TEST_UFFD_AND_DIRTY_LOG(guest_exec, with_af, - uffd_data_handler, 2, - guest_check_no_write_in_dirty_log, - guest_check_s1ptw_wr_in_dirty_log), - TEST_UFFD_AND_DIRTY_LOG(guest_write64, with_af, - uffd_data_handler, - 2, guest_check_write_in_dirty_log, - guest_check_s1ptw_wr_in_dirty_log), - TEST_UFFD_AND_DIRTY_LOG(guest_cas, with_af, - uffd_data_handler, 2, - guest_check_write_in_dirty_log, - guest_check_s1ptw_wr_in_dirty_log), - TEST_UFFD_AND_DIRTY_LOG(guest_dc_zva, with_af, - uffd_data_handler, - 2, guest_check_write_in_dirty_log, - guest_check_s1ptw_wr_in_dirty_log), - TEST_UFFD_AND_DIRTY_LOG(guest_st_preidx, with_af, - uffd_data_handler, 2, - guest_check_write_in_dirty_log, - guest_check_s1ptw_wr_in_dirty_log), - /* - * Access when both the PT and data regions are marked read-only - * (with KVM_MEM_READONLY). Writes with a syndrome result in an - * MMIO exit, writes with no syndrome (e.g., CAS) result in a - * failed vcpu run, and reads/execs with and without syndroms do - * not fault. - */ - TEST_RO_MEMSLOT(guest_read64, 0, 0), - TEST_RO_MEMSLOT(guest_ld_preidx, 0, 0), - TEST_RO_MEMSLOT(guest_at, 0, 0), - TEST_RO_MEMSLOT(guest_exec, 0, 0), - TEST_RO_MEMSLOT(guest_write64, mmio_on_test_gpa_handler, 1), - TEST_RO_MEMSLOT_NO_SYNDROME(guest_dc_zva), - TEST_RO_MEMSLOT_NO_SYNDROME(guest_cas), - TEST_RO_MEMSLOT_NO_SYNDROME(guest_st_preidx), - - /* - * The PT and data regions are both read-only and marked - * for dirty logging at the same time. The expected result is that - * for writes there should be no write in the dirty log. The - * readonly handling is the same as if the memslot was not marked - * for dirty logging: writes with a syndrome result in an MMIO - * exit, and writes with no syndrome result in a failed vcpu run. - */ - TEST_RO_MEMSLOT_AND_DIRTY_LOG(guest_read64, 0, 0, - guest_check_no_write_in_dirty_log), - TEST_RO_MEMSLOT_AND_DIRTY_LOG(guest_ld_preidx, 0, 0, - guest_check_no_write_in_dirty_log), - TEST_RO_MEMSLOT_AND_DIRTY_LOG(guest_at, 0, 0, - guest_check_no_write_in_dirty_log), - TEST_RO_MEMSLOT_AND_DIRTY_LOG(guest_exec, 0, 0, - guest_check_no_write_in_dirty_log), - TEST_RO_MEMSLOT_AND_DIRTY_LOG(guest_write64, mmio_on_test_gpa_handler, - 1, guest_check_no_write_in_dirty_log), - TEST_RO_MEMSLOT_NO_SYNDROME_AND_DIRTY_LOG(guest_dc_zva, - guest_check_no_write_in_dirty_log), - TEST_RO_MEMSLOT_NO_SYNDROME_AND_DIRTY_LOG(guest_cas, - guest_check_no_write_in_dirty_log), - TEST_RO_MEMSLOT_NO_SYNDROME_AND_DIRTY_LOG(guest_st_preidx, - guest_check_no_write_in_dirty_log), - - /* - * The PT and data regions are both read-only and punched with - * holes tracked with userfaultfd. The expected result is the - * union of both userfaultfd and read-only behaviors. For example, - * write accesses result in a userfaultfd write fault and an MMIO - * exit. Writes with no syndrome result in a failed vcpu run and - * no userfaultfd write fault. Reads result in userfaultfd getting - * triggered. 
- */ - TEST_RO_MEMSLOT_AND_UFFD(guest_read64, 0, 0, uffd_data_handler, 2), - TEST_RO_MEMSLOT_AND_UFFD(guest_ld_preidx, 0, 0, uffd_data_handler, 2), - TEST_RO_MEMSLOT_AND_UFFD(guest_at, 0, 0, uffd_no_handler, 1), - TEST_RO_MEMSLOT_AND_UFFD(guest_exec, 0, 0, uffd_data_handler, 2), - TEST_RO_MEMSLOT_AND_UFFD(guest_write64, mmio_on_test_gpa_handler, 1, - uffd_data_handler, 2), - TEST_RO_MEMSLOT_NO_SYNDROME_AND_UFFD(guest_cas, uffd_data_handler, 2), - TEST_RO_MEMSLOT_NO_SYNDROME_AND_UFFD(guest_dc_zva, uffd_no_handler, 1), - TEST_RO_MEMSLOT_NO_SYNDROME_AND_UFFD(guest_st_preidx, uffd_no_handler, 1), - - { 0 } -}; - -static void for_each_test_and_guest_mode(enum vm_mem_backing_src_type src_type) -{ - struct test_desc *t; - - for (t = &tests[0]; t->name; t++) { - if (t->skip) - continue; - - struct test_params p = { - .src_type = src_type, - .test_desc = t, - }; - - for_each_guest_mode(run_test, &p); - } -} - -int main(int argc, char *argv[]) -{ - enum vm_mem_backing_src_type src_type; - int opt; - - src_type = DEFAULT_VM_MEM_SRC; - - while ((opt = getopt(argc, argv, "hm:s:")) != -1) { - switch (opt) { - case 'm': - guest_modes_cmdline(optarg); - break; - case 's': - src_type = parse_backing_src_type(optarg); - break; - case 'h': - default: - help(argv[0]); - exit(0); - } - } - - for_each_test_and_guest_mode(src_type); - return 0; -} diff --git a/tools/testing/selftests/kvm/aarch64/psci_test.c b/tools/testing/selftests/kvm/aarch64/psci_test.c deleted file mode 100644 index ab491ee9e5f7..000000000000 --- a/tools/testing/selftests/kvm/aarch64/psci_test.c +++ /dev/null @@ -1,290 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * psci_test - Tests relating to KVM's PSCI implementation. - * - * Copyright (c) 2021 Google LLC. - * - * This test includes: - * - A regression test for a race between KVM servicing the PSCI CPU_ON call - * and userspace reading the targeted vCPU's registers. - * - A test for KVM's handling of PSCI SYSTEM_SUSPEND and the associated - * KVM_SYSTEM_EVENT_SUSPEND UAPI. 
- */ - -#include -#include -#include - -#include "kvm_util.h" -#include "processor.h" -#include "test_util.h" - -#define CPU_ON_ENTRY_ADDR 0xfeedf00dul -#define CPU_ON_CONTEXT_ID 0xdeadc0deul - -static uint64_t psci_cpu_on(uint64_t target_cpu, uint64_t entry_addr, - uint64_t context_id) -{ - struct arm_smccc_res res; - - smccc_hvc(PSCI_0_2_FN64_CPU_ON, target_cpu, entry_addr, context_id, - 0, 0, 0, 0, &res); - - return res.a0; -} - -static uint64_t psci_affinity_info(uint64_t target_affinity, - uint64_t lowest_affinity_level) -{ - struct arm_smccc_res res; - - smccc_hvc(PSCI_0_2_FN64_AFFINITY_INFO, target_affinity, lowest_affinity_level, - 0, 0, 0, 0, 0, &res); - - return res.a0; -} - -static uint64_t psci_system_suspend(uint64_t entry_addr, uint64_t context_id) -{ - struct arm_smccc_res res; - - smccc_hvc(PSCI_1_0_FN64_SYSTEM_SUSPEND, entry_addr, context_id, - 0, 0, 0, 0, 0, &res); - - return res.a0; -} - -static uint64_t psci_system_off2(uint64_t type, uint64_t cookie) -{ - struct arm_smccc_res res; - - smccc_hvc(PSCI_1_3_FN64_SYSTEM_OFF2, type, cookie, 0, 0, 0, 0, 0, &res); - - return res.a0; -} - -static uint64_t psci_features(uint32_t func_id) -{ - struct arm_smccc_res res; - - smccc_hvc(PSCI_1_0_FN_PSCI_FEATURES, func_id, 0, 0, 0, 0, 0, 0, &res); - - return res.a0; -} - -static void vcpu_power_off(struct kvm_vcpu *vcpu) -{ - struct kvm_mp_state mp_state = { - .mp_state = KVM_MP_STATE_STOPPED, - }; - - vcpu_mp_state_set(vcpu, &mp_state); -} - -static struct kvm_vm *setup_vm(void *guest_code, struct kvm_vcpu **source, - struct kvm_vcpu **target) -{ - struct kvm_vcpu_init init; - struct kvm_vm *vm; - - vm = vm_create(2); - - vm_ioctl(vm, KVM_ARM_PREFERRED_TARGET, &init); - init.features[0] |= (1 << KVM_ARM_VCPU_PSCI_0_2); - - *source = aarch64_vcpu_add(vm, 0, &init, guest_code); - *target = aarch64_vcpu_add(vm, 1, &init, guest_code); - - return vm; -} - -static void enter_guest(struct kvm_vcpu *vcpu) -{ - struct ucall uc; - - vcpu_run(vcpu); - if (get_ucall(vcpu, &uc) == UCALL_ABORT) - REPORT_GUEST_ASSERT(uc); -} - -static void assert_vcpu_reset(struct kvm_vcpu *vcpu) -{ - uint64_t obs_pc, obs_x0; - - obs_pc = vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.pc)); - obs_x0 = vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.regs[0])); - - TEST_ASSERT(obs_pc == CPU_ON_ENTRY_ADDR, - "unexpected target cpu pc: %lx (expected: %lx)", - obs_pc, CPU_ON_ENTRY_ADDR); - TEST_ASSERT(obs_x0 == CPU_ON_CONTEXT_ID, - "unexpected target context id: %lx (expected: %lx)", - obs_x0, CPU_ON_CONTEXT_ID); -} - -static void guest_test_cpu_on(uint64_t target_cpu) -{ - uint64_t target_state; - - GUEST_ASSERT(!psci_cpu_on(target_cpu, CPU_ON_ENTRY_ADDR, CPU_ON_CONTEXT_ID)); - - do { - target_state = psci_affinity_info(target_cpu, 0); - - GUEST_ASSERT((target_state == PSCI_0_2_AFFINITY_LEVEL_ON) || - (target_state == PSCI_0_2_AFFINITY_LEVEL_OFF)); - } while (target_state != PSCI_0_2_AFFINITY_LEVEL_ON); - - GUEST_DONE(); -} - -static void host_test_cpu_on(void) -{ - struct kvm_vcpu *source, *target; - uint64_t target_mpidr; - struct kvm_vm *vm; - struct ucall uc; - - vm = setup_vm(guest_test_cpu_on, &source, &target); - - /* - * make sure the target is already off when executing the test. 
- */ - vcpu_power_off(target); - - target_mpidr = vcpu_get_reg(target, KVM_ARM64_SYS_REG(SYS_MPIDR_EL1)); - vcpu_args_set(source, 1, target_mpidr & MPIDR_HWID_BITMASK); - enter_guest(source); - - if (get_ucall(source, &uc) != UCALL_DONE) - TEST_FAIL("Unhandled ucall: %lu", uc.cmd); - - assert_vcpu_reset(target); - kvm_vm_free(vm); -} - -static void guest_test_system_suspend(void) -{ - uint64_t ret; - - /* assert that SYSTEM_SUSPEND is discoverable */ - GUEST_ASSERT(!psci_features(PSCI_1_0_FN_SYSTEM_SUSPEND)); - GUEST_ASSERT(!psci_features(PSCI_1_0_FN64_SYSTEM_SUSPEND)); - - ret = psci_system_suspend(CPU_ON_ENTRY_ADDR, CPU_ON_CONTEXT_ID); - GUEST_SYNC(ret); -} - -static void host_test_system_suspend(void) -{ - struct kvm_vcpu *source, *target; - struct kvm_run *run; - struct kvm_vm *vm; - - vm = setup_vm(guest_test_system_suspend, &source, &target); - vm_enable_cap(vm, KVM_CAP_ARM_SYSTEM_SUSPEND, 0); - - vcpu_power_off(target); - run = source->run; - - enter_guest(source); - - TEST_ASSERT_KVM_EXIT_REASON(source, KVM_EXIT_SYSTEM_EVENT); - TEST_ASSERT(run->system_event.type == KVM_SYSTEM_EVENT_SUSPEND, - "Unhandled system event: %u (expected: %u)", - run->system_event.type, KVM_SYSTEM_EVENT_SUSPEND); - - kvm_vm_free(vm); -} - -static void guest_test_system_off2(void) -{ - uint64_t ret; - - /* assert that SYSTEM_OFF2 is discoverable */ - GUEST_ASSERT(psci_features(PSCI_1_3_FN_SYSTEM_OFF2) & - PSCI_1_3_OFF_TYPE_HIBERNATE_OFF); - GUEST_ASSERT(psci_features(PSCI_1_3_FN64_SYSTEM_OFF2) & - PSCI_1_3_OFF_TYPE_HIBERNATE_OFF); - - /* With non-zero 'cookie' field, it should fail */ - ret = psci_system_off2(PSCI_1_3_OFF_TYPE_HIBERNATE_OFF, 1); - GUEST_ASSERT(ret == PSCI_RET_INVALID_PARAMS); - - /* - * This would normally never return, so KVM sets the return value - * to PSCI_RET_INTERNAL_FAILURE. The test case *does* return, so - * that it can test both values for HIBERNATE_OFF. - */ - ret = psci_system_off2(PSCI_1_3_OFF_TYPE_HIBERNATE_OFF, 0); - GUEST_ASSERT(ret == PSCI_RET_INTERNAL_FAILURE); - - /* - * Revision F.b of the PSCI v1.3 specification documents zero as an - * alias for HIBERNATE_OFF, since that's the value used in earlier - * revisions of the spec and some implementations in the field. - */ - ret = psci_system_off2(0, 1); - GUEST_ASSERT(ret == PSCI_RET_INVALID_PARAMS); - - ret = psci_system_off2(0, 0); - GUEST_ASSERT(ret == PSCI_RET_INTERNAL_FAILURE); - - GUEST_DONE(); -} - -static void host_test_system_off2(void) -{ - struct kvm_vcpu *source, *target; - struct kvm_mp_state mps; - uint64_t psci_version = 0; - int nr_shutdowns = 0; - struct kvm_run *run; - struct ucall uc; - - setup_vm(guest_test_system_off2, &source, &target); - - psci_version = vcpu_get_reg(target, KVM_REG_ARM_PSCI_VERSION); - - TEST_ASSERT(psci_version >= PSCI_VERSION(1, 3), - "Unexpected PSCI version %lu.%lu", - PSCI_VERSION_MAJOR(psci_version), - PSCI_VERSION_MINOR(psci_version)); - - vcpu_power_off(target); - run = source->run; - - enter_guest(source); - while (run->exit_reason == KVM_EXIT_SYSTEM_EVENT) { - TEST_ASSERT(run->system_event.type == KVM_SYSTEM_EVENT_SHUTDOWN, - "Unhandled system event: %u (expected: %u)", - run->system_event.type, KVM_SYSTEM_EVENT_SHUTDOWN); - TEST_ASSERT(run->system_event.ndata >= 1, - "Unexpected amount of system event data: %u (expected, >= 1)", - run->system_event.ndata); - TEST_ASSERT(run->system_event.data[0] & KVM_SYSTEM_EVENT_SHUTDOWN_FLAG_PSCI_OFF2, - "PSCI_OFF2 flag not set. 
Flags %llu (expected %llu)", - run->system_event.data[0], KVM_SYSTEM_EVENT_SHUTDOWN_FLAG_PSCI_OFF2); - - nr_shutdowns++; - - /* Restart the vCPU */ - mps.mp_state = KVM_MP_STATE_RUNNABLE; - vcpu_mp_state_set(source, &mps); - - enter_guest(source); - } - - TEST_ASSERT(get_ucall(source, &uc) == UCALL_DONE, "Guest did not exit cleanly"); - TEST_ASSERT(nr_shutdowns == 2, "Two shutdown events were expected, but saw %d", nr_shutdowns); -} - -int main(void) -{ - TEST_REQUIRE(kvm_has_cap(KVM_CAP_ARM_SYSTEM_SUSPEND)); - - host_test_cpu_on(); - host_test_system_suspend(); - host_test_system_off2(); - return 0; -} diff --git a/tools/testing/selftests/kvm/aarch64/set_id_regs.c b/tools/testing/selftests/kvm/aarch64/set_id_regs.c deleted file mode 100644 index bc6cf50e5135..000000000000 --- a/tools/testing/selftests/kvm/aarch64/set_id_regs.c +++ /dev/null @@ -1,695 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * set_id_regs - Test for setting ID register from usersapce. - * - * Copyright (c) 2023 Google LLC. - * - * - * Test that KVM supports setting ID registers from userspace and handles the - * feature set correctly. - */ - -#include -#include "kvm_util.h" -#include "processor.h" -#include "test_util.h" -#include - -enum ftr_type { - FTR_EXACT, /* Use a predefined safe value */ - FTR_LOWER_SAFE, /* Smaller value is safe */ - FTR_HIGHER_SAFE, /* Bigger value is safe */ - FTR_HIGHER_OR_ZERO_SAFE, /* Bigger value is safe, but 0 is biggest */ - FTR_END, /* Mark the last ftr bits */ -}; - -#define FTR_SIGNED true /* Value should be treated as signed */ -#define FTR_UNSIGNED false /* Value should be treated as unsigned */ - -struct reg_ftr_bits { - char *name; - bool sign; - enum ftr_type type; - uint8_t shift; - uint64_t mask; - /* - * For FTR_EXACT, safe_val is used as the exact safe value. - * For FTR_LOWER_SAFE, safe_val is used as the minimal safe value. 
- */ - int64_t safe_val; -}; - -struct test_feature_reg { - uint32_t reg; - const struct reg_ftr_bits *ftr_bits; -}; - -#define __REG_FTR_BITS(NAME, SIGNED, TYPE, SHIFT, MASK, SAFE_VAL) \ - { \ - .name = #NAME, \ - .sign = SIGNED, \ - .type = TYPE, \ - .shift = SHIFT, \ - .mask = MASK, \ - .safe_val = SAFE_VAL, \ - } - -#define REG_FTR_BITS(type, reg, field, safe_val) \ - __REG_FTR_BITS(reg##_##field, FTR_UNSIGNED, type, reg##_##field##_SHIFT, \ - reg##_##field##_MASK, safe_val) - -#define S_REG_FTR_BITS(type, reg, field, safe_val) \ - __REG_FTR_BITS(reg##_##field, FTR_SIGNED, type, reg##_##field##_SHIFT, \ - reg##_##field##_MASK, safe_val) - -#define REG_FTR_END \ - { \ - .type = FTR_END, \ - } - -static const struct reg_ftr_bits ftr_id_aa64dfr0_el1[] = { - S_REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64DFR0_EL1, DoubleLock, 0), - REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64DFR0_EL1, WRPs, 0), - S_REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64DFR0_EL1, PMUVer, 0), - REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64DFR0_EL1, DebugVer, ID_AA64DFR0_EL1_DebugVer_IMP), - REG_FTR_END, -}; - -static const struct reg_ftr_bits ftr_id_dfr0_el1[] = { - S_REG_FTR_BITS(FTR_LOWER_SAFE, ID_DFR0_EL1, PerfMon, ID_DFR0_EL1_PerfMon_PMUv3), - REG_FTR_BITS(FTR_LOWER_SAFE, ID_DFR0_EL1, CopDbg, ID_DFR0_EL1_CopDbg_Armv8), - REG_FTR_END, -}; - -static const struct reg_ftr_bits ftr_id_aa64isar0_el1[] = { - REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, RNDR, 0), - REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, TLB, 0), - REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, TS, 0), - REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, FHM, 0), - REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, DP, 0), - REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, SM4, 0), - REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, SM3, 0), - REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, SHA3, 0), - REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, RDM, 0), - REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, TME, 0), - REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, ATOMIC, 0), - REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, CRC32, 0), - REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, SHA2, 0), - REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, SHA1, 0), - REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, AES, 0), - REG_FTR_END, -}; - -static const struct reg_ftr_bits ftr_id_aa64isar1_el1[] = { - REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, LS64, 0), - REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, XS, 0), - REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, I8MM, 0), - REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, DGH, 0), - REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, BF16, 0), - REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, SPECRES, 0), - REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, SB, 0), - REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, FRINTTS, 0), - REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, LRCPC, 0), - REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, FCMA, 0), - REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, JSCVT, 0), - REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, DPB, 0), - REG_FTR_END, -}; - -static const struct reg_ftr_bits ftr_id_aa64isar2_el1[] = { - REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR2_EL1, BC, 0), - REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR2_EL1, RPRES, 0), - REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR2_EL1, WFxT, 0), - REG_FTR_END, -}; - -static const struct reg_ftr_bits ftr_id_aa64pfr0_el1[] = { - REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, CSV3, 0), - REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, CSV2, 0), - REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, DIT, 0), - 
REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, SEL2, 0), - REG_FTR_BITS(FTR_EXACT, ID_AA64PFR0_EL1, GIC, 0), - REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL3, 0), - REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL2, 0), - REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL1, 0), - REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL0, 0), - REG_FTR_END, -}; - -static const struct reg_ftr_bits ftr_id_aa64pfr1_el1[] = { - REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR1_EL1, CSV2_frac, 0), - REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR1_EL1, SSBS, ID_AA64PFR1_EL1_SSBS_NI), - REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR1_EL1, BT, 0), - REG_FTR_END, -}; - -static const struct reg_ftr_bits ftr_id_aa64mmfr0_el1[] = { - REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, ECV, 0), - REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, EXS, 0), - S_REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, TGRAN4, 0), - S_REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, TGRAN64, 0), - REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, TGRAN16, 0), - REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, BIGENDEL0, 0), - REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, SNSMEM, 0), - REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, BIGEND, 0), - REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, ASIDBITS, 0), - REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, PARANGE, 0), - REG_FTR_END, -}; - -static const struct reg_ftr_bits ftr_id_aa64mmfr1_el1[] = { - REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, TIDCP1, 0), - REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, AFP, 0), - REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, ETS, 0), - REG_FTR_BITS(FTR_HIGHER_SAFE, ID_AA64MMFR1_EL1, SpecSEI, 0), - REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, PAN, 0), - REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, LO, 0), - REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, HPDS, 0), - REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, HAFDBS, 0), - REG_FTR_END, -}; - -static const struct reg_ftr_bits ftr_id_aa64mmfr2_el1[] = { - REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR2_EL1, E0PD, 0), - REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR2_EL1, BBM, 0), - REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR2_EL1, TTL, 0), - REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR2_EL1, AT, 0), - REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR2_EL1, ST, 0), - REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR2_EL1, VARange, 0), - REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR2_EL1, IESB, 0), - REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR2_EL1, LSM, 0), - REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR2_EL1, UAO, 0), - REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR2_EL1, CnP, 0), - REG_FTR_END, -}; - -static const struct reg_ftr_bits ftr_id_aa64zfr0_el1[] = { - REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, F64MM, 0), - REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, F32MM, 0), - REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, I8MM, 0), - REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, SM4, 0), - REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, SHA3, 0), - REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, BF16, 0), - REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, BitPerm, 0), - REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, AES, 0), - REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, SVEver, 0), - REG_FTR_END, -}; - -#define TEST_REG(id, table) \ - { \ - .reg = id, \ - .ftr_bits = &((table)[0]), \ - } - -static struct test_feature_reg test_regs[] = { - TEST_REG(SYS_ID_AA64DFR0_EL1, ftr_id_aa64dfr0_el1), - TEST_REG(SYS_ID_DFR0_EL1, ftr_id_dfr0_el1), - TEST_REG(SYS_ID_AA64ISAR0_EL1, ftr_id_aa64isar0_el1), - TEST_REG(SYS_ID_AA64ISAR1_EL1, ftr_id_aa64isar1_el1), - TEST_REG(SYS_ID_AA64ISAR2_EL1, 
ftr_id_aa64isar2_el1), - TEST_REG(SYS_ID_AA64PFR0_EL1, ftr_id_aa64pfr0_el1), - TEST_REG(SYS_ID_AA64PFR1_EL1, ftr_id_aa64pfr1_el1), - TEST_REG(SYS_ID_AA64MMFR0_EL1, ftr_id_aa64mmfr0_el1), - TEST_REG(SYS_ID_AA64MMFR1_EL1, ftr_id_aa64mmfr1_el1), - TEST_REG(SYS_ID_AA64MMFR2_EL1, ftr_id_aa64mmfr2_el1), - TEST_REG(SYS_ID_AA64ZFR0_EL1, ftr_id_aa64zfr0_el1), -}; - -#define GUEST_REG_SYNC(id) GUEST_SYNC_ARGS(0, id, read_sysreg_s(id), 0, 0); - -static void guest_code(void) -{ - GUEST_REG_SYNC(SYS_ID_AA64DFR0_EL1); - GUEST_REG_SYNC(SYS_ID_DFR0_EL1); - GUEST_REG_SYNC(SYS_ID_AA64ISAR0_EL1); - GUEST_REG_SYNC(SYS_ID_AA64ISAR1_EL1); - GUEST_REG_SYNC(SYS_ID_AA64ISAR2_EL1); - GUEST_REG_SYNC(SYS_ID_AA64PFR0_EL1); - GUEST_REG_SYNC(SYS_ID_AA64MMFR0_EL1); - GUEST_REG_SYNC(SYS_ID_AA64MMFR1_EL1); - GUEST_REG_SYNC(SYS_ID_AA64MMFR2_EL1); - GUEST_REG_SYNC(SYS_ID_AA64ZFR0_EL1); - GUEST_REG_SYNC(SYS_CTR_EL0); - - GUEST_DONE(); -} - -/* Return a safe value to a given ftr_bits an ftr value */ -uint64_t get_safe_value(const struct reg_ftr_bits *ftr_bits, uint64_t ftr) -{ - uint64_t ftr_max = GENMASK_ULL(ARM64_FEATURE_FIELD_BITS - 1, 0); - - if (ftr_bits->sign == FTR_UNSIGNED) { - switch (ftr_bits->type) { - case FTR_EXACT: - ftr = ftr_bits->safe_val; - break; - case FTR_LOWER_SAFE: - if (ftr > ftr_bits->safe_val) - ftr--; - break; - case FTR_HIGHER_SAFE: - if (ftr < ftr_max) - ftr++; - break; - case FTR_HIGHER_OR_ZERO_SAFE: - if (ftr == ftr_max) - ftr = 0; - else if (ftr != 0) - ftr++; - break; - default: - break; - } - } else if (ftr != ftr_max) { - switch (ftr_bits->type) { - case FTR_EXACT: - ftr = ftr_bits->safe_val; - break; - case FTR_LOWER_SAFE: - if (ftr > ftr_bits->safe_val) - ftr--; - break; - case FTR_HIGHER_SAFE: - if (ftr < ftr_max - 1) - ftr++; - break; - case FTR_HIGHER_OR_ZERO_SAFE: - if (ftr != 0 && ftr != ftr_max - 1) - ftr++; - break; - default: - break; - } - } - - return ftr; -} - -/* Return an invalid value to a given ftr_bits an ftr value */ -uint64_t get_invalid_value(const struct reg_ftr_bits *ftr_bits, uint64_t ftr) -{ - uint64_t ftr_max = GENMASK_ULL(ARM64_FEATURE_FIELD_BITS - 1, 0); - - if (ftr_bits->sign == FTR_UNSIGNED) { - switch (ftr_bits->type) { - case FTR_EXACT: - ftr = max((uint64_t)ftr_bits->safe_val + 1, ftr + 1); - break; - case FTR_LOWER_SAFE: - ftr++; - break; - case FTR_HIGHER_SAFE: - ftr--; - break; - case FTR_HIGHER_OR_ZERO_SAFE: - if (ftr == 0) - ftr = ftr_max; - else - ftr--; - break; - default: - break; - } - } else if (ftr != ftr_max) { - switch (ftr_bits->type) { - case FTR_EXACT: - ftr = max((uint64_t)ftr_bits->safe_val + 1, ftr + 1); - break; - case FTR_LOWER_SAFE: - ftr++; - break; - case FTR_HIGHER_SAFE: - ftr--; - break; - case FTR_HIGHER_OR_ZERO_SAFE: - if (ftr == 0) - ftr = ftr_max - 1; - else - ftr--; - break; - default: - break; - } - } else { - ftr = 0; - } - - return ftr; -} - -static uint64_t test_reg_set_success(struct kvm_vcpu *vcpu, uint64_t reg, - const struct reg_ftr_bits *ftr_bits) -{ - uint8_t shift = ftr_bits->shift; - uint64_t mask = ftr_bits->mask; - uint64_t val, new_val, ftr; - - val = vcpu_get_reg(vcpu, reg); - ftr = (val & mask) >> shift; - - ftr = get_safe_value(ftr_bits, ftr); - - ftr <<= shift; - val &= ~mask; - val |= ftr; - - vcpu_set_reg(vcpu, reg, val); - new_val = vcpu_get_reg(vcpu, reg); - TEST_ASSERT_EQ(new_val, val); - - return new_val; -} - -static void test_reg_set_fail(struct kvm_vcpu *vcpu, uint64_t reg, - const struct reg_ftr_bits *ftr_bits) -{ - uint8_t shift = ftr_bits->shift; - uint64_t mask = ftr_bits->mask; - uint64_t val, 
old_val, ftr; - int r; - - val = vcpu_get_reg(vcpu, reg); - ftr = (val & mask) >> shift; - - ftr = get_invalid_value(ftr_bits, ftr); - - old_val = val; - ftr <<= shift; - val &= ~mask; - val |= ftr; - - r = __vcpu_set_reg(vcpu, reg, val); - TEST_ASSERT(r < 0 && errno == EINVAL, - "Unexpected KVM_SET_ONE_REG error: r=%d, errno=%d", r, errno); - - val = vcpu_get_reg(vcpu, reg); - TEST_ASSERT_EQ(val, old_val); -} - -static uint64_t test_reg_vals[KVM_ARM_FEATURE_ID_RANGE_SIZE]; - -#define encoding_to_range_idx(encoding) \ - KVM_ARM_FEATURE_ID_RANGE_IDX(sys_reg_Op0(encoding), sys_reg_Op1(encoding), \ - sys_reg_CRn(encoding), sys_reg_CRm(encoding), \ - sys_reg_Op2(encoding)) - - -static void test_vm_ftr_id_regs(struct kvm_vcpu *vcpu, bool aarch64_only) -{ - uint64_t masks[KVM_ARM_FEATURE_ID_RANGE_SIZE]; - struct reg_mask_range range = { - .addr = (__u64)masks, - }; - int ret; - - /* KVM should return error when reserved field is not zero */ - range.reserved[0] = 1; - ret = __vm_ioctl(vcpu->vm, KVM_ARM_GET_REG_WRITABLE_MASKS, &range); - TEST_ASSERT(ret, "KVM doesn't check invalid parameters."); - - /* Get writable masks for feature ID registers */ - memset(range.reserved, 0, sizeof(range.reserved)); - vm_ioctl(vcpu->vm, KVM_ARM_GET_REG_WRITABLE_MASKS, &range); - - for (int i = 0; i < ARRAY_SIZE(test_regs); i++) { - const struct reg_ftr_bits *ftr_bits = test_regs[i].ftr_bits; - uint32_t reg_id = test_regs[i].reg; - uint64_t reg = KVM_ARM64_SYS_REG(reg_id); - int idx; - - /* Get the index to masks array for the idreg */ - idx = encoding_to_range_idx(reg_id); - - for (int j = 0; ftr_bits[j].type != FTR_END; j++) { - /* Skip aarch32 reg on aarch64 only system, since they are RAZ/WI. */ - if (aarch64_only && sys_reg_CRm(reg_id) < 4) { - ksft_test_result_skip("%s on AARCH64 only system\n", - ftr_bits[j].name); - continue; - } - - /* Make sure the feature field is writable */ - TEST_ASSERT_EQ(masks[idx] & ftr_bits[j].mask, ftr_bits[j].mask); - - test_reg_set_fail(vcpu, reg, &ftr_bits[j]); - - test_reg_vals[idx] = test_reg_set_success(vcpu, reg, - &ftr_bits[j]); - - ksft_test_result_pass("%s\n", ftr_bits[j].name); - } - } -} - -#define MPAM_IDREG_TEST 6 -static void test_user_set_mpam_reg(struct kvm_vcpu *vcpu) -{ - uint64_t masks[KVM_ARM_FEATURE_ID_RANGE_SIZE]; - struct reg_mask_range range = { - .addr = (__u64)masks, - }; - uint64_t val; - int idx, err; - - /* - * If ID_AA64PFR0.MPAM is _not_ officially modifiable and is zero, - * check that if it can be set to 1, (i.e. it is supported by the - * hardware), that it can't be set to other values. - */ - - /* Get writable masks for feature ID registers */ - memset(range.reserved, 0, sizeof(range.reserved)); - vm_ioctl(vcpu->vm, KVM_ARM_GET_REG_WRITABLE_MASKS, &range); - - /* Writeable? Nothing to test! */ - idx = encoding_to_range_idx(SYS_ID_AA64PFR0_EL1); - if ((masks[idx] & ID_AA64PFR0_EL1_MPAM_MASK) == ID_AA64PFR0_EL1_MPAM_MASK) { - ksft_test_result_skip("ID_AA64PFR0_EL1.MPAM is officially writable, nothing to test\n"); - return; - } - - /* Get the id register value */ - val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1)); - - /* Try to set MPAM=0. This should always be possible. 
*/ - val &= ~ID_AA64PFR0_EL1_MPAM_MASK; - val |= FIELD_PREP(ID_AA64PFR0_EL1_MPAM_MASK, 0); - err = __vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1), val); - if (err) - ksft_test_result_fail("ID_AA64PFR0_EL1.MPAM=0 was not accepted\n"); - else - ksft_test_result_pass("ID_AA64PFR0_EL1.MPAM=0 worked\n"); - - /* Try to set MPAM=1 */ - val &= ~ID_AA64PFR0_EL1_MPAM_MASK; - val |= FIELD_PREP(ID_AA64PFR0_EL1_MPAM_MASK, 1); - err = __vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1), val); - if (err) - ksft_test_result_skip("ID_AA64PFR0_EL1.MPAM is not writable, nothing to test\n"); - else - ksft_test_result_pass("ID_AA64PFR0_EL1.MPAM=1 was writable\n"); - - /* Try to set MPAM=2 */ - val &= ~ID_AA64PFR0_EL1_MPAM_MASK; - val |= FIELD_PREP(ID_AA64PFR0_EL1_MPAM_MASK, 2); - err = __vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1), val); - if (err) - ksft_test_result_pass("ID_AA64PFR0_EL1.MPAM not arbitrarily modifiable\n"); - else - ksft_test_result_fail("ID_AA64PFR0_EL1.MPAM value should not be ignored\n"); - - /* And again for ID_AA64PFR1_EL1.MPAM_frac */ - idx = encoding_to_range_idx(SYS_ID_AA64PFR1_EL1); - if ((masks[idx] & ID_AA64PFR1_EL1_MPAM_frac_MASK) == ID_AA64PFR1_EL1_MPAM_frac_MASK) { - ksft_test_result_skip("ID_AA64PFR1_EL1.MPAM_frac is officially writable, nothing to test\n"); - return; - } - - /* Get the id register value */ - val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR1_EL1)); - - /* Try to set MPAM_frac=0. This should always be possible. */ - val &= ~ID_AA64PFR1_EL1_MPAM_frac_MASK; - val |= FIELD_PREP(ID_AA64PFR1_EL1_MPAM_frac_MASK, 0); - err = __vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR1_EL1), val); - if (err) - ksft_test_result_fail("ID_AA64PFR0_EL1.MPAM_frac=0 was not accepted\n"); - else - ksft_test_result_pass("ID_AA64PFR0_EL1.MPAM_frac=0 worked\n"); - - /* Try to set MPAM_frac=1 */ - val &= ~ID_AA64PFR1_EL1_MPAM_frac_MASK; - val |= FIELD_PREP(ID_AA64PFR1_EL1_MPAM_frac_MASK, 1); - err = __vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR1_EL1), val); - if (err) - ksft_test_result_skip("ID_AA64PFR1_EL1.MPAM_frac is not writable, nothing to test\n"); - else - ksft_test_result_pass("ID_AA64PFR0_EL1.MPAM_frac=1 was writable\n"); - - /* Try to set MPAM_frac=2 */ - val &= ~ID_AA64PFR1_EL1_MPAM_frac_MASK; - val |= FIELD_PREP(ID_AA64PFR1_EL1_MPAM_frac_MASK, 2); - err = __vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR1_EL1), val); - if (err) - ksft_test_result_pass("ID_AA64PFR1_EL1.MPAM_frac not arbitrarily modifiable\n"); - else - ksft_test_result_fail("ID_AA64PFR1_EL1.MPAM_frac value should not be ignored\n"); -} - -static void test_guest_reg_read(struct kvm_vcpu *vcpu) -{ - bool done = false; - struct ucall uc; - - while (!done) { - vcpu_run(vcpu); - - switch (get_ucall(vcpu, &uc)) { - case UCALL_ABORT: - REPORT_GUEST_ASSERT(uc); - break; - case UCALL_SYNC: - /* Make sure the written values are seen by guest */ - TEST_ASSERT_EQ(test_reg_vals[encoding_to_range_idx(uc.args[2])], - uc.args[3]); - break; - case UCALL_DONE: - done = true; - break; - default: - TEST_FAIL("Unexpected ucall: %lu", uc.cmd); - } - } -} - -/* Politely lifted from arch/arm64/include/asm/cache.h */ -/* Ctypen, bits[3(n - 1) + 2 : 3(n - 1)], for n = 1 to 7 */ -#define CLIDR_CTYPE_SHIFT(level) (3 * (level - 1)) -#define CLIDR_CTYPE_MASK(level) (7 << CLIDR_CTYPE_SHIFT(level)) -#define CLIDR_CTYPE(clidr, level) \ - (((clidr) & CLIDR_CTYPE_MASK(level)) >> CLIDR_CTYPE_SHIFT(level)) - -static void test_clidr(struct kvm_vcpu *vcpu) -{ - uint64_t clidr; - int level; - 
- clidr = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_CLIDR_EL1)); - - /* find the first empty level in the cache hierarchy */ - for (level = 1; level < 7; level++) { - if (!CLIDR_CTYPE(clidr, level)) - break; - } - - /* - * If you have a mind-boggling 7 levels of cache, congratulations, you - * get to fix this. - */ - TEST_ASSERT(level <= 7, "can't find an empty level in cache hierarchy"); - - /* stick in a unified cache level */ - clidr |= BIT(2) << CLIDR_CTYPE_SHIFT(level); - - vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_CLIDR_EL1), clidr); - test_reg_vals[encoding_to_range_idx(SYS_CLIDR_EL1)] = clidr; -} - -static void test_ctr(struct kvm_vcpu *vcpu) -{ - u64 ctr; - - ctr = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_CTR_EL0)); - ctr &= ~CTR_EL0_DIC_MASK; - if (ctr & CTR_EL0_IminLine_MASK) - ctr--; - - vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_CTR_EL0), ctr); - test_reg_vals[encoding_to_range_idx(SYS_CTR_EL0)] = ctr; -} - -static void test_vcpu_ftr_id_regs(struct kvm_vcpu *vcpu) -{ - u64 val; - - test_clidr(vcpu); - test_ctr(vcpu); - - val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_MPIDR_EL1)); - val++; - vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_MPIDR_EL1), val); - - test_reg_vals[encoding_to_range_idx(SYS_MPIDR_EL1)] = val; - ksft_test_result_pass("%s\n", __func__); -} - -static void test_assert_id_reg_unchanged(struct kvm_vcpu *vcpu, uint32_t encoding) -{ - size_t idx = encoding_to_range_idx(encoding); - uint64_t observed; - - observed = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(encoding)); - TEST_ASSERT_EQ(test_reg_vals[idx], observed); -} - -static void test_reset_preserves_id_regs(struct kvm_vcpu *vcpu) -{ - /* - * Calls KVM_ARM_VCPU_INIT behind the scenes, which will do an - * architectural reset of the vCPU. - */ - aarch64_vcpu_setup(vcpu, NULL); - - for (int i = 0; i < ARRAY_SIZE(test_regs); i++) - test_assert_id_reg_unchanged(vcpu, test_regs[i].reg); - - test_assert_id_reg_unchanged(vcpu, SYS_MPIDR_EL1); - test_assert_id_reg_unchanged(vcpu, SYS_CLIDR_EL1); - test_assert_id_reg_unchanged(vcpu, SYS_CTR_EL0); - - ksft_test_result_pass("%s\n", __func__); -} - -int main(void) -{ - struct kvm_vcpu *vcpu; - struct kvm_vm *vm; - bool aarch64_only; - uint64_t val, el0; - int test_cnt; - - TEST_REQUIRE(kvm_has_cap(KVM_CAP_ARM_SUPPORTED_REG_MASK_RANGES)); - - vm = vm_create_with_one_vcpu(&vcpu, guest_code); - - /* Check for AARCH64 only system */ - val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1)); - el0 = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL0), val); - aarch64_only = (el0 == ID_AA64PFR0_EL1_ELx_64BIT_ONLY); - - ksft_print_header(); - - test_cnt = ARRAY_SIZE(ftr_id_aa64dfr0_el1) + ARRAY_SIZE(ftr_id_dfr0_el1) + - ARRAY_SIZE(ftr_id_aa64isar0_el1) + ARRAY_SIZE(ftr_id_aa64isar1_el1) + - ARRAY_SIZE(ftr_id_aa64isar2_el1) + ARRAY_SIZE(ftr_id_aa64pfr0_el1) + - ARRAY_SIZE(ftr_id_aa64pfr1_el1) + ARRAY_SIZE(ftr_id_aa64mmfr0_el1) + - ARRAY_SIZE(ftr_id_aa64mmfr1_el1) + ARRAY_SIZE(ftr_id_aa64mmfr2_el1) + - ARRAY_SIZE(ftr_id_aa64zfr0_el1) - ARRAY_SIZE(test_regs) + 2 + - MPAM_IDREG_TEST; - - ksft_set_plan(test_cnt); - - test_vm_ftr_id_regs(vcpu, aarch64_only); - test_vcpu_ftr_id_regs(vcpu); - test_user_set_mpam_reg(vcpu); - - test_guest_reg_read(vcpu); - - test_reset_preserves_id_regs(vcpu); - - kvm_vm_free(vm); - - ksft_finished(); -} diff --git a/tools/testing/selftests/kvm/aarch64/smccc_filter.c b/tools/testing/selftests/kvm/aarch64/smccc_filter.c deleted file mode 100644 index 2d189f3da228..000000000000 --- a/tools/testing/selftests/kvm/aarch64/smccc_filter.c +++ /dev/null @@ -1,268 +0,0 
@@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * smccc_filter - Tests for the SMCCC filter UAPI. - * - * Copyright (c) 2023 Google LLC - * - * This test includes: - * - Tests that the UAPI constraints are upheld by KVM. For example, userspace - * is prevented from filtering the architecture range of SMCCC calls. - * - Test that the filter actions (DENIED, FWD_TO_USER) work as intended. - */ - -#include -#include -#include - -#include "processor.h" -#include "test_util.h" - -enum smccc_conduit { - HVC_INSN, - SMC_INSN, -}; - -#define for_each_conduit(conduit) \ - for (conduit = HVC_INSN; conduit <= SMC_INSN; conduit++) - -static void guest_main(uint32_t func_id, enum smccc_conduit conduit) -{ - struct arm_smccc_res res; - - if (conduit == SMC_INSN) - smccc_smc(func_id, 0, 0, 0, 0, 0, 0, 0, &res); - else - smccc_hvc(func_id, 0, 0, 0, 0, 0, 0, 0, &res); - - GUEST_SYNC(res.a0); -} - -static int __set_smccc_filter(struct kvm_vm *vm, uint32_t start, uint32_t nr_functions, - enum kvm_smccc_filter_action action) -{ - struct kvm_smccc_filter filter = { - .base = start, - .nr_functions = nr_functions, - .action = action, - }; - - return __kvm_device_attr_set(vm->fd, KVM_ARM_VM_SMCCC_CTRL, - KVM_ARM_VM_SMCCC_FILTER, &filter); -} - -static void set_smccc_filter(struct kvm_vm *vm, uint32_t start, uint32_t nr_functions, - enum kvm_smccc_filter_action action) -{ - int ret = __set_smccc_filter(vm, start, nr_functions, action); - - TEST_ASSERT(!ret, "failed to configure SMCCC filter: %d", ret); -} - -static struct kvm_vm *setup_vm(struct kvm_vcpu **vcpu) -{ - struct kvm_vcpu_init init; - struct kvm_vm *vm; - - vm = vm_create(1); - vm_ioctl(vm, KVM_ARM_PREFERRED_TARGET, &init); - - /* - * Enable in-kernel emulation of PSCI to ensure that calls are denied - * due to the SMCCC filter, not because of KVM. 
- */ - init.features[0] |= (1 << KVM_ARM_VCPU_PSCI_0_2); - - *vcpu = aarch64_vcpu_add(vm, 0, &init, guest_main); - return vm; -} - -static void test_pad_must_be_zero(void) -{ - struct kvm_vcpu *vcpu; - struct kvm_vm *vm = setup_vm(&vcpu); - struct kvm_smccc_filter filter = { - .base = PSCI_0_2_FN_PSCI_VERSION, - .nr_functions = 1, - .action = KVM_SMCCC_FILTER_DENY, - .pad = { -1 }, - }; - int r; - - r = __kvm_device_attr_set(vm->fd, KVM_ARM_VM_SMCCC_CTRL, - KVM_ARM_VM_SMCCC_FILTER, &filter); - TEST_ASSERT(r < 0 && errno == EINVAL, - "Setting filter with nonzero padding should return EINVAL"); -} - -/* Ensure that userspace cannot filter the Arm Architecture SMCCC range */ -static void test_filter_reserved_range(void) -{ - struct kvm_vcpu *vcpu; - struct kvm_vm *vm = setup_vm(&vcpu); - uint32_t smc64_fn; - int r; - - r = __set_smccc_filter(vm, ARM_SMCCC_ARCH_WORKAROUND_1, - 1, KVM_SMCCC_FILTER_DENY); - TEST_ASSERT(r < 0 && errno == EEXIST, - "Attempt to filter reserved range should return EEXIST"); - - smc64_fn = ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, ARM_SMCCC_SMC_64, - 0, 0); - - r = __set_smccc_filter(vm, smc64_fn, 1, KVM_SMCCC_FILTER_DENY); - TEST_ASSERT(r < 0 && errno == EEXIST, - "Attempt to filter reserved range should return EEXIST"); - - kvm_vm_free(vm); -} - -static void test_invalid_nr_functions(void) -{ - struct kvm_vcpu *vcpu; - struct kvm_vm *vm = setup_vm(&vcpu); - int r; - - r = __set_smccc_filter(vm, PSCI_0_2_FN64_CPU_ON, 0, KVM_SMCCC_FILTER_DENY); - TEST_ASSERT(r < 0 && errno == EINVAL, - "Attempt to filter 0 functions should return EINVAL"); - - kvm_vm_free(vm); -} - -static void test_overflow_nr_functions(void) -{ - struct kvm_vcpu *vcpu; - struct kvm_vm *vm = setup_vm(&vcpu); - int r; - - r = __set_smccc_filter(vm, ~0, ~0, KVM_SMCCC_FILTER_DENY); - TEST_ASSERT(r < 0 && errno == EINVAL, - "Attempt to overflow filter range should return EINVAL"); - - kvm_vm_free(vm); -} - -static void test_reserved_action(void) -{ - struct kvm_vcpu *vcpu; - struct kvm_vm *vm = setup_vm(&vcpu); - int r; - - r = __set_smccc_filter(vm, PSCI_0_2_FN64_CPU_ON, 1, -1); - TEST_ASSERT(r < 0 && errno == EINVAL, - "Attempt to use reserved filter action should return EINVAL"); - - kvm_vm_free(vm); -} - - -/* Test that overlapping configurations of the SMCCC filter are rejected */ -static void test_filter_overlap(void) -{ - struct kvm_vcpu *vcpu; - struct kvm_vm *vm = setup_vm(&vcpu); - int r; - - set_smccc_filter(vm, PSCI_0_2_FN64_CPU_ON, 1, KVM_SMCCC_FILTER_DENY); - - r = __set_smccc_filter(vm, PSCI_0_2_FN64_CPU_ON, 1, KVM_SMCCC_FILTER_DENY); - TEST_ASSERT(r < 0 && errno == EEXIST, - "Attempt to filter already configured range should return EEXIST"); - - kvm_vm_free(vm); -} - -static void expect_call_denied(struct kvm_vcpu *vcpu) -{ - struct ucall uc; - - if (get_ucall(vcpu, &uc) != UCALL_SYNC) - TEST_FAIL("Unexpected ucall: %lu", uc.cmd); - - TEST_ASSERT(uc.args[1] == SMCCC_RET_NOT_SUPPORTED, - "Unexpected SMCCC return code: %lu", uc.args[1]); -} - -/* Denied SMCCC calls have a return code of SMCCC_RET_NOT_SUPPORTED */ -static void test_filter_denied(void) -{ - enum smccc_conduit conduit; - struct kvm_vcpu *vcpu; - struct kvm_vm *vm; - - for_each_conduit(conduit) { - vm = setup_vm(&vcpu); - - set_smccc_filter(vm, PSCI_0_2_FN_PSCI_VERSION, 1, KVM_SMCCC_FILTER_DENY); - vcpu_args_set(vcpu, 2, PSCI_0_2_FN_PSCI_VERSION, conduit); - - vcpu_run(vcpu); - expect_call_denied(vcpu); - - kvm_vm_free(vm); - } -} - -static void expect_call_fwd_to_user(struct kvm_vcpu *vcpu, uint32_t func_id, - enum 
smccc_conduit conduit) -{ - struct kvm_run *run = vcpu->run; - - TEST_ASSERT(run->exit_reason == KVM_EXIT_HYPERCALL, - "Unexpected exit reason: %u", run->exit_reason); - TEST_ASSERT(run->hypercall.nr == func_id, - "Unexpected SMCCC function: %llu", run->hypercall.nr); - - if (conduit == SMC_INSN) - TEST_ASSERT(run->hypercall.flags & KVM_HYPERCALL_EXIT_SMC, - "KVM_HYPERCALL_EXIT_SMC is not set"); - else - TEST_ASSERT(!(run->hypercall.flags & KVM_HYPERCALL_EXIT_SMC), - "KVM_HYPERCALL_EXIT_SMC is set"); -} - -/* SMCCC calls forwarded to userspace cause KVM_EXIT_HYPERCALL exits */ -static void test_filter_fwd_to_user(void) -{ - enum smccc_conduit conduit; - struct kvm_vcpu *vcpu; - struct kvm_vm *vm; - - for_each_conduit(conduit) { - vm = setup_vm(&vcpu); - - set_smccc_filter(vm, PSCI_0_2_FN_PSCI_VERSION, 1, KVM_SMCCC_FILTER_FWD_TO_USER); - vcpu_args_set(vcpu, 2, PSCI_0_2_FN_PSCI_VERSION, conduit); - - vcpu_run(vcpu); - expect_call_fwd_to_user(vcpu, PSCI_0_2_FN_PSCI_VERSION, conduit); - - kvm_vm_free(vm); - } -} - -static bool kvm_supports_smccc_filter(void) -{ - struct kvm_vm *vm = vm_create_barebones(); - int r; - - r = __kvm_has_device_attr(vm->fd, KVM_ARM_VM_SMCCC_CTRL, KVM_ARM_VM_SMCCC_FILTER); - - kvm_vm_free(vm); - return !r; -} - -int main(void) -{ - TEST_REQUIRE(kvm_supports_smccc_filter()); - - test_pad_must_be_zero(); - test_invalid_nr_functions(); - test_overflow_nr_functions(); - test_reserved_action(); - test_filter_reserved_range(); - test_filter_overlap(); - test_filter_denied(); - test_filter_fwd_to_user(); -} diff --git a/tools/testing/selftests/kvm/aarch64/vcpu_width_config.c b/tools/testing/selftests/kvm/aarch64/vcpu_width_config.c deleted file mode 100644 index 80b74c6f152b..000000000000 --- a/tools/testing/selftests/kvm/aarch64/vcpu_width_config.c +++ /dev/null @@ -1,121 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * vcpu_width_config - Test KVM_ARM_VCPU_INIT() with KVM_ARM_VCPU_EL1_32BIT. - * - * Copyright (c) 2022 Google LLC. - * - * This is a test that ensures that non-mixed-width vCPUs (all 64bit vCPUs - * or all 32bit vcPUs) can be configured and mixed-width vCPUs cannot be - * configured. - */ - -#include "kvm_util.h" -#include "processor.h" -#include "test_util.h" - - -/* - * Add a vCPU, run KVM_ARM_VCPU_INIT with @init0, and then - * add another vCPU, and run KVM_ARM_VCPU_INIT with @init1. - */ -static int add_init_2vcpus(struct kvm_vcpu_init *init0, - struct kvm_vcpu_init *init1) -{ - struct kvm_vcpu *vcpu0, *vcpu1; - struct kvm_vm *vm; - int ret; - - vm = vm_create_barebones(); - - vcpu0 = __vm_vcpu_add(vm, 0); - ret = __vcpu_ioctl(vcpu0, KVM_ARM_VCPU_INIT, init0); - if (ret) - goto free_exit; - - vcpu1 = __vm_vcpu_add(vm, 1); - ret = __vcpu_ioctl(vcpu1, KVM_ARM_VCPU_INIT, init1); - -free_exit: - kvm_vm_free(vm); - return ret; -} - -/* - * Add two vCPUs, then run KVM_ARM_VCPU_INIT for one vCPU with @init0, - * and run KVM_ARM_VCPU_INIT for another vCPU with @init1. 
- */ -static int add_2vcpus_init_2vcpus(struct kvm_vcpu_init *init0, - struct kvm_vcpu_init *init1) -{ - struct kvm_vcpu *vcpu0, *vcpu1; - struct kvm_vm *vm; - int ret; - - vm = vm_create_barebones(); - - vcpu0 = __vm_vcpu_add(vm, 0); - vcpu1 = __vm_vcpu_add(vm, 1); - - ret = __vcpu_ioctl(vcpu0, KVM_ARM_VCPU_INIT, init0); - if (ret) - goto free_exit; - - ret = __vcpu_ioctl(vcpu1, KVM_ARM_VCPU_INIT, init1); - -free_exit: - kvm_vm_free(vm); - return ret; -} - -/* - * Tests that two 64bit vCPUs can be configured, two 32bit vCPUs can be - * configured, and two mixed-width vCPUs cannot be configured. - * Each of those three cases, configure vCPUs in two different orders. - * The one is running KVM_CREATE_VCPU for 2 vCPUs, and then running - * KVM_ARM_VCPU_INIT for them. - * The other is running KVM_CREATE_VCPU and KVM_ARM_VCPU_INIT for a vCPU, - * and then run those commands for another vCPU. - */ -int main(void) -{ - struct kvm_vcpu_init init0, init1; - struct kvm_vm *vm; - int ret; - - TEST_REQUIRE(kvm_has_cap(KVM_CAP_ARM_EL1_32BIT)); - - /* Get the preferred target type and copy that to init1 for later use */ - vm = vm_create_barebones(); - vm_ioctl(vm, KVM_ARM_PREFERRED_TARGET, &init0); - kvm_vm_free(vm); - init1 = init0; - - /* Test with 64bit vCPUs */ - ret = add_init_2vcpus(&init0, &init0); - TEST_ASSERT(ret == 0, - "Configuring 64bit EL1 vCPUs failed unexpectedly"); - ret = add_2vcpus_init_2vcpus(&init0, &init0); - TEST_ASSERT(ret == 0, - "Configuring 64bit EL1 vCPUs failed unexpectedly"); - - /* Test with 32bit vCPUs */ - init0.features[0] = (1 << KVM_ARM_VCPU_EL1_32BIT); - ret = add_init_2vcpus(&init0, &init0); - TEST_ASSERT(ret == 0, - "Configuring 32bit EL1 vCPUs failed unexpectedly"); - ret = add_2vcpus_init_2vcpus(&init0, &init0); - TEST_ASSERT(ret == 0, - "Configuring 32bit EL1 vCPUs failed unexpectedly"); - - /* Test with mixed-width vCPUs */ - init0.features[0] = 0; - init1.features[0] = (1 << KVM_ARM_VCPU_EL1_32BIT); - ret = add_init_2vcpus(&init0, &init1); - TEST_ASSERT(ret != 0, - "Configuring mixed-width vCPUs worked unexpectedly"); - ret = add_2vcpus_init_2vcpus(&init0, &init1); - TEST_ASSERT(ret != 0, - "Configuring mixed-width vCPUs worked unexpectedly"); - - return 0; -} diff --git a/tools/testing/selftests/kvm/aarch64/vgic_init.c b/tools/testing/selftests/kvm/aarch64/vgic_init.c deleted file mode 100644 index b3b5fb0ff0a9..000000000000 --- a/tools/testing/selftests/kvm/aarch64/vgic_init.c +++ /dev/null @@ -1,764 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * vgic init sequence tests - * - * Copyright (C) 2020, Red Hat, Inc. - */ -#include -#include -#include -#include - -#include "test_util.h" -#include "kvm_util.h" -#include "processor.h" -#include "vgic.h" - -#define NR_VCPUS 4 - -#define REG_OFFSET(vcpu, offset) (((uint64_t)vcpu << 32) | offset) - -#define GICR_TYPER 0x8 - -#define VGIC_DEV_IS_V2(_d) ((_d) == KVM_DEV_TYPE_ARM_VGIC_V2) -#define VGIC_DEV_IS_V3(_d) ((_d) == KVM_DEV_TYPE_ARM_VGIC_V3) - -struct vm_gic { - struct kvm_vm *vm; - int gic_fd; - uint32_t gic_dev_type; -}; - -static uint64_t max_phys_size; - -/* - * Helpers to access a redistributor register and verify the ioctl() failed or - * succeeded as expected, and provided the correct value on success. 
- */ -static void v3_redist_reg_get_errno(int gicv3_fd, int vcpu, int offset, - int want, const char *msg) -{ - uint32_t ignored_val; - int ret = __kvm_device_attr_get(gicv3_fd, KVM_DEV_ARM_VGIC_GRP_REDIST_REGS, - REG_OFFSET(vcpu, offset), &ignored_val); - - TEST_ASSERT(ret && errno == want, "%s; want errno = %d", msg, want); -} - -static void v3_redist_reg_get(int gicv3_fd, int vcpu, int offset, uint32_t want, - const char *msg) -{ - uint32_t val; - - kvm_device_attr_get(gicv3_fd, KVM_DEV_ARM_VGIC_GRP_REDIST_REGS, - REG_OFFSET(vcpu, offset), &val); - TEST_ASSERT(val == want, "%s; want '0x%x', got '0x%x'", msg, want, val); -} - -/* dummy guest code */ -static void guest_code(void) -{ - GUEST_SYNC(0); - GUEST_SYNC(1); - GUEST_SYNC(2); - GUEST_DONE(); -} - -/* we don't want to assert on run execution, hence that helper */ -static int run_vcpu(struct kvm_vcpu *vcpu) -{ - return __vcpu_run(vcpu) ? -errno : 0; -} - -static struct vm_gic vm_gic_create_with_vcpus(uint32_t gic_dev_type, - uint32_t nr_vcpus, - struct kvm_vcpu *vcpus[]) -{ - struct vm_gic v; - - v.gic_dev_type = gic_dev_type; - v.vm = vm_create_with_vcpus(nr_vcpus, guest_code, vcpus); - v.gic_fd = kvm_create_device(v.vm, gic_dev_type); - - return v; -} - -static struct vm_gic vm_gic_create_barebones(uint32_t gic_dev_type) -{ - struct vm_gic v; - - v.gic_dev_type = gic_dev_type; - v.vm = vm_create_barebones(); - v.gic_fd = kvm_create_device(v.vm, gic_dev_type); - - return v; -} - - -static void vm_gic_destroy(struct vm_gic *v) -{ - close(v->gic_fd); - kvm_vm_free(v->vm); -} - -struct vgic_region_attr { - uint64_t attr; - uint64_t size; - uint64_t alignment; -}; - -struct vgic_region_attr gic_v3_dist_region = { - .attr = KVM_VGIC_V3_ADDR_TYPE_DIST, - .size = 0x10000, - .alignment = 0x10000, -}; - -struct vgic_region_attr gic_v3_redist_region = { - .attr = KVM_VGIC_V3_ADDR_TYPE_REDIST, - .size = NR_VCPUS * 0x20000, - .alignment = 0x10000, -}; - -struct vgic_region_attr gic_v2_dist_region = { - .attr = KVM_VGIC_V2_ADDR_TYPE_DIST, - .size = 0x1000, - .alignment = 0x1000, -}; - -struct vgic_region_attr gic_v2_cpu_region = { - .attr = KVM_VGIC_V2_ADDR_TYPE_CPU, - .size = 0x2000, - .alignment = 0x1000, -}; - -/** - * Helper routine that performs KVM device tests in general. Eventually the - * ARM_VGIC (GICv2 or GICv3) device gets created with an overlapping - * DIST/REDIST (or DIST/CPUIF for GICv2). Assumption is 4 vcpus are going to be - * used hence the overlap. In the case of GICv3, A RDIST region is set at @0x0 - * and a DIST region is set @0x70000. The GICv2 case sets a CPUIF @0x0 and a - * DIST region @0x1000. - */ -static void subtest_dist_rdist(struct vm_gic *v) -{ - int ret; - uint64_t addr; - struct vgic_region_attr rdist; /* CPU interface in GICv2*/ - struct vgic_region_attr dist; - - rdist = VGIC_DEV_IS_V3(v->gic_dev_type) ? gic_v3_redist_region - : gic_v2_cpu_region; - dist = VGIC_DEV_IS_V3(v->gic_dev_type) ? 
gic_v3_dist_region - : gic_v2_dist_region; - - /* Check existing group/attributes */ - kvm_has_device_attr(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, dist.attr); - - kvm_has_device_attr(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, rdist.attr); - - /* check non existing attribute */ - ret = __kvm_has_device_attr(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, -1); - TEST_ASSERT(ret && errno == ENXIO, "attribute not supported"); - - /* misaligned DIST and REDIST address settings */ - addr = dist.alignment / 0x10; - ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, - dist.attr, &addr); - TEST_ASSERT(ret && errno == EINVAL, "GIC dist base not aligned"); - - addr = rdist.alignment / 0x10; - ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, - rdist.attr, &addr); - TEST_ASSERT(ret && errno == EINVAL, "GIC redist/cpu base not aligned"); - - /* out of range address */ - addr = max_phys_size; - ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, - dist.attr, &addr); - TEST_ASSERT(ret && errno == E2BIG, "dist address beyond IPA limit"); - - ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, - rdist.attr, &addr); - TEST_ASSERT(ret && errno == E2BIG, "redist address beyond IPA limit"); - - /* Space for half a rdist (a rdist is: 2 * rdist.alignment). */ - addr = max_phys_size - dist.alignment; - ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, - rdist.attr, &addr); - TEST_ASSERT(ret && errno == E2BIG, - "half of the redist is beyond IPA limit"); - - /* set REDIST base address @0x0*/ - addr = 0x00000; - kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, - rdist.attr, &addr); - - /* Attempt to create a second legacy redistributor region */ - addr = 0xE0000; - ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, - rdist.attr, &addr); - TEST_ASSERT(ret && errno == EEXIST, "GIC redist base set again"); - - ret = __kvm_has_device_attr(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, - KVM_VGIC_V3_ADDR_TYPE_REDIST); - if (!ret) { - /* Attempt to mix legacy and new redistributor regions */ - addr = REDIST_REGION_ATTR_ADDR(NR_VCPUS, 0x100000, 0, 0); - ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, - KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr); - TEST_ASSERT(ret && errno == EINVAL, - "attempt to mix GICv3 REDIST and REDIST_REGION"); - } - - /* - * Set overlapping DIST / REDIST, cannot be detected here. Will be detected - * on first vcpu run instead. 
- */ - addr = rdist.size - rdist.alignment; - kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, - dist.attr, &addr); -} - -/* Test the new REDIST region API */ -static void subtest_v3_redist_regions(struct vm_gic *v) -{ - uint64_t addr, expected_addr; - int ret; - - ret = __kvm_has_device_attr(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, - KVM_VGIC_V3_ADDR_TYPE_REDIST); - TEST_ASSERT(!ret, "Multiple redist regions advertised"); - - addr = REDIST_REGION_ATTR_ADDR(NR_VCPUS, 0x100000, 2, 0); - ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, - KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr); - TEST_ASSERT(ret && errno == EINVAL, "redist region attr value with flags != 0"); - - addr = REDIST_REGION_ATTR_ADDR(0, 0x100000, 0, 0); - ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, - KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr); - TEST_ASSERT(ret && errno == EINVAL, "redist region attr value with count== 0"); - - addr = REDIST_REGION_ATTR_ADDR(2, 0x200000, 0, 1); - ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, - KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr); - TEST_ASSERT(ret && errno == EINVAL, - "attempt to register the first rdist region with index != 0"); - - addr = REDIST_REGION_ATTR_ADDR(2, 0x201000, 0, 1); - ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, - KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr); - TEST_ASSERT(ret && errno == EINVAL, "rdist region with misaligned address"); - - addr = REDIST_REGION_ATTR_ADDR(2, 0x200000, 0, 0); - kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, - KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr); - - addr = REDIST_REGION_ATTR_ADDR(2, 0x200000, 0, 1); - ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, - KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr); - TEST_ASSERT(ret && errno == EINVAL, "register an rdist region with already used index"); - - addr = REDIST_REGION_ATTR_ADDR(1, 0x210000, 0, 2); - ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, - KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr); - TEST_ASSERT(ret && errno == EINVAL, - "register an rdist region overlapping with another one"); - - addr = REDIST_REGION_ATTR_ADDR(1, 0x240000, 0, 2); - ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, - KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr); - TEST_ASSERT(ret && errno == EINVAL, "register redist region with index not +1"); - - addr = REDIST_REGION_ATTR_ADDR(1, 0x240000, 0, 1); - kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, - KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr); - - addr = REDIST_REGION_ATTR_ADDR(1, max_phys_size, 0, 2); - ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, - KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr); - TEST_ASSERT(ret && errno == E2BIG, - "register redist region with base address beyond IPA range"); - - /* The last redist is above the pa range. 
*/ - addr = REDIST_REGION_ATTR_ADDR(2, max_phys_size - 0x30000, 0, 2); - ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, - KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr); - TEST_ASSERT(ret && errno == E2BIG, - "register redist region with top address beyond IPA range"); - - addr = 0x260000; - ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, - KVM_VGIC_V3_ADDR_TYPE_REDIST, &addr); - TEST_ASSERT(ret && errno == EINVAL, - "Mix KVM_VGIC_V3_ADDR_TYPE_REDIST and REDIST_REGION"); - - /* - * Now there are 2 redist regions: - * region 0 @ 0x200000 2 redists - * region 1 @ 0x240000 1 redist - * Attempt to read their characteristics - */ - - addr = REDIST_REGION_ATTR_ADDR(0, 0, 0, 0); - expected_addr = REDIST_REGION_ATTR_ADDR(2, 0x200000, 0, 0); - ret = __kvm_device_attr_get(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, - KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr); - TEST_ASSERT(!ret && addr == expected_addr, "read characteristics of region #0"); - - addr = REDIST_REGION_ATTR_ADDR(0, 0, 0, 1); - expected_addr = REDIST_REGION_ATTR_ADDR(1, 0x240000, 0, 1); - ret = __kvm_device_attr_get(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, - KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr); - TEST_ASSERT(!ret && addr == expected_addr, "read characteristics of region #1"); - - addr = REDIST_REGION_ATTR_ADDR(0, 0, 0, 2); - ret = __kvm_device_attr_get(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, - KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr); - TEST_ASSERT(ret && errno == ENOENT, "read characteristics of non existing region"); - - addr = 0x260000; - kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, - KVM_VGIC_V3_ADDR_TYPE_DIST, &addr); - - addr = REDIST_REGION_ATTR_ADDR(1, 0x260000, 0, 2); - ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, - KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr); - TEST_ASSERT(ret && errno == EINVAL, "register redist region colliding with dist"); -} - -/* - * VGIC KVM device is created and initialized before the secondary CPUs - * get created - */ -static void test_vgic_then_vcpus(uint32_t gic_dev_type) -{ - struct kvm_vcpu *vcpus[NR_VCPUS]; - struct vm_gic v; - int ret, i; - - v = vm_gic_create_with_vcpus(gic_dev_type, 1, vcpus); - - subtest_dist_rdist(&v); - - /* Add the rest of the VCPUs */ - for (i = 1; i < NR_VCPUS; ++i) - vcpus[i] = vm_vcpu_add(v.vm, i, guest_code); - - ret = run_vcpu(vcpus[3]); - TEST_ASSERT(ret == -EINVAL, "dist/rdist overlap detected on 1st vcpu run"); - - vm_gic_destroy(&v); -} - -/* All the VCPUs are created before the VGIC KVM device gets initialized */ -static void test_vcpus_then_vgic(uint32_t gic_dev_type) -{ - struct kvm_vcpu *vcpus[NR_VCPUS]; - struct vm_gic v; - int ret; - - v = vm_gic_create_with_vcpus(gic_dev_type, NR_VCPUS, vcpus); - - subtest_dist_rdist(&v); - - ret = run_vcpu(vcpus[3]); - TEST_ASSERT(ret == -EINVAL, "dist/rdist overlap detected on 1st vcpu run"); - - vm_gic_destroy(&v); -} - -#define KVM_VGIC_V2_ATTR(offset, cpu) \ - (FIELD_PREP(KVM_DEV_ARM_VGIC_OFFSET_MASK, offset) | \ - FIELD_PREP(KVM_DEV_ARM_VGIC_CPUID_MASK, cpu)) - -#define GIC_CPU_CTRL 0x00 - -static void test_v2_uaccess_cpuif_no_vcpus(void) -{ - struct vm_gic v; - u64 val = 0; - int ret; - - v = vm_gic_create_barebones(KVM_DEV_TYPE_ARM_VGIC_V2); - subtest_dist_rdist(&v); - - ret = __kvm_has_device_attr(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_CPU_REGS, - KVM_VGIC_V2_ATTR(GIC_CPU_CTRL, 0)); - TEST_ASSERT(ret && errno == EINVAL, - "accessed non-existent CPU interface, want errno: %i", - EINVAL); - ret = __kvm_device_attr_get(v.gic_fd, 
KVM_DEV_ARM_VGIC_GRP_CPU_REGS, - KVM_VGIC_V2_ATTR(GIC_CPU_CTRL, 0), &val); - TEST_ASSERT(ret && errno == EINVAL, - "accessed non-existent CPU interface, want errno: %i", - EINVAL); - ret = __kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_CPU_REGS, - KVM_VGIC_V2_ATTR(GIC_CPU_CTRL, 0), &val); - TEST_ASSERT(ret && errno == EINVAL, - "accessed non-existent CPU interface, want errno: %i", - EINVAL); - - vm_gic_destroy(&v); -} - -static void test_v3_new_redist_regions(void) -{ - struct kvm_vcpu *vcpus[NR_VCPUS]; - void *dummy = NULL; - struct vm_gic v; - uint64_t addr; - int ret; - - v = vm_gic_create_with_vcpus(KVM_DEV_TYPE_ARM_VGIC_V3, NR_VCPUS, vcpus); - subtest_v3_redist_regions(&v); - kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL, - KVM_DEV_ARM_VGIC_CTRL_INIT, NULL); - - ret = run_vcpu(vcpus[3]); - TEST_ASSERT(ret == -ENXIO, "running without sufficient number of rdists"); - vm_gic_destroy(&v); - - /* step2 */ - - v = vm_gic_create_with_vcpus(KVM_DEV_TYPE_ARM_VGIC_V3, NR_VCPUS, vcpus); - subtest_v3_redist_regions(&v); - - addr = REDIST_REGION_ATTR_ADDR(1, 0x280000, 0, 2); - kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, - KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr); - - ret = run_vcpu(vcpus[3]); - TEST_ASSERT(ret == -EBUSY, "running without vgic explicit init"); - - vm_gic_destroy(&v); - - /* step 3 */ - - v = vm_gic_create_with_vcpus(KVM_DEV_TYPE_ARM_VGIC_V3, NR_VCPUS, vcpus); - subtest_v3_redist_regions(&v); - - ret = __kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, - KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, dummy); - TEST_ASSERT(ret && errno == EFAULT, - "register a third region allowing to cover the 4 vcpus"); - - addr = REDIST_REGION_ATTR_ADDR(1, 0x280000, 0, 2); - kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, - KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr); - - kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL, - KVM_DEV_ARM_VGIC_CTRL_INIT, NULL); - - ret = run_vcpu(vcpus[3]); - TEST_ASSERT(!ret, "vcpu run"); - - vm_gic_destroy(&v); -} - -static void test_v3_typer_accesses(void) -{ - struct vm_gic v; - uint64_t addr; - int ret, i; - - v.vm = vm_create(NR_VCPUS); - (void)vm_vcpu_add(v.vm, 0, guest_code); - - v.gic_fd = kvm_create_device(v.vm, KVM_DEV_TYPE_ARM_VGIC_V3); - - (void)vm_vcpu_add(v.vm, 3, guest_code); - - v3_redist_reg_get_errno(v.gic_fd, 1, GICR_TYPER, EINVAL, - "attempting to read GICR_TYPER of non created vcpu"); - - (void)vm_vcpu_add(v.vm, 1, guest_code); - - v3_redist_reg_get_errno(v.gic_fd, 1, GICR_TYPER, EBUSY, - "read GICR_TYPER before GIC initialized"); - - (void)vm_vcpu_add(v.vm, 2, guest_code); - - kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL, - KVM_DEV_ARM_VGIC_CTRL_INIT, NULL); - - for (i = 0; i < NR_VCPUS ; i++) { - v3_redist_reg_get(v.gic_fd, i, GICR_TYPER, i * 0x100, - "read GICR_TYPER before rdist region setting"); - } - - addr = REDIST_REGION_ATTR_ADDR(2, 0x200000, 0, 0); - kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, - KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr); - - /* The 2 first rdists should be put there (vcpu 0 and 3) */ - v3_redist_reg_get(v.gic_fd, 0, GICR_TYPER, 0x0, "read typer of rdist #0"); - v3_redist_reg_get(v.gic_fd, 3, GICR_TYPER, 0x310, "read typer of rdist #1"); - - addr = REDIST_REGION_ATTR_ADDR(10, 0x100000, 0, 1); - ret = __kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, - KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr); - TEST_ASSERT(ret && errno == EINVAL, "collision with previous rdist region"); - - v3_redist_reg_get(v.gic_fd, 1, GICR_TYPER, 0x100, - "no redist 
region attached to vcpu #1 yet, last cannot be returned"); - v3_redist_reg_get(v.gic_fd, 2, GICR_TYPER, 0x200, - "no redist region attached to vcpu #2, last cannot be returned"); - - addr = REDIST_REGION_ATTR_ADDR(10, 0x20000, 0, 1); - kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, - KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr); - - v3_redist_reg_get(v.gic_fd, 1, GICR_TYPER, 0x100, "read typer of rdist #1"); - v3_redist_reg_get(v.gic_fd, 2, GICR_TYPER, 0x210, - "read typer of rdist #1, last properly returned"); - - vm_gic_destroy(&v); -} - -static struct vm_gic vm_gic_v3_create_with_vcpuids(int nr_vcpus, - uint32_t vcpuids[]) -{ - struct vm_gic v; - int i; - - v.vm = vm_create(nr_vcpus); - for (i = 0; i < nr_vcpus; i++) - vm_vcpu_add(v.vm, vcpuids[i], guest_code); - - v.gic_fd = kvm_create_device(v.vm, KVM_DEV_TYPE_ARM_VGIC_V3); - - return v; -} - -/** - * Test GICR_TYPER last bit with new redist regions - * rdist regions #1 and #2 are contiguous - * rdist region #0 @0x100000 2 rdist capacity - * rdists: 0, 3 (Last) - * rdist region #1 @0x240000 2 rdist capacity - * rdists: 5, 4 (Last) - * rdist region #2 @0x200000 2 rdist capacity - * rdists: 1, 2 - */ -static void test_v3_last_bit_redist_regions(void) -{ - uint32_t vcpuids[] = { 0, 3, 5, 4, 1, 2 }; - struct vm_gic v; - uint64_t addr; - - v = vm_gic_v3_create_with_vcpuids(ARRAY_SIZE(vcpuids), vcpuids); - - kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL, - KVM_DEV_ARM_VGIC_CTRL_INIT, NULL); - - addr = REDIST_REGION_ATTR_ADDR(2, 0x100000, 0, 0); - kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, - KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr); - - addr = REDIST_REGION_ATTR_ADDR(2, 0x240000, 0, 1); - kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, - KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr); - - addr = REDIST_REGION_ATTR_ADDR(2, 0x200000, 0, 2); - kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, - KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr); - - v3_redist_reg_get(v.gic_fd, 0, GICR_TYPER, 0x000, "read typer of rdist #0"); - v3_redist_reg_get(v.gic_fd, 1, GICR_TYPER, 0x100, "read typer of rdist #1"); - v3_redist_reg_get(v.gic_fd, 2, GICR_TYPER, 0x200, "read typer of rdist #2"); - v3_redist_reg_get(v.gic_fd, 3, GICR_TYPER, 0x310, "read typer of rdist #3"); - v3_redist_reg_get(v.gic_fd, 5, GICR_TYPER, 0x500, "read typer of rdist #5"); - v3_redist_reg_get(v.gic_fd, 4, GICR_TYPER, 0x410, "read typer of rdist #4"); - - vm_gic_destroy(&v); -} - -/* Test last bit with legacy region */ -static void test_v3_last_bit_single_rdist(void) -{ - uint32_t vcpuids[] = { 0, 3, 5, 4, 1, 2 }; - struct vm_gic v; - uint64_t addr; - - v = vm_gic_v3_create_with_vcpuids(ARRAY_SIZE(vcpuids), vcpuids); - - kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL, - KVM_DEV_ARM_VGIC_CTRL_INIT, NULL); - - addr = 0x10000; - kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, - KVM_VGIC_V3_ADDR_TYPE_REDIST, &addr); - - v3_redist_reg_get(v.gic_fd, 0, GICR_TYPER, 0x000, "read typer of rdist #0"); - v3_redist_reg_get(v.gic_fd, 3, GICR_TYPER, 0x300, "read typer of rdist #1"); - v3_redist_reg_get(v.gic_fd, 5, GICR_TYPER, 0x500, "read typer of rdist #2"); - v3_redist_reg_get(v.gic_fd, 1, GICR_TYPER, 0x100, "read typer of rdist #3"); - v3_redist_reg_get(v.gic_fd, 2, GICR_TYPER, 0x210, "read typer of rdist #3"); - - vm_gic_destroy(&v); -} - -/* Uses the legacy REDIST region API. 
*/ -static void test_v3_redist_ipa_range_check_at_vcpu_run(void) -{ - struct kvm_vcpu *vcpus[NR_VCPUS]; - struct vm_gic v; - int ret, i; - uint64_t addr; - - v = vm_gic_create_with_vcpus(KVM_DEV_TYPE_ARM_VGIC_V3, 1, vcpus); - - /* Set space for 3 redists, we have 1 vcpu, so this succeeds. */ - addr = max_phys_size - (3 * 2 * 0x10000); - kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, - KVM_VGIC_V3_ADDR_TYPE_REDIST, &addr); - - addr = 0x00000; - kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, - KVM_VGIC_V3_ADDR_TYPE_DIST, &addr); - - /* Add the rest of the VCPUs */ - for (i = 1; i < NR_VCPUS; ++i) - vcpus[i] = vm_vcpu_add(v.vm, i, guest_code); - - kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL, - KVM_DEV_ARM_VGIC_CTRL_INIT, NULL); - - /* Attempt to run a vcpu without enough redist space. */ - ret = run_vcpu(vcpus[2]); - TEST_ASSERT(ret && errno == EINVAL, - "redist base+size above PA range detected on 1st vcpu run"); - - vm_gic_destroy(&v); -} - -static void test_v3_its_region(void) -{ - struct kvm_vcpu *vcpus[NR_VCPUS]; - struct vm_gic v; - uint64_t addr; - int its_fd, ret; - - v = vm_gic_create_with_vcpus(KVM_DEV_TYPE_ARM_VGIC_V3, NR_VCPUS, vcpus); - its_fd = kvm_create_device(v.vm, KVM_DEV_TYPE_ARM_VGIC_ITS); - - addr = 0x401000; - ret = __kvm_device_attr_set(its_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, - KVM_VGIC_ITS_ADDR_TYPE, &addr); - TEST_ASSERT(ret && errno == EINVAL, - "ITS region with misaligned address"); - - addr = max_phys_size; - ret = __kvm_device_attr_set(its_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, - KVM_VGIC_ITS_ADDR_TYPE, &addr); - TEST_ASSERT(ret && errno == E2BIG, - "register ITS region with base address beyond IPA range"); - - addr = max_phys_size - 0x10000; - ret = __kvm_device_attr_set(its_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, - KVM_VGIC_ITS_ADDR_TYPE, &addr); - TEST_ASSERT(ret && errno == E2BIG, - "Half of ITS region is beyond IPA range"); - - /* This one succeeds setting the ITS base */ - addr = 0x400000; - kvm_device_attr_set(its_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, - KVM_VGIC_ITS_ADDR_TYPE, &addr); - - addr = 0x300000; - ret = __kvm_device_attr_set(its_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, - KVM_VGIC_ITS_ADDR_TYPE, &addr); - TEST_ASSERT(ret && errno == EEXIST, "ITS base set again"); - - close(its_fd); - vm_gic_destroy(&v); -} - -/* - * Returns 0 if it's possible to create GIC device of a given type (V2 or V3). - */ -int test_kvm_device(uint32_t gic_dev_type) -{ - struct kvm_vcpu *vcpus[NR_VCPUS]; - struct vm_gic v; - uint32_t other; - int ret; - - v.vm = vm_create_with_vcpus(NR_VCPUS, guest_code, vcpus); - - /* try to create a non existing KVM device */ - ret = __kvm_test_create_device(v.vm, 0); - TEST_ASSERT(ret && errno == ENODEV, "unsupported device"); - - /* trial mode */ - ret = __kvm_test_create_device(v.vm, gic_dev_type); - if (ret) - return ret; - v.gic_fd = kvm_create_device(v.vm, gic_dev_type); - - ret = __kvm_create_device(v.vm, gic_dev_type); - TEST_ASSERT(ret < 0 && errno == EEXIST, "create GIC device twice"); - - /* try to create the other gic_dev_type */ - other = VGIC_DEV_IS_V2(gic_dev_type) ? 
KVM_DEV_TYPE_ARM_VGIC_V3 - : KVM_DEV_TYPE_ARM_VGIC_V2; - - if (!__kvm_test_create_device(v.vm, other)) { - ret = __kvm_create_device(v.vm, other); - TEST_ASSERT(ret < 0 && (errno == EINVAL || errno == EEXIST), - "create GIC device while other version exists"); - } - - vm_gic_destroy(&v); - - return 0; -} - -void run_tests(uint32_t gic_dev_type) -{ - test_vcpus_then_vgic(gic_dev_type); - test_vgic_then_vcpus(gic_dev_type); - - if (VGIC_DEV_IS_V2(gic_dev_type)) - test_v2_uaccess_cpuif_no_vcpus(); - - if (VGIC_DEV_IS_V3(gic_dev_type)) { - test_v3_new_redist_regions(); - test_v3_typer_accesses(); - test_v3_last_bit_redist_regions(); - test_v3_last_bit_single_rdist(); - test_v3_redist_ipa_range_check_at_vcpu_run(); - test_v3_its_region(); - } -} - -int main(int ac, char **av) -{ - int ret; - int pa_bits; - int cnt_impl = 0; - - pa_bits = vm_guest_mode_params[VM_MODE_DEFAULT].pa_bits; - max_phys_size = 1ULL << pa_bits; - - ret = test_kvm_device(KVM_DEV_TYPE_ARM_VGIC_V3); - if (!ret) { - pr_info("Running GIC_v3 tests.\n"); - run_tests(KVM_DEV_TYPE_ARM_VGIC_V3); - cnt_impl++; - } - - ret = test_kvm_device(KVM_DEV_TYPE_ARM_VGIC_V2); - if (!ret) { - pr_info("Running GIC_v2 tests.\n"); - run_tests(KVM_DEV_TYPE_ARM_VGIC_V2); - cnt_impl++; - } - - if (!cnt_impl) { - print_skip("No GICv2 nor GICv3 support"); - exit(KSFT_SKIP); - } - return 0; -} diff --git a/tools/testing/selftests/kvm/aarch64/vgic_irq.c b/tools/testing/selftests/kvm/aarch64/vgic_irq.c deleted file mode 100644 index f4ac28d53747..000000000000 --- a/tools/testing/selftests/kvm/aarch64/vgic_irq.c +++ /dev/null @@ -1,847 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * vgic_irq.c - Test userspace injection of IRQs - * - * This test validates the injection of IRQs from userspace using various - * methods (e.g., KVM_IRQ_LINE) and modes (e.g., EOI). The guest "asks" the - * host to inject a specific intid via a GUEST_SYNC call, and then checks that - * it received it. - */ -#include -#include -#include -#include - -#include "processor.h" -#include "test_util.h" -#include "kvm_util.h" -#include "gic.h" -#include "gic_v3.h" -#include "vgic.h" - -/* - * Stores the user specified args; it's passed to the guest and to every test - * function. - */ -struct test_args { - uint32_t nr_irqs; /* number of KVM supported IRQs. */ - bool eoi_split; /* 1 is eoir+dir, 0 is eoir only */ - bool level_sensitive; /* 1 is level, 0 is edge */ - int kvm_max_routes; /* output of KVM_CAP_IRQ_ROUTING */ - bool kvm_supports_irqfd; /* output of KVM_CAP_IRQFD */ -}; - -/* - * KVM implements 32 priority levels: - * 0x00 (highest priority) - 0xF8 (lowest priority), in steps of 8 - * - * Note that these macros will still be correct in the case that KVM implements - * more priority levels. Also note that 32 is the minimum for GICv3 and GICv2. - */ -#define KVM_NUM_PRIOS 32 -#define KVM_PRIO_SHIFT 3 /* steps of 8 = 1 << 3 */ -#define KVM_PRIO_STEPS (1 << KVM_PRIO_SHIFT) /* 8 */ -#define LOWEST_PRIO (KVM_NUM_PRIOS - 1) -#define CPU_PRIO_MASK (LOWEST_PRIO << KVM_PRIO_SHIFT) /* 0xf8 */ -#define IRQ_DEFAULT_PRIO (LOWEST_PRIO - 1) -#define IRQ_DEFAULT_PRIO_REG (IRQ_DEFAULT_PRIO << KVM_PRIO_SHIFT) /* 0xf0 */ - -/* - * The kvm_inject_* utilities are used by the guest to ask the host to inject - * interrupts (e.g., using the KVM_IRQ_LINE ioctl). 
- */ - -typedef enum { - KVM_INJECT_EDGE_IRQ_LINE = 1, - KVM_SET_IRQ_LINE, - KVM_SET_IRQ_LINE_HIGH, - KVM_SET_LEVEL_INFO_HIGH, - KVM_INJECT_IRQFD, - KVM_WRITE_ISPENDR, - KVM_WRITE_ISACTIVER, -} kvm_inject_cmd; - -struct kvm_inject_args { - kvm_inject_cmd cmd; - uint32_t first_intid; - uint32_t num; - int level; - bool expect_failure; -}; - -/* Used on the guest side to perform the hypercall. */ -static void kvm_inject_call(kvm_inject_cmd cmd, uint32_t first_intid, - uint32_t num, int level, bool expect_failure); - -/* Used on the host side to get the hypercall info. */ -static void kvm_inject_get_call(struct kvm_vm *vm, struct ucall *uc, - struct kvm_inject_args *args); - -#define _KVM_INJECT_MULTI(cmd, intid, num, expect_failure) \ - kvm_inject_call(cmd, intid, num, -1 /* not used */, expect_failure) - -#define KVM_INJECT_MULTI(cmd, intid, num) \ - _KVM_INJECT_MULTI(cmd, intid, num, false) - -#define _KVM_INJECT(cmd, intid, expect_failure) \ - _KVM_INJECT_MULTI(cmd, intid, 1, expect_failure) - -#define KVM_INJECT(cmd, intid) \ - _KVM_INJECT_MULTI(cmd, intid, 1, false) - -#define KVM_ACTIVATE(cmd, intid) \ - kvm_inject_call(cmd, intid, 1, 1, false); - -struct kvm_inject_desc { - kvm_inject_cmd cmd; - /* can inject PPIs, PPIs, and/or SPIs. */ - bool sgi, ppi, spi; -}; - -static struct kvm_inject_desc inject_edge_fns[] = { - /* sgi ppi spi */ - { KVM_INJECT_EDGE_IRQ_LINE, false, false, true }, - { KVM_INJECT_IRQFD, false, false, true }, - { KVM_WRITE_ISPENDR, true, false, true }, - { 0, }, -}; - -static struct kvm_inject_desc inject_level_fns[] = { - /* sgi ppi spi */ - { KVM_SET_IRQ_LINE_HIGH, false, true, true }, - { KVM_SET_LEVEL_INFO_HIGH, false, true, true }, - { KVM_INJECT_IRQFD, false, false, true }, - { KVM_WRITE_ISPENDR, false, true, true }, - { 0, }, -}; - -static struct kvm_inject_desc set_active_fns[] = { - /* sgi ppi spi */ - { KVM_WRITE_ISACTIVER, true, true, true }, - { 0, }, -}; - -#define for_each_inject_fn(t, f) \ - for ((f) = (t); (f)->cmd; (f)++) - -#define for_each_supported_inject_fn(args, t, f) \ - for_each_inject_fn(t, f) \ - if ((args)->kvm_supports_irqfd || (f)->cmd != KVM_INJECT_IRQFD) - -#define for_each_supported_activate_fn(args, t, f) \ - for_each_supported_inject_fn((args), (t), (f)) - -/* Shared between the guest main thread and the IRQ handlers. 
*/ -volatile uint64_t irq_handled; -volatile uint32_t irqnr_received[MAX_SPI + 1]; - -static void reset_stats(void) -{ - int i; - - irq_handled = 0; - for (i = 0; i <= MAX_SPI; i++) - irqnr_received[i] = 0; -} - -static uint64_t gic_read_ap1r0(void) -{ - uint64_t reg = read_sysreg_s(SYS_ICC_AP1R0_EL1); - - dsb(sy); - return reg; -} - -static void gic_write_ap1r0(uint64_t val) -{ - write_sysreg_s(val, SYS_ICC_AP1R0_EL1); - isb(); -} - -static void guest_set_irq_line(uint32_t intid, uint32_t level); - -static void guest_irq_generic_handler(bool eoi_split, bool level_sensitive) -{ - uint32_t intid = gic_get_and_ack_irq(); - - if (intid == IAR_SPURIOUS) - return; - - GUEST_ASSERT(gic_irq_get_active(intid)); - - if (!level_sensitive) - GUEST_ASSERT(!gic_irq_get_pending(intid)); - - if (level_sensitive) - guest_set_irq_line(intid, 0); - - GUEST_ASSERT(intid < MAX_SPI); - irqnr_received[intid] += 1; - irq_handled += 1; - - gic_set_eoi(intid); - GUEST_ASSERT_EQ(gic_read_ap1r0(), 0); - if (eoi_split) - gic_set_dir(intid); - - GUEST_ASSERT(!gic_irq_get_active(intid)); - GUEST_ASSERT(!gic_irq_get_pending(intid)); -} - -static void kvm_inject_call(kvm_inject_cmd cmd, uint32_t first_intid, - uint32_t num, int level, bool expect_failure) -{ - struct kvm_inject_args args = { - .cmd = cmd, - .first_intid = first_intid, - .num = num, - .level = level, - .expect_failure = expect_failure, - }; - GUEST_SYNC(&args); -} - -#define GUEST_ASSERT_IAR_EMPTY() \ -do { \ - uint32_t _intid; \ - _intid = gic_get_and_ack_irq(); \ - GUEST_ASSERT(_intid == 0 || _intid == IAR_SPURIOUS); \ -} while (0) - -#define CAT_HELPER(a, b) a ## b -#define CAT(a, b) CAT_HELPER(a, b) -#define PREFIX guest_irq_handler_ -#define GUEST_IRQ_HANDLER_NAME(split, lev) CAT(PREFIX, CAT(split, lev)) -#define GENERATE_GUEST_IRQ_HANDLER(split, lev) \ -static void CAT(PREFIX, CAT(split, lev))(struct ex_regs *regs) \ -{ \ - guest_irq_generic_handler(split, lev); \ -} - -GENERATE_GUEST_IRQ_HANDLER(0, 0); -GENERATE_GUEST_IRQ_HANDLER(0, 1); -GENERATE_GUEST_IRQ_HANDLER(1, 0); -GENERATE_GUEST_IRQ_HANDLER(1, 1); - -static void (*guest_irq_handlers[2][2])(struct ex_regs *) = { - {GUEST_IRQ_HANDLER_NAME(0, 0), GUEST_IRQ_HANDLER_NAME(0, 1),}, - {GUEST_IRQ_HANDLER_NAME(1, 0), GUEST_IRQ_HANDLER_NAME(1, 1),}, -}; - -static void reset_priorities(struct test_args *args) -{ - int i; - - for (i = 0; i < args->nr_irqs; i++) - gic_set_priority(i, IRQ_DEFAULT_PRIO_REG); -} - -static void guest_set_irq_line(uint32_t intid, uint32_t level) -{ - kvm_inject_call(KVM_SET_IRQ_LINE, intid, 1, level, false); -} - -static void test_inject_fail(struct test_args *args, - uint32_t intid, kvm_inject_cmd cmd) -{ - reset_stats(); - - _KVM_INJECT(cmd, intid, true); - /* no IRQ to handle on entry */ - - GUEST_ASSERT_EQ(irq_handled, 0); - GUEST_ASSERT_IAR_EMPTY(); -} - -static void guest_inject(struct test_args *args, - uint32_t first_intid, uint32_t num, - kvm_inject_cmd cmd) -{ - uint32_t i; - - reset_stats(); - - /* Cycle over all priorities to make things more interesting. 
*/ - for (i = first_intid; i < num + first_intid; i++) - gic_set_priority(i, (i % (KVM_NUM_PRIOS - 1)) << 3); - - asm volatile("msr daifset, #2" : : : "memory"); - KVM_INJECT_MULTI(cmd, first_intid, num); - - while (irq_handled < num) { - wfi(); - local_irq_enable(); - isb(); /* handle IRQ */ - local_irq_disable(); - } - local_irq_enable(); - - GUEST_ASSERT_EQ(irq_handled, num); - for (i = first_intid; i < num + first_intid; i++) - GUEST_ASSERT_EQ(irqnr_received[i], 1); - GUEST_ASSERT_IAR_EMPTY(); - - reset_priorities(args); -} - -/* - * Restore the active state of multiple concurrent IRQs (given by - * concurrent_irqs). This does what a live-migration would do on the - * destination side assuming there are some active IRQs that were not - * deactivated yet. - */ -static void guest_restore_active(struct test_args *args, - uint32_t first_intid, uint32_t num, - kvm_inject_cmd cmd) -{ - uint32_t prio, intid, ap1r; - int i; - - /* - * Set the priorities of the first (KVM_NUM_PRIOS - 1) IRQs - * in descending order, so intid+1 can preempt intid. - */ - for (i = 0, prio = (num - 1) * 8; i < num; i++, prio -= 8) { - GUEST_ASSERT(prio >= 0); - intid = i + first_intid; - gic_set_priority(intid, prio); - } - - /* - * In a real migration, KVM would restore all GIC state before running - * guest code. - */ - for (i = 0; i < num; i++) { - intid = i + first_intid; - KVM_ACTIVATE(cmd, intid); - ap1r = gic_read_ap1r0(); - ap1r |= 1U << i; - gic_write_ap1r0(ap1r); - } - - /* This is where the "migration" would occur. */ - - /* finish handling the IRQs starting with the highest priority one. */ - for (i = 0; i < num; i++) { - intid = num - i - 1 + first_intid; - gic_set_eoi(intid); - if (args->eoi_split) - gic_set_dir(intid); - } - - for (i = 0; i < num; i++) - GUEST_ASSERT(!gic_irq_get_active(i + first_intid)); - GUEST_ASSERT_EQ(gic_read_ap1r0(), 0); - GUEST_ASSERT_IAR_EMPTY(); -} - -/* - * Polls the IAR until it's not a spurious interrupt. - * - * This function should only be used in test_inject_preemption (with IRQs - * masked). - */ -static uint32_t wait_for_and_activate_irq(void) -{ - uint32_t intid; - - do { - asm volatile("wfi" : : : "memory"); - intid = gic_get_and_ack_irq(); - } while (intid == IAR_SPURIOUS); - - return intid; -} - -/* - * Inject multiple concurrent IRQs (num IRQs starting at first_intid) and - * handle them without handling the actual exceptions. This is done by masking - * interrupts for the whole test. - */ -static void test_inject_preemption(struct test_args *args, - uint32_t first_intid, int num, - kvm_inject_cmd cmd) -{ - uint32_t intid, prio, step = KVM_PRIO_STEPS; - int i; - - /* Set the priorities of the first (KVM_NUM_PRIOS - 1) IRQs - * in descending order, so intid+1 can preempt intid. - */ - for (i = 0, prio = (num - 1) * step; i < num; i++, prio -= step) { - GUEST_ASSERT(prio >= 0); - intid = i + first_intid; - gic_set_priority(intid, prio); - } - - local_irq_disable(); - - for (i = 0; i < num; i++) { - uint32_t tmp; - intid = i + first_intid; - KVM_INJECT(cmd, intid); - /* Each successive IRQ will preempt the previous one. */ - tmp = wait_for_and_activate_irq(); - GUEST_ASSERT_EQ(tmp, intid); - if (args->level_sensitive) - guest_set_irq_line(intid, 0); - } - - /* finish handling the IRQs starting with the highest priority one. 
*/ - for (i = 0; i < num; i++) { - intid = num - i - 1 + first_intid; - gic_set_eoi(intid); - if (args->eoi_split) - gic_set_dir(intid); - } - - local_irq_enable(); - - for (i = 0; i < num; i++) - GUEST_ASSERT(!gic_irq_get_active(i + first_intid)); - GUEST_ASSERT_EQ(gic_read_ap1r0(), 0); - GUEST_ASSERT_IAR_EMPTY(); - - reset_priorities(args); -} - -static void test_injection(struct test_args *args, struct kvm_inject_desc *f) -{ - uint32_t nr_irqs = args->nr_irqs; - - if (f->sgi) { - guest_inject(args, MIN_SGI, 1, f->cmd); - guest_inject(args, 0, 16, f->cmd); - } - - if (f->ppi) - guest_inject(args, MIN_PPI, 1, f->cmd); - - if (f->spi) { - guest_inject(args, MIN_SPI, 1, f->cmd); - guest_inject(args, nr_irqs - 1, 1, f->cmd); - guest_inject(args, MIN_SPI, nr_irqs - MIN_SPI, f->cmd); - } -} - -static void test_injection_failure(struct test_args *args, - struct kvm_inject_desc *f) -{ - uint32_t bad_intid[] = { args->nr_irqs, 1020, 1024, 1120, 5120, ~0U, }; - int i; - - for (i = 0; i < ARRAY_SIZE(bad_intid); i++) - test_inject_fail(args, bad_intid[i], f->cmd); -} - -static void test_preemption(struct test_args *args, struct kvm_inject_desc *f) -{ - /* - * Test up to 4 levels of preemption. The reason is that KVM doesn't - * currently implement the ability to have more than the number-of-LRs - * number of concurrently active IRQs. The number of LRs implemented is - * IMPLEMENTATION DEFINED, however, it seems that most implement 4. - */ - if (f->sgi) - test_inject_preemption(args, MIN_SGI, 4, f->cmd); - - if (f->ppi) - test_inject_preemption(args, MIN_PPI, 4, f->cmd); - - if (f->spi) - test_inject_preemption(args, MIN_SPI, 4, f->cmd); -} - -static void test_restore_active(struct test_args *args, struct kvm_inject_desc *f) -{ - /* Test up to 4 active IRQs. Same reason as in test_preemption. */ - if (f->sgi) - guest_restore_active(args, MIN_SGI, 4, f->cmd); - - if (f->ppi) - guest_restore_active(args, MIN_PPI, 4, f->cmd); - - if (f->spi) - guest_restore_active(args, MIN_SPI, 4, f->cmd); -} - -static void guest_code(struct test_args *args) -{ - uint32_t i, nr_irqs = args->nr_irqs; - bool level_sensitive = args->level_sensitive; - struct kvm_inject_desc *f, *inject_fns; - - gic_init(GIC_V3, 1); - - for (i = 0; i < nr_irqs; i++) - gic_irq_enable(i); - - for (i = MIN_SPI; i < nr_irqs; i++) - gic_irq_set_config(i, !level_sensitive); - - gic_set_eoi_split(args->eoi_split); - - reset_priorities(args); - gic_set_priority_mask(CPU_PRIO_MASK); - - inject_fns = level_sensitive ? inject_level_fns - : inject_edge_fns; - - local_irq_enable(); - - /* Start the tests. */ - for_each_supported_inject_fn(args, inject_fns, f) { - test_injection(args, f); - test_preemption(args, f); - test_injection_failure(args, f); - } - - /* - * Restore the active state of IRQs. This would happen when live - * migrating IRQs in the middle of being handled. - */ - for_each_supported_activate_fn(args, set_active_fns, f) - test_restore_active(args, f); - - GUEST_DONE(); -} - -static void kvm_irq_line_check(struct kvm_vm *vm, uint32_t intid, int level, - struct test_args *test_args, bool expect_failure) -{ - int ret; - - if (!expect_failure) { - kvm_arm_irq_line(vm, intid, level); - } else { - /* The interface doesn't allow larger intid's. 
*/ - if (intid > KVM_ARM_IRQ_NUM_MASK) - return; - - ret = _kvm_arm_irq_line(vm, intid, level); - TEST_ASSERT(ret != 0 && errno == EINVAL, - "Bad intid %i did not cause KVM_IRQ_LINE " - "error: rc: %i errno: %i", intid, ret, errno); - } -} - -void kvm_irq_set_level_info_check(int gic_fd, uint32_t intid, int level, - bool expect_failure) -{ - if (!expect_failure) { - kvm_irq_set_level_info(gic_fd, intid, level); - } else { - int ret = _kvm_irq_set_level_info(gic_fd, intid, level); - /* - * The kernel silently fails for invalid SPIs and SGIs (which - * are not level-sensitive). It only checks for intid to not - * spill over 1U << 10 (the max reserved SPI). Also, callers - * are supposed to mask the intid with 0x3ff (1023). - */ - if (intid > VGIC_MAX_RESERVED) - TEST_ASSERT(ret != 0 && errno == EINVAL, - "Bad intid %i did not cause VGIC_GRP_LEVEL_INFO " - "error: rc: %i errno: %i", intid, ret, errno); - else - TEST_ASSERT(!ret, "KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO " - "for intid %i failed, rc: %i errno: %i", - intid, ret, errno); - } -} - -static void kvm_set_gsi_routing_irqchip_check(struct kvm_vm *vm, - uint32_t intid, uint32_t num, uint32_t kvm_max_routes, - bool expect_failure) -{ - struct kvm_irq_routing *routing; - int ret; - uint64_t i; - - assert(num <= kvm_max_routes && kvm_max_routes <= KVM_MAX_IRQ_ROUTES); - - routing = kvm_gsi_routing_create(); - for (i = intid; i < (uint64_t)intid + num; i++) - kvm_gsi_routing_irqchip_add(routing, i - MIN_SPI, i - MIN_SPI); - - if (!expect_failure) { - kvm_gsi_routing_write(vm, routing); - } else { - ret = _kvm_gsi_routing_write(vm, routing); - /* The kernel only checks e->irqchip.pin >= KVM_IRQCHIP_NUM_PINS */ - if (((uint64_t)intid + num - 1 - MIN_SPI) >= KVM_IRQCHIP_NUM_PINS) - TEST_ASSERT(ret != 0 && errno == EINVAL, - "Bad intid %u did not cause KVM_SET_GSI_ROUTING " - "error: rc: %i errno: %i", intid, ret, errno); - else - TEST_ASSERT(ret == 0, "KVM_SET_GSI_ROUTING " - "for intid %i failed, rc: %i errno: %i", - intid, ret, errno); - } -} - -static void kvm_irq_write_ispendr_check(int gic_fd, uint32_t intid, - struct kvm_vcpu *vcpu, - bool expect_failure) -{ - /* - * Ignore this when expecting failure as invalid intids will lead to - * either trying to inject SGIs when we configured the test to be - * level_sensitive (or the reverse), or inject large intids which - * will lead to writing above the ISPENDR register space (and we - * don't want to do that either). - */ - if (!expect_failure) - kvm_irq_write_ispendr(gic_fd, intid, vcpu); -} - -static void kvm_routing_and_irqfd_check(struct kvm_vm *vm, - uint32_t intid, uint32_t num, uint32_t kvm_max_routes, - bool expect_failure) -{ - int fd[MAX_SPI]; - uint64_t val; - int ret, f; - uint64_t i; - - /* - * There is no way to try injecting an SGI or PPI as the interface - * starts counting from the first SPI (above the private ones), so just - * exit. - */ - if (INTID_IS_SGI(intid) || INTID_IS_PPI(intid)) - return; - - kvm_set_gsi_routing_irqchip_check(vm, intid, num, - kvm_max_routes, expect_failure); - - /* - * If expect_failure, then just to inject anyway. These - * will silently fail. And in any case, the guest will check - * that no actual interrupt was injected for those cases. 
- */ - - for (f = 0, i = intid; i < (uint64_t)intid + num; i++, f++) { - fd[f] = eventfd(0, 0); - TEST_ASSERT(fd[f] != -1, __KVM_SYSCALL_ERROR("eventfd()", fd[f])); - } - - for (f = 0, i = intid; i < (uint64_t)intid + num; i++, f++) { - struct kvm_irqfd irqfd = { - .fd = fd[f], - .gsi = i - MIN_SPI, - }; - assert(i <= (uint64_t)UINT_MAX); - vm_ioctl(vm, KVM_IRQFD, &irqfd); - } - - for (f = 0, i = intid; i < (uint64_t)intid + num; i++, f++) { - val = 1; - ret = write(fd[f], &val, sizeof(uint64_t)); - TEST_ASSERT(ret == sizeof(uint64_t), - __KVM_SYSCALL_ERROR("write()", ret)); - } - - for (f = 0, i = intid; i < (uint64_t)intid + num; i++, f++) - close(fd[f]); -} - -/* handles the valid case: intid=0xffffffff num=1 */ -#define for_each_intid(first, num, tmp, i) \ - for ((tmp) = (i) = (first); \ - (tmp) < (uint64_t)(first) + (uint64_t)(num); \ - (tmp)++, (i)++) - -static void run_guest_cmd(struct kvm_vcpu *vcpu, int gic_fd, - struct kvm_inject_args *inject_args, - struct test_args *test_args) -{ - kvm_inject_cmd cmd = inject_args->cmd; - uint32_t intid = inject_args->first_intid; - uint32_t num = inject_args->num; - int level = inject_args->level; - bool expect_failure = inject_args->expect_failure; - struct kvm_vm *vm = vcpu->vm; - uint64_t tmp; - uint32_t i; - - /* handles the valid case: intid=0xffffffff num=1 */ - assert(intid < UINT_MAX - num || num == 1); - - switch (cmd) { - case KVM_INJECT_EDGE_IRQ_LINE: - for_each_intid(intid, num, tmp, i) - kvm_irq_line_check(vm, i, 1, test_args, - expect_failure); - for_each_intid(intid, num, tmp, i) - kvm_irq_line_check(vm, i, 0, test_args, - expect_failure); - break; - case KVM_SET_IRQ_LINE: - for_each_intid(intid, num, tmp, i) - kvm_irq_line_check(vm, i, level, test_args, - expect_failure); - break; - case KVM_SET_IRQ_LINE_HIGH: - for_each_intid(intid, num, tmp, i) - kvm_irq_line_check(vm, i, 1, test_args, - expect_failure); - break; - case KVM_SET_LEVEL_INFO_HIGH: - for_each_intid(intid, num, tmp, i) - kvm_irq_set_level_info_check(gic_fd, i, 1, - expect_failure); - break; - case KVM_INJECT_IRQFD: - kvm_routing_and_irqfd_check(vm, intid, num, - test_args->kvm_max_routes, - expect_failure); - break; - case KVM_WRITE_ISPENDR: - for (i = intid; i < intid + num; i++) - kvm_irq_write_ispendr_check(gic_fd, i, vcpu, - expect_failure); - break; - case KVM_WRITE_ISACTIVER: - for (i = intid; i < intid + num; i++) - kvm_irq_write_isactiver(gic_fd, i, vcpu); - break; - default: - break; - } -} - -static void kvm_inject_get_call(struct kvm_vm *vm, struct ucall *uc, - struct kvm_inject_args *args) -{ - struct kvm_inject_args *kvm_args_hva; - vm_vaddr_t kvm_args_gva; - - kvm_args_gva = uc->args[1]; - kvm_args_hva = (struct kvm_inject_args *)addr_gva2hva(vm, kvm_args_gva); - memcpy(args, kvm_args_hva, sizeof(struct kvm_inject_args)); -} - -static void print_args(struct test_args *args) -{ - printf("nr-irqs=%d level-sensitive=%d eoi-split=%d\n", - args->nr_irqs, args->level_sensitive, - args->eoi_split); -} - -static void test_vgic(uint32_t nr_irqs, bool level_sensitive, bool eoi_split) -{ - struct ucall uc; - int gic_fd; - struct kvm_vcpu *vcpu; - struct kvm_vm *vm; - struct kvm_inject_args inject_args; - vm_vaddr_t args_gva; - - struct test_args args = { - .nr_irqs = nr_irqs, - .level_sensitive = level_sensitive, - .eoi_split = eoi_split, - .kvm_max_routes = kvm_check_cap(KVM_CAP_IRQ_ROUTING), - .kvm_supports_irqfd = kvm_check_cap(KVM_CAP_IRQFD), - }; - - print_args(&args); - - vm = vm_create_with_one_vcpu(&vcpu, guest_code); - - vm_init_descriptor_tables(vm); 
- vcpu_init_descriptor_tables(vcpu); - - /* Setup the guest args page (so it gets the args). */ - args_gva = vm_vaddr_alloc_page(vm); - memcpy(addr_gva2hva(vm, args_gva), &args, sizeof(args)); - vcpu_args_set(vcpu, 1, args_gva); - - gic_fd = vgic_v3_setup(vm, 1, nr_irqs); - __TEST_REQUIRE(gic_fd >= 0, "Failed to create vgic-v3, skipping"); - - vm_install_exception_handler(vm, VECTOR_IRQ_CURRENT, - guest_irq_handlers[args.eoi_split][args.level_sensitive]); - - while (1) { - vcpu_run(vcpu); - - switch (get_ucall(vcpu, &uc)) { - case UCALL_SYNC: - kvm_inject_get_call(vm, &uc, &inject_args); - run_guest_cmd(vcpu, gic_fd, &inject_args, &args); - break; - case UCALL_ABORT: - REPORT_GUEST_ASSERT(uc); - break; - case UCALL_DONE: - goto done; - default: - TEST_FAIL("Unknown ucall %lu", uc.cmd); - } - } - -done: - close(gic_fd); - kvm_vm_free(vm); -} - -static void help(const char *name) -{ - printf( - "\n" - "usage: %s [-n num_irqs] [-e eoi_split] [-l level_sensitive]\n", name); - printf(" -n: specify number of IRQs to setup the vgic with. " - "It has to be a multiple of 32 and between 64 and 1024.\n"); - printf(" -e: if 1 then EOI is split into a write to DIR on top " - "of writing EOI.\n"); - printf(" -l: specify whether the IRQs are level-sensitive (1) or not (0)."); - puts(""); - exit(1); -} - -int main(int argc, char **argv) -{ - uint32_t nr_irqs = 64; - bool default_args = true; - bool level_sensitive = false; - int opt; - bool eoi_split = false; - - while ((opt = getopt(argc, argv, "hn:e:l:")) != -1) { - switch (opt) { - case 'n': - nr_irqs = atoi_non_negative("Number of IRQs", optarg); - if (nr_irqs > 1024 || nr_irqs % 32) - help(argv[0]); - break; - case 'e': - eoi_split = (bool)atoi_paranoid(optarg); - default_args = false; - break; - case 'l': - level_sensitive = (bool)atoi_paranoid(optarg); - default_args = false; - break; - case 'h': - default: - help(argv[0]); - break; - } - } - - /* - * If the user just specified nr_irqs and/or gic_version, then run all - * combinations. 
- */ - if (default_args) { - test_vgic(nr_irqs, false /* level */, false /* eoi_split */); - test_vgic(nr_irqs, false /* level */, true /* eoi_split */); - test_vgic(nr_irqs, true /* level */, false /* eoi_split */); - test_vgic(nr_irqs, true /* level */, true /* eoi_split */); - } else { - test_vgic(nr_irqs, level_sensitive, eoi_split); - } - - return 0; -} diff --git a/tools/testing/selftests/kvm/aarch64/vgic_lpi_stress.c b/tools/testing/selftests/kvm/aarch64/vgic_lpi_stress.c deleted file mode 100644 index fc4fe52fb6f8..000000000000 --- a/tools/testing/selftests/kvm/aarch64/vgic_lpi_stress.c +++ /dev/null @@ -1,410 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * vgic_lpi_stress - Stress test for KVM's ITS emulation - * - * Copyright (c) 2024 Google LLC - */ - -#include -#include -#include -#include - -#include "kvm_util.h" -#include "gic.h" -#include "gic_v3.h" -#include "gic_v3_its.h" -#include "processor.h" -#include "ucall.h" -#include "vgic.h" - -#define TEST_MEMSLOT_INDEX 1 - -#define GIC_LPI_OFFSET 8192 - -static size_t nr_iterations = 1000; -static vm_paddr_t gpa_base; - -static struct kvm_vm *vm; -static struct kvm_vcpu **vcpus; -static int gic_fd, its_fd; - -static struct test_data { - bool request_vcpus_stop; - u32 nr_cpus; - u32 nr_devices; - u32 nr_event_ids; - - vm_paddr_t device_table; - vm_paddr_t collection_table; - vm_paddr_t cmdq_base; - void *cmdq_base_va; - vm_paddr_t itt_tables; - - vm_paddr_t lpi_prop_table; - vm_paddr_t lpi_pend_tables; -} test_data = { - .nr_cpus = 1, - .nr_devices = 1, - .nr_event_ids = 16, -}; - -static void guest_irq_handler(struct ex_regs *regs) -{ - u32 intid = gic_get_and_ack_irq(); - - if (intid == IAR_SPURIOUS) - return; - - GUEST_ASSERT(intid >= GIC_LPI_OFFSET); - gic_set_eoi(intid); -} - -static void guest_setup_its_mappings(void) -{ - u32 coll_id, device_id, event_id, intid = GIC_LPI_OFFSET; - u32 nr_events = test_data.nr_event_ids; - u32 nr_devices = test_data.nr_devices; - u32 nr_cpus = test_data.nr_cpus; - - for (coll_id = 0; coll_id < nr_cpus; coll_id++) - its_send_mapc_cmd(test_data.cmdq_base_va, coll_id, coll_id, true); - - /* Round-robin the LPIs to all of the vCPUs in the VM */ - coll_id = 0; - for (device_id = 0; device_id < nr_devices; device_id++) { - vm_paddr_t itt_base = test_data.itt_tables + (device_id * SZ_64K); - - its_send_mapd_cmd(test_data.cmdq_base_va, device_id, - itt_base, SZ_64K, true); - - for (event_id = 0; event_id < nr_events; event_id++) { - its_send_mapti_cmd(test_data.cmdq_base_va, device_id, - event_id, coll_id, intid++); - - coll_id = (coll_id + 1) % test_data.nr_cpus; - } - } -} - -static void guest_invalidate_all_rdists(void) -{ - int i; - - for (i = 0; i < test_data.nr_cpus; i++) - its_send_invall_cmd(test_data.cmdq_base_va, i); -} - -static void guest_setup_gic(void) -{ - static atomic_int nr_cpus_ready = 0; - u32 cpuid = guest_get_vcpuid(); - - gic_init(GIC_V3, test_data.nr_cpus); - gic_rdist_enable_lpis(test_data.lpi_prop_table, SZ_64K, - test_data.lpi_pend_tables + (cpuid * SZ_64K)); - - atomic_fetch_add(&nr_cpus_ready, 1); - - if (cpuid > 0) - return; - - while (atomic_load(&nr_cpus_ready) < test_data.nr_cpus) - cpu_relax(); - - its_init(test_data.collection_table, SZ_64K, - test_data.device_table, SZ_64K, - test_data.cmdq_base, SZ_64K); - - guest_setup_its_mappings(); - guest_invalidate_all_rdists(); -} - -static void guest_code(size_t nr_lpis) -{ - guest_setup_gic(); - - GUEST_SYNC(0); - - /* - * Don't use WFI here to avoid blocking the vCPU thread indefinitely and - * never getting the 
stop signal. - */ - while (!READ_ONCE(test_data.request_vcpus_stop)) - cpu_relax(); - - GUEST_DONE(); -} - -static void setup_memslot(void) -{ - size_t pages; - size_t sz; - - /* - * For the ITS: - * - A single level device table - * - A single level collection table - * - The command queue - * - An ITT for each device - */ - sz = (3 + test_data.nr_devices) * SZ_64K; - - /* - * For the redistributors: - * - A shared LPI configuration table - * - An LPI pending table for each vCPU - */ - sz += (1 + test_data.nr_cpus) * SZ_64K; - - pages = sz / vm->page_size; - gpa_base = ((vm_compute_max_gfn(vm) + 1) * vm->page_size) - sz; - vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, gpa_base, - TEST_MEMSLOT_INDEX, pages, 0); -} - -#define LPI_PROP_DEFAULT_PRIO 0xa0 - -static void configure_lpis(void) -{ - size_t nr_lpis = test_data.nr_devices * test_data.nr_event_ids; - u8 *tbl = addr_gpa2hva(vm, test_data.lpi_prop_table); - size_t i; - - for (i = 0; i < nr_lpis; i++) { - tbl[i] = LPI_PROP_DEFAULT_PRIO | - LPI_PROP_GROUP1 | - LPI_PROP_ENABLED; - } -} - -static void setup_test_data(void) -{ - size_t pages_per_64k = vm_calc_num_guest_pages(vm->mode, SZ_64K); - u32 nr_devices = test_data.nr_devices; - u32 nr_cpus = test_data.nr_cpus; - vm_paddr_t cmdq_base; - - test_data.device_table = vm_phy_pages_alloc(vm, pages_per_64k, - gpa_base, - TEST_MEMSLOT_INDEX); - - test_data.collection_table = vm_phy_pages_alloc(vm, pages_per_64k, - gpa_base, - TEST_MEMSLOT_INDEX); - - cmdq_base = vm_phy_pages_alloc(vm, pages_per_64k, gpa_base, - TEST_MEMSLOT_INDEX); - virt_map(vm, cmdq_base, cmdq_base, pages_per_64k); - test_data.cmdq_base = cmdq_base; - test_data.cmdq_base_va = (void *)cmdq_base; - - test_data.itt_tables = vm_phy_pages_alloc(vm, pages_per_64k * nr_devices, - gpa_base, TEST_MEMSLOT_INDEX); - - test_data.lpi_prop_table = vm_phy_pages_alloc(vm, pages_per_64k, - gpa_base, TEST_MEMSLOT_INDEX); - configure_lpis(); - - test_data.lpi_pend_tables = vm_phy_pages_alloc(vm, pages_per_64k * nr_cpus, - gpa_base, TEST_MEMSLOT_INDEX); - - sync_global_to_guest(vm, test_data); -} - -static void setup_gic(void) -{ - gic_fd = vgic_v3_setup(vm, test_data.nr_cpus, 64); - __TEST_REQUIRE(gic_fd >= 0, "Failed to create GICv3"); - - its_fd = vgic_its_setup(vm); -} - -static void signal_lpi(u32 device_id, u32 event_id) -{ - vm_paddr_t db_addr = GITS_BASE_GPA + GITS_TRANSLATER; - - struct kvm_msi msi = { - .address_lo = db_addr, - .address_hi = db_addr >> 32, - .data = event_id, - .devid = device_id, - .flags = KVM_MSI_VALID_DEVID, - }; - - /* - * KVM_SIGNAL_MSI returns 1 if the MSI wasn't 'blocked' by the VM, - * which for arm64 implies having a valid translation in the ITS. 
- */ - TEST_ASSERT(__vm_ioctl(vm, KVM_SIGNAL_MSI, &msi) == 1, - "KVM_SIGNAL_MSI ioctl failed"); -} - -static pthread_barrier_t test_setup_barrier; - -static void *lpi_worker_thread(void *data) -{ - u32 device_id = (size_t)data; - u32 event_id; - size_t i; - - pthread_barrier_wait(&test_setup_barrier); - - for (i = 0; i < nr_iterations; i++) - for (event_id = 0; event_id < test_data.nr_event_ids; event_id++) - signal_lpi(device_id, event_id); - - return NULL; -} - -static void *vcpu_worker_thread(void *data) -{ - struct kvm_vcpu *vcpu = data; - struct ucall uc; - - while (true) { - vcpu_run(vcpu); - - switch (get_ucall(vcpu, &uc)) { - case UCALL_SYNC: - pthread_barrier_wait(&test_setup_barrier); - continue; - case UCALL_DONE: - return NULL; - case UCALL_ABORT: - REPORT_GUEST_ASSERT(uc); - break; - default: - TEST_FAIL("Unknown ucall: %lu", uc.cmd); - } - } - - return NULL; -} - -static void report_stats(struct timespec delta) -{ - double nr_lpis; - double time; - - nr_lpis = test_data.nr_devices * test_data.nr_event_ids * nr_iterations; - - time = delta.tv_sec; - time += ((double)delta.tv_nsec) / NSEC_PER_SEC; - - pr_info("Rate: %.2f LPIs/sec\n", nr_lpis / time); -} - -static void run_test(void) -{ - u32 nr_devices = test_data.nr_devices; - u32 nr_vcpus = test_data.nr_cpus; - pthread_t *lpi_threads = malloc(nr_devices * sizeof(pthread_t)); - pthread_t *vcpu_threads = malloc(nr_vcpus * sizeof(pthread_t)); - struct timespec start, delta; - size_t i; - - TEST_ASSERT(lpi_threads && vcpu_threads, "Failed to allocate pthread arrays"); - - pthread_barrier_init(&test_setup_barrier, NULL, nr_vcpus + nr_devices + 1); - - for (i = 0; i < nr_vcpus; i++) - pthread_create(&vcpu_threads[i], NULL, vcpu_worker_thread, vcpus[i]); - - for (i = 0; i < nr_devices; i++) - pthread_create(&lpi_threads[i], NULL, lpi_worker_thread, (void *)i); - - pthread_barrier_wait(&test_setup_barrier); - - clock_gettime(CLOCK_MONOTONIC, &start); - - for (i = 0; i < nr_devices; i++) - pthread_join(lpi_threads[i], NULL); - - delta = timespec_elapsed(start); - write_guest_global(vm, test_data.request_vcpus_stop, true); - - for (i = 0; i < nr_vcpus; i++) - pthread_join(vcpu_threads[i], NULL); - - report_stats(delta); -} - -static void setup_vm(void) -{ - int i; - - vcpus = malloc(test_data.nr_cpus * sizeof(struct kvm_vcpu)); - TEST_ASSERT(vcpus, "Failed to allocate vCPU array"); - - vm = vm_create_with_vcpus(test_data.nr_cpus, guest_code, vcpus); - - vm_init_descriptor_tables(vm); - for (i = 0; i < test_data.nr_cpus; i++) - vcpu_init_descriptor_tables(vcpus[i]); - - vm_install_exception_handler(vm, VECTOR_IRQ_CURRENT, guest_irq_handler); - - setup_memslot(); - - setup_gic(); - - setup_test_data(); -} - -static void destroy_vm(void) -{ - close(its_fd); - close(gic_fd); - kvm_vm_free(vm); - free(vcpus); -} - -static void pr_usage(const char *name) -{ - pr_info("%s [-v NR_VCPUS] [-d NR_DEVICES] [-e NR_EVENTS] [-i ITERS] -h\n", name); - pr_info(" -v:\tnumber of vCPUs (default: %u)\n", test_data.nr_cpus); - pr_info(" -d:\tnumber of devices (default: %u)\n", test_data.nr_devices); - pr_info(" -e:\tnumber of event IDs per device (default: %u)\n", test_data.nr_event_ids); - pr_info(" -i:\tnumber of iterations (default: %lu)\n", nr_iterations); -} - -int main(int argc, char **argv) -{ - u32 nr_threads; - int c; - - while ((c = getopt(argc, argv, "hv:d:e:i:")) != -1) { - switch (c) { - case 'v': - test_data.nr_cpus = atoi(optarg); - break; - case 'd': - test_data.nr_devices = atoi(optarg); - break; - case 'e': - test_data.nr_event_ids = 
atoi(optarg); - break; - case 'i': - nr_iterations = strtoul(optarg, NULL, 0); - break; - case 'h': - default: - pr_usage(argv[0]); - return 1; - } - } - - nr_threads = test_data.nr_cpus + test_data.nr_devices; - if (nr_threads > get_nprocs()) - pr_info("WARNING: running %u threads on %d CPUs; performance is degraded.\n", - nr_threads, get_nprocs()); - - setup_vm(); - - run_test(); - - destroy_vm(); - - return 0; -} diff --git a/tools/testing/selftests/kvm/aarch64/vpmu_counter_access.c b/tools/testing/selftests/kvm/aarch64/vpmu_counter_access.c deleted file mode 100644 index f16b3b27e32e..000000000000 --- a/tools/testing/selftests/kvm/aarch64/vpmu_counter_access.c +++ /dev/null @@ -1,648 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * vpmu_counter_access - Test vPMU event counter access - * - * Copyright (c) 2023 Google LLC. - * - * This test checks if the guest can see the same number of the PMU event - * counters (PMCR_EL0.N) that userspace sets, if the guest can access - * those counters, and if the guest is prevented from accessing any - * other counters. - * It also checks if the userspace accesses to the PMU regsisters honor the - * PMCR.N value that's set for the guest. - * This test runs only when KVM_CAP_ARM_PMU_V3 is supported on the host. - */ -#include -#include -#include -#include -#include -#include - -/* The max number of the PMU event counters (excluding the cycle counter) */ -#define ARMV8_PMU_MAX_GENERAL_COUNTERS (ARMV8_PMU_MAX_COUNTERS - 1) - -/* The cycle counter bit position that's common among the PMU registers */ -#define ARMV8_PMU_CYCLE_IDX 31 - -struct vpmu_vm { - struct kvm_vm *vm; - struct kvm_vcpu *vcpu; - int gic_fd; -}; - -static struct vpmu_vm vpmu_vm; - -struct pmreg_sets { - uint64_t set_reg_id; - uint64_t clr_reg_id; -}; - -#define PMREG_SET(set, clr) {.set_reg_id = set, .clr_reg_id = clr} - -static uint64_t get_pmcr_n(uint64_t pmcr) -{ - return FIELD_GET(ARMV8_PMU_PMCR_N, pmcr); -} - -static void set_pmcr_n(uint64_t *pmcr, uint64_t pmcr_n) -{ - u64p_replace_bits((__u64 *) pmcr, pmcr_n, ARMV8_PMU_PMCR_N); -} - -static uint64_t get_counters_mask(uint64_t n) -{ - uint64_t mask = BIT(ARMV8_PMU_CYCLE_IDX); - - if (n) - mask |= GENMASK(n - 1, 0); - return mask; -} - -/* Read PMEVTCNTR_EL0 through PMXEVCNTR_EL0 */ -static inline unsigned long read_sel_evcntr(int sel) -{ - write_sysreg(sel, pmselr_el0); - isb(); - return read_sysreg(pmxevcntr_el0); -} - -/* Write PMEVTCNTR_EL0 through PMXEVCNTR_EL0 */ -static inline void write_sel_evcntr(int sel, unsigned long val) -{ - write_sysreg(sel, pmselr_el0); - isb(); - write_sysreg(val, pmxevcntr_el0); - isb(); -} - -/* Read PMEVTYPER_EL0 through PMXEVTYPER_EL0 */ -static inline unsigned long read_sel_evtyper(int sel) -{ - write_sysreg(sel, pmselr_el0); - isb(); - return read_sysreg(pmxevtyper_el0); -} - -/* Write PMEVTYPER_EL0 through PMXEVTYPER_EL0 */ -static inline void write_sel_evtyper(int sel, unsigned long val) -{ - write_sysreg(sel, pmselr_el0); - isb(); - write_sysreg(val, pmxevtyper_el0); - isb(); -} - -static void pmu_disable_reset(void) -{ - uint64_t pmcr = read_sysreg(pmcr_el0); - - /* Reset all counters, disabling them */ - pmcr &= ~ARMV8_PMU_PMCR_E; - write_sysreg(pmcr | ARMV8_PMU_PMCR_P, pmcr_el0); - isb(); -} - -#define RETURN_READ_PMEVCNTRN(n) \ - return read_sysreg(pmevcntr##n##_el0) -static unsigned long read_pmevcntrn(int n) -{ - PMEVN_SWITCH(n, RETURN_READ_PMEVCNTRN); - return 0; -} - -#define WRITE_PMEVCNTRN(n) \ - write_sysreg(val, pmevcntr##n##_el0) -static void write_pmevcntrn(int n, 
unsigned long val) -{ - PMEVN_SWITCH(n, WRITE_PMEVCNTRN); - isb(); -} - -#define READ_PMEVTYPERN(n) \ - return read_sysreg(pmevtyper##n##_el0) -static unsigned long read_pmevtypern(int n) -{ - PMEVN_SWITCH(n, READ_PMEVTYPERN); - return 0; -} - -#define WRITE_PMEVTYPERN(n) \ - write_sysreg(val, pmevtyper##n##_el0) -static void write_pmevtypern(int n, unsigned long val) -{ - PMEVN_SWITCH(n, WRITE_PMEVTYPERN); - isb(); -} - -/* - * The pmc_accessor structure has pointers to PMEV{CNTR,TYPER}_EL0 - * accessors that test cases will use. Each of the accessors will - * either directly reads/writes PMEV{CNTR,TYPER}_EL0 - * (i.e. {read,write}_pmev{cnt,type}rn()), or reads/writes them through - * PMXEV{CNTR,TYPER}_EL0 (i.e. {read,write}_sel_ev{cnt,type}r()). - * - * This is used to test that combinations of those accessors provide - * the consistent behavior. - */ -struct pmc_accessor { - /* A function to be used to read PMEVTCNTR_EL0 */ - unsigned long (*read_cntr)(int idx); - /* A function to be used to write PMEVTCNTR_EL0 */ - void (*write_cntr)(int idx, unsigned long val); - /* A function to be used to read PMEVTYPER_EL0 */ - unsigned long (*read_typer)(int idx); - /* A function to be used to write PMEVTYPER_EL0 */ - void (*write_typer)(int idx, unsigned long val); -}; - -struct pmc_accessor pmc_accessors[] = { - /* test with all direct accesses */ - { read_pmevcntrn, write_pmevcntrn, read_pmevtypern, write_pmevtypern }, - /* test with all indirect accesses */ - { read_sel_evcntr, write_sel_evcntr, read_sel_evtyper, write_sel_evtyper }, - /* read with direct accesses, and write with indirect accesses */ - { read_pmevcntrn, write_sel_evcntr, read_pmevtypern, write_sel_evtyper }, - /* read with indirect accesses, and write with direct accesses */ - { read_sel_evcntr, write_pmevcntrn, read_sel_evtyper, write_pmevtypern }, -}; - -/* - * Convert a pointer of pmc_accessor to an index in pmc_accessors[], - * assuming that the pointer is one of the entries in pmc_accessors[]. - */ -#define PMC_ACC_TO_IDX(acc) (acc - &pmc_accessors[0]) - -#define GUEST_ASSERT_BITMAP_REG(regname, mask, set_expected) \ -{ \ - uint64_t _tval = read_sysreg(regname); \ - \ - if (set_expected) \ - __GUEST_ASSERT((_tval & mask), \ - "tval: 0x%lx; mask: 0x%lx; set_expected: %u", \ - _tval, mask, set_expected); \ - else \ - __GUEST_ASSERT(!(_tval & mask), \ - "tval: 0x%lx; mask: 0x%lx; set_expected: %u", \ - _tval, mask, set_expected); \ -} - -/* - * Check if @mask bits in {PMCNTEN,PMINTEN,PMOVS}{SET,CLR} registers - * are set or cleared as specified in @set_expected. - */ -static void check_bitmap_pmu_regs(uint64_t mask, bool set_expected) -{ - GUEST_ASSERT_BITMAP_REG(pmcntenset_el0, mask, set_expected); - GUEST_ASSERT_BITMAP_REG(pmcntenclr_el0, mask, set_expected); - GUEST_ASSERT_BITMAP_REG(pmintenset_el1, mask, set_expected); - GUEST_ASSERT_BITMAP_REG(pmintenclr_el1, mask, set_expected); - GUEST_ASSERT_BITMAP_REG(pmovsset_el0, mask, set_expected); - GUEST_ASSERT_BITMAP_REG(pmovsclr_el0, mask, set_expected); -} - -/* - * Check if the bit in {PMCNTEN,PMINTEN,PMOVS}{SET,CLR} registers corresponding - * to the specified counter (@pmc_idx) can be read/written as expected. - * When @set_op is true, it tries to set the bit for the counter in - * those registers by writing the SET registers (the bit won't be set - * if the counter is not implemented though). - * Otherwise, it tries to clear the bits in the registers by writing - * the CLR registers. - * Then, it checks if the values indicated in the registers are as expected. 
- */ -static void test_bitmap_pmu_regs(int pmc_idx, bool set_op) -{ - uint64_t pmcr_n, test_bit = BIT(pmc_idx); - bool set_expected = false; - - if (set_op) { - write_sysreg(test_bit, pmcntenset_el0); - write_sysreg(test_bit, pmintenset_el1); - write_sysreg(test_bit, pmovsset_el0); - - /* The bit will be set only if the counter is implemented */ - pmcr_n = get_pmcr_n(read_sysreg(pmcr_el0)); - set_expected = (pmc_idx < pmcr_n) ? true : false; - } else { - write_sysreg(test_bit, pmcntenclr_el0); - write_sysreg(test_bit, pmintenclr_el1); - write_sysreg(test_bit, pmovsclr_el0); - } - check_bitmap_pmu_regs(test_bit, set_expected); -} - -/* - * Tests for reading/writing registers for the (implemented) event counter - * specified by @pmc_idx. - */ -static void test_access_pmc_regs(struct pmc_accessor *acc, int pmc_idx) -{ - uint64_t write_data, read_data; - - /* Disable all PMCs and reset all PMCs to zero. */ - pmu_disable_reset(); - - /* - * Tests for reading/writing {PMCNTEN,PMINTEN,PMOVS}{SET,CLR}_EL1. - */ - - /* Make sure that the bit in those registers are set to 0 */ - test_bitmap_pmu_regs(pmc_idx, false); - /* Test if setting the bit in those registers works */ - test_bitmap_pmu_regs(pmc_idx, true); - /* Test if clearing the bit in those registers works */ - test_bitmap_pmu_regs(pmc_idx, false); - - /* - * Tests for reading/writing the event type register. - */ - - /* - * Set the event type register to an arbitrary value just for testing - * of reading/writing the register. - * Arm ARM says that for the event from 0x0000 to 0x003F, - * the value indicated in the PMEVTYPER_EL0.evtCount field is - * the value written to the field even when the specified event - * is not supported. - */ - write_data = (ARMV8_PMU_EXCLUDE_EL1 | ARMV8_PMUV3_PERFCTR_INST_RETIRED); - acc->write_typer(pmc_idx, write_data); - read_data = acc->read_typer(pmc_idx); - __GUEST_ASSERT(read_data == write_data, - "pmc_idx: 0x%x; acc_idx: 0x%lx; read_data: 0x%lx; write_data: 0x%lx", - pmc_idx, PMC_ACC_TO_IDX(acc), read_data, write_data); - - /* - * Tests for reading/writing the event count register. - */ - - read_data = acc->read_cntr(pmc_idx); - - /* The count value must be 0, as it is disabled and reset */ - __GUEST_ASSERT(read_data == 0, - "pmc_idx: 0x%x; acc_idx: 0x%lx; read_data: 0x%lx", - pmc_idx, PMC_ACC_TO_IDX(acc), read_data); - - write_data = read_data + pmc_idx + 0x12345; - acc->write_cntr(pmc_idx, write_data); - read_data = acc->read_cntr(pmc_idx); - __GUEST_ASSERT(read_data == write_data, - "pmc_idx: 0x%x; acc_idx: 0x%lx; read_data: 0x%lx; write_data: 0x%lx", - pmc_idx, PMC_ACC_TO_IDX(acc), read_data, write_data); -} - -#define INVALID_EC (-1ul) -uint64_t expected_ec = INVALID_EC; - -static void guest_sync_handler(struct ex_regs *regs) -{ - uint64_t esr, ec; - - esr = read_sysreg(esr_el1); - ec = ESR_ELx_EC(esr); - - __GUEST_ASSERT(expected_ec == ec, - "PC: 0x%lx; ESR: 0x%lx; EC: 0x%lx; EC expected: 0x%lx", - regs->pc, esr, ec, expected_ec); - - /* skip the trapping instruction */ - regs->pc += 4; - - /* Use INVALID_EC to indicate an exception occurred */ - expected_ec = INVALID_EC; -} - -/* - * Run the given operation that should trigger an exception with the - * given exception class. The exception handler (guest_sync_handler) - * will reset op_end_addr to 0, expected_ec to INVALID_EC, and skip - * the instruction that trapped. 
- */ -#define TEST_EXCEPTION(ec, ops) \ -({ \ - GUEST_ASSERT(ec != INVALID_EC); \ - WRITE_ONCE(expected_ec, ec); \ - dsb(ish); \ - ops; \ - GUEST_ASSERT(expected_ec == INVALID_EC); \ -}) - -/* - * Tests for reading/writing registers for the unimplemented event counter - * specified by @pmc_idx (>= PMCR_EL0.N). - */ -static void test_access_invalid_pmc_regs(struct pmc_accessor *acc, int pmc_idx) -{ - /* - * Reading/writing the event count/type registers should cause - * an UNDEFINED exception. - */ - TEST_EXCEPTION(ESR_ELx_EC_UNKNOWN, acc->read_cntr(pmc_idx)); - TEST_EXCEPTION(ESR_ELx_EC_UNKNOWN, acc->write_cntr(pmc_idx, 0)); - TEST_EXCEPTION(ESR_ELx_EC_UNKNOWN, acc->read_typer(pmc_idx)); - TEST_EXCEPTION(ESR_ELx_EC_UNKNOWN, acc->write_typer(pmc_idx, 0)); - /* - * The bit corresponding to the (unimplemented) counter in - * {PMCNTEN,PMINTEN,PMOVS}{SET,CLR} registers should be RAZ. - */ - test_bitmap_pmu_regs(pmc_idx, 1); - test_bitmap_pmu_regs(pmc_idx, 0); -} - -/* - * The guest is configured with PMUv3 with @expected_pmcr_n number of - * event counters. - * Check if @expected_pmcr_n is consistent with PMCR_EL0.N, and - * if reading/writing PMU registers for implemented or unimplemented - * counters works as expected. - */ -static void guest_code(uint64_t expected_pmcr_n) -{ - uint64_t pmcr, pmcr_n, unimp_mask; - int i, pmc; - - __GUEST_ASSERT(expected_pmcr_n <= ARMV8_PMU_MAX_GENERAL_COUNTERS, - "Expected PMCR.N: 0x%lx; ARMv8 general counters: 0x%x", - expected_pmcr_n, ARMV8_PMU_MAX_GENERAL_COUNTERS); - - pmcr = read_sysreg(pmcr_el0); - pmcr_n = get_pmcr_n(pmcr); - - /* Make sure that PMCR_EL0.N indicates the value userspace set */ - __GUEST_ASSERT(pmcr_n == expected_pmcr_n, - "Expected PMCR.N: 0x%lx, PMCR.N: 0x%lx", - expected_pmcr_n, pmcr_n); - - /* - * Make sure that (RAZ) bits corresponding to unimplemented event - * counters in {PMCNTEN,PMINTEN,PMOVS}{SET,CLR} registers are reset - * to zero. - * (NOTE: bits for implemented event counters are reset to UNKNOWN) - */ - unimp_mask = GENMASK_ULL(ARMV8_PMU_MAX_GENERAL_COUNTERS - 1, pmcr_n); - check_bitmap_pmu_regs(unimp_mask, false); - - /* - * Tests for reading/writing PMU registers for implemented counters. - * Use each combination of PMEV{CNTR,TYPER}_EL0 accessor functions. - */ - for (i = 0; i < ARRAY_SIZE(pmc_accessors); i++) { - for (pmc = 0; pmc < pmcr_n; pmc++) - test_access_pmc_regs(&pmc_accessors[i], pmc); - } - - /* - * Tests for reading/writing PMU registers for unimplemented counters. - * Use each combination of PMEV{CNTR,TYPER}_EL0 accessor functions. - */ - for (i = 0; i < ARRAY_SIZE(pmc_accessors); i++) { - for (pmc = pmcr_n; pmc < ARMV8_PMU_MAX_GENERAL_COUNTERS; pmc++) - test_access_invalid_pmc_regs(&pmc_accessors[i], pmc); - } - - GUEST_DONE(); -} - -/* Create a VM that has one vCPU with PMUv3 configured. */ -static void create_vpmu_vm(void *guest_code) -{ - struct kvm_vcpu_init init; - uint8_t pmuver, ec; - uint64_t dfr0, irq = 23; - struct kvm_device_attr irq_attr = { - .group = KVM_ARM_VCPU_PMU_V3_CTRL, - .attr = KVM_ARM_VCPU_PMU_V3_IRQ, - .addr = (uint64_t)&irq, - }; - struct kvm_device_attr init_attr = { - .group = KVM_ARM_VCPU_PMU_V3_CTRL, - .attr = KVM_ARM_VCPU_PMU_V3_INIT, - }; - - /* The test creates the vpmu_vm multiple times. 
Ensure a clean state */ - memset(&vpmu_vm, 0, sizeof(vpmu_vm)); - - vpmu_vm.vm = vm_create(1); - vm_init_descriptor_tables(vpmu_vm.vm); - for (ec = 0; ec < ESR_ELx_EC_MAX + 1; ec++) { - vm_install_sync_handler(vpmu_vm.vm, VECTOR_SYNC_CURRENT, ec, - guest_sync_handler); - } - - /* Create vCPU with PMUv3 */ - vm_ioctl(vpmu_vm.vm, KVM_ARM_PREFERRED_TARGET, &init); - init.features[0] |= (1 << KVM_ARM_VCPU_PMU_V3); - vpmu_vm.vcpu = aarch64_vcpu_add(vpmu_vm.vm, 0, &init, guest_code); - vcpu_init_descriptor_tables(vpmu_vm.vcpu); - vpmu_vm.gic_fd = vgic_v3_setup(vpmu_vm.vm, 1, 64); - __TEST_REQUIRE(vpmu_vm.gic_fd >= 0, - "Failed to create vgic-v3, skipping"); - - /* Make sure that PMUv3 support is indicated in the ID register */ - dfr0 = vcpu_get_reg(vpmu_vm.vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64DFR0_EL1)); - pmuver = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMUVer), dfr0); - TEST_ASSERT(pmuver != ID_AA64DFR0_EL1_PMUVer_IMP_DEF && - pmuver >= ID_AA64DFR0_EL1_PMUVer_IMP, - "Unexpected PMUVER (0x%x) on the vCPU with PMUv3", pmuver); - - /* Initialize vPMU */ - vcpu_ioctl(vpmu_vm.vcpu, KVM_SET_DEVICE_ATTR, &irq_attr); - vcpu_ioctl(vpmu_vm.vcpu, KVM_SET_DEVICE_ATTR, &init_attr); -} - -static void destroy_vpmu_vm(void) -{ - close(vpmu_vm.gic_fd); - kvm_vm_free(vpmu_vm.vm); -} - -static void run_vcpu(struct kvm_vcpu *vcpu, uint64_t pmcr_n) -{ - struct ucall uc; - - vcpu_args_set(vcpu, 1, pmcr_n); - vcpu_run(vcpu); - switch (get_ucall(vcpu, &uc)) { - case UCALL_ABORT: - REPORT_GUEST_ASSERT(uc); - break; - case UCALL_DONE: - break; - default: - TEST_FAIL("Unknown ucall %lu", uc.cmd); - break; - } -} - -static void test_create_vpmu_vm_with_pmcr_n(uint64_t pmcr_n, bool expect_fail) -{ - struct kvm_vcpu *vcpu; - uint64_t pmcr, pmcr_orig; - - create_vpmu_vm(guest_code); - vcpu = vpmu_vm.vcpu; - - pmcr_orig = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_PMCR_EL0)); - pmcr = pmcr_orig; - - /* - * Setting a larger value of PMCR.N should not modify the field, and - * return a success. - */ - set_pmcr_n(&pmcr, pmcr_n); - vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_PMCR_EL0), pmcr); - pmcr = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_PMCR_EL0)); - - if (expect_fail) - TEST_ASSERT(pmcr_orig == pmcr, - "PMCR.N modified by KVM to a larger value (PMCR: 0x%lx) for pmcr_n: 0x%lx", - pmcr, pmcr_n); - else - TEST_ASSERT(pmcr_n == get_pmcr_n(pmcr), - "Failed to update PMCR.N to %lu (received: %lu)", - pmcr_n, get_pmcr_n(pmcr)); -} - -/* - * Create a guest with one vCPU, set the PMCR_EL0.N for the vCPU to @pmcr_n, - * and run the test. - */ -static void run_access_test(uint64_t pmcr_n) -{ - uint64_t sp; - struct kvm_vcpu *vcpu; - struct kvm_vcpu_init init; - - pr_debug("Test with pmcr_n %lu\n", pmcr_n); - - test_create_vpmu_vm_with_pmcr_n(pmcr_n, false); - vcpu = vpmu_vm.vcpu; - - /* Save the initial sp to restore them later to run the guest again */ - sp = vcpu_get_reg(vcpu, ARM64_CORE_REG(sp_el1)); - - run_vcpu(vcpu, pmcr_n); - - /* - * Reset and re-initialize the vCPU, and run the guest code again to - * check if PMCR_EL0.N is preserved. 
- */ - vm_ioctl(vpmu_vm.vm, KVM_ARM_PREFERRED_TARGET, &init); - init.features[0] |= (1 << KVM_ARM_VCPU_PMU_V3); - aarch64_vcpu_setup(vcpu, &init); - vcpu_init_descriptor_tables(vcpu); - vcpu_set_reg(vcpu, ARM64_CORE_REG(sp_el1), sp); - vcpu_set_reg(vcpu, ARM64_CORE_REG(regs.pc), (uint64_t)guest_code); - - run_vcpu(vcpu, pmcr_n); - - destroy_vpmu_vm(); -} - -static struct pmreg_sets validity_check_reg_sets[] = { - PMREG_SET(SYS_PMCNTENSET_EL0, SYS_PMCNTENCLR_EL0), - PMREG_SET(SYS_PMINTENSET_EL1, SYS_PMINTENCLR_EL1), - PMREG_SET(SYS_PMOVSSET_EL0, SYS_PMOVSCLR_EL0), -}; - -/* - * Create a VM, and check if KVM handles the userspace accesses of - * the PMU register sets in @validity_check_reg_sets[] correctly. - */ -static void run_pmregs_validity_test(uint64_t pmcr_n) -{ - int i; - struct kvm_vcpu *vcpu; - uint64_t set_reg_id, clr_reg_id, reg_val; - uint64_t valid_counters_mask, max_counters_mask; - - test_create_vpmu_vm_with_pmcr_n(pmcr_n, false); - vcpu = vpmu_vm.vcpu; - - valid_counters_mask = get_counters_mask(pmcr_n); - max_counters_mask = get_counters_mask(ARMV8_PMU_MAX_COUNTERS); - - for (i = 0; i < ARRAY_SIZE(validity_check_reg_sets); i++) { - set_reg_id = validity_check_reg_sets[i].set_reg_id; - clr_reg_id = validity_check_reg_sets[i].clr_reg_id; - - /* - * Test if the 'set' and 'clr' variants of the registers - * are initialized based on the number of valid counters. - */ - reg_val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(set_reg_id)); - TEST_ASSERT((reg_val & (~valid_counters_mask)) == 0, - "Initial read of set_reg: 0x%llx has unimplemented counters enabled: 0x%lx", - KVM_ARM64_SYS_REG(set_reg_id), reg_val); - - reg_val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(clr_reg_id)); - TEST_ASSERT((reg_val & (~valid_counters_mask)) == 0, - "Initial read of clr_reg: 0x%llx has unimplemented counters enabled: 0x%lx", - KVM_ARM64_SYS_REG(clr_reg_id), reg_val); - - /* - * Using the 'set' variant, force-set the register to the - * max number of possible counters and test if KVM discards - * the bits for unimplemented counters as it should. - */ - vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(set_reg_id), max_counters_mask); - - reg_val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(set_reg_id)); - TEST_ASSERT((reg_val & (~valid_counters_mask)) == 0, - "Read of set_reg: 0x%llx has unimplemented counters enabled: 0x%lx", - KVM_ARM64_SYS_REG(set_reg_id), reg_val); - - reg_val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(clr_reg_id)); - TEST_ASSERT((reg_val & (~valid_counters_mask)) == 0, - "Read of clr_reg: 0x%llx has unimplemented counters enabled: 0x%lx", - KVM_ARM64_SYS_REG(clr_reg_id), reg_val); - } - - destroy_vpmu_vm(); -} - -/* - * Create a guest with one vCPU, and attempt to set the PMCR_EL0.N for - * the vCPU to @pmcr_n, which is larger than the host value. - * The attempt should fail as @pmcr_n is too big to set for the vCPU. - */ -static void run_error_test(uint64_t pmcr_n) -{ - pr_debug("Error test with pmcr_n %lu (larger than the host)\n", pmcr_n); - - test_create_vpmu_vm_with_pmcr_n(pmcr_n, true); - destroy_vpmu_vm(); -} - -/* - * Return the default number of implemented PMU event counters excluding - * the cycle counter (i.e. PMCR_EL0.N value) for the guest. 
- */ -static uint64_t get_pmcr_n_limit(void) -{ - uint64_t pmcr; - - create_vpmu_vm(guest_code); - pmcr = vcpu_get_reg(vpmu_vm.vcpu, KVM_ARM64_SYS_REG(SYS_PMCR_EL0)); - destroy_vpmu_vm(); - return get_pmcr_n(pmcr); -} - -int main(void) -{ - uint64_t i, pmcr_n; - - TEST_REQUIRE(kvm_has_cap(KVM_CAP_ARM_PMU_V3)); - - pmcr_n = get_pmcr_n_limit(); - for (i = 0; i <= pmcr_n; i++) { - run_access_test(i); - run_pmregs_validity_test(i); - } - - for (i = pmcr_n + 1; i < ARMV8_PMU_MAX_COUNTERS; i++) - run_error_test(i); - - return 0; -} diff --git a/tools/testing/selftests/kvm/arm64/aarch32_id_regs.c b/tools/testing/selftests/kvm/arm64/aarch32_id_regs.c new file mode 100644 index 000000000000..447d61cae4db --- /dev/null +++ b/tools/testing/selftests/kvm/arm64/aarch32_id_regs.c @@ -0,0 +1,167 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * aarch32_id_regs - Test for ID register behavior on AArch64-only systems + * + * Copyright (c) 2022 Google LLC. + * + * Test that KVM handles the AArch64 views of the AArch32 ID registers as RAZ + * and WI from userspace. + */ + +#include + +#include "kvm_util.h" +#include "processor.h" +#include "test_util.h" +#include + +#define BAD_ID_REG_VAL 0x1badc0deul + +#define GUEST_ASSERT_REG_RAZ(reg) GUEST_ASSERT_EQ(read_sysreg_s(reg), 0) + +static void guest_main(void) +{ + GUEST_ASSERT_REG_RAZ(SYS_ID_PFR0_EL1); + GUEST_ASSERT_REG_RAZ(SYS_ID_PFR1_EL1); + GUEST_ASSERT_REG_RAZ(SYS_ID_DFR0_EL1); + GUEST_ASSERT_REG_RAZ(SYS_ID_AFR0_EL1); + GUEST_ASSERT_REG_RAZ(SYS_ID_MMFR0_EL1); + GUEST_ASSERT_REG_RAZ(SYS_ID_MMFR1_EL1); + GUEST_ASSERT_REG_RAZ(SYS_ID_MMFR2_EL1); + GUEST_ASSERT_REG_RAZ(SYS_ID_MMFR3_EL1); + GUEST_ASSERT_REG_RAZ(SYS_ID_ISAR0_EL1); + GUEST_ASSERT_REG_RAZ(SYS_ID_ISAR1_EL1); + GUEST_ASSERT_REG_RAZ(SYS_ID_ISAR2_EL1); + GUEST_ASSERT_REG_RAZ(SYS_ID_ISAR3_EL1); + GUEST_ASSERT_REG_RAZ(SYS_ID_ISAR4_EL1); + GUEST_ASSERT_REG_RAZ(SYS_ID_ISAR5_EL1); + GUEST_ASSERT_REG_RAZ(SYS_ID_MMFR4_EL1); + GUEST_ASSERT_REG_RAZ(SYS_ID_ISAR6_EL1); + GUEST_ASSERT_REG_RAZ(SYS_MVFR0_EL1); + GUEST_ASSERT_REG_RAZ(SYS_MVFR1_EL1); + GUEST_ASSERT_REG_RAZ(SYS_MVFR2_EL1); + GUEST_ASSERT_REG_RAZ(sys_reg(3, 0, 0, 3, 3)); + GUEST_ASSERT_REG_RAZ(SYS_ID_PFR2_EL1); + GUEST_ASSERT_REG_RAZ(SYS_ID_DFR1_EL1); + GUEST_ASSERT_REG_RAZ(SYS_ID_MMFR5_EL1); + GUEST_ASSERT_REG_RAZ(sys_reg(3, 0, 0, 3, 7)); + + GUEST_DONE(); +} + +static void test_guest_raz(struct kvm_vcpu *vcpu) +{ + struct ucall uc; + + vcpu_run(vcpu); + + switch (get_ucall(vcpu, &uc)) { + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + break; + case UCALL_DONE: + break; + default: + TEST_FAIL("Unexpected ucall: %lu", uc.cmd); + } +} + +static uint64_t raz_wi_reg_ids[] = { + KVM_ARM64_SYS_REG(SYS_ID_PFR0_EL1), + KVM_ARM64_SYS_REG(SYS_ID_PFR1_EL1), + KVM_ARM64_SYS_REG(SYS_ID_DFR0_EL1), + KVM_ARM64_SYS_REG(SYS_ID_MMFR0_EL1), + KVM_ARM64_SYS_REG(SYS_ID_MMFR1_EL1), + KVM_ARM64_SYS_REG(SYS_ID_MMFR2_EL1), + KVM_ARM64_SYS_REG(SYS_ID_MMFR3_EL1), + KVM_ARM64_SYS_REG(SYS_ID_ISAR0_EL1), + KVM_ARM64_SYS_REG(SYS_ID_ISAR1_EL1), + KVM_ARM64_SYS_REG(SYS_ID_ISAR2_EL1), + KVM_ARM64_SYS_REG(SYS_ID_ISAR3_EL1), + KVM_ARM64_SYS_REG(SYS_ID_ISAR4_EL1), + KVM_ARM64_SYS_REG(SYS_ID_ISAR5_EL1), + KVM_ARM64_SYS_REG(SYS_ID_MMFR4_EL1), + KVM_ARM64_SYS_REG(SYS_ID_ISAR6_EL1), + KVM_ARM64_SYS_REG(SYS_MVFR0_EL1), + KVM_ARM64_SYS_REG(SYS_MVFR1_EL1), + KVM_ARM64_SYS_REG(SYS_MVFR2_EL1), + KVM_ARM64_SYS_REG(SYS_ID_PFR2_EL1), + KVM_ARM64_SYS_REG(SYS_ID_MMFR5_EL1), +}; + +static void test_user_raz_wi(struct kvm_vcpu *vcpu) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(raz_wi_reg_ids); i++) { 
+ uint64_t reg_id = raz_wi_reg_ids[i]; + uint64_t val; + + val = vcpu_get_reg(vcpu, reg_id); + TEST_ASSERT_EQ(val, 0); + + /* + * Expect the ioctl to succeed with no effect on the register + * value. + */ + vcpu_set_reg(vcpu, reg_id, BAD_ID_REG_VAL); + + val = vcpu_get_reg(vcpu, reg_id); + TEST_ASSERT_EQ(val, 0); + } +} + +static uint64_t raz_invariant_reg_ids[] = { + KVM_ARM64_SYS_REG(SYS_ID_AFR0_EL1), + KVM_ARM64_SYS_REG(sys_reg(3, 0, 0, 3, 3)), + KVM_ARM64_SYS_REG(SYS_ID_DFR1_EL1), + KVM_ARM64_SYS_REG(sys_reg(3, 0, 0, 3, 7)), +}; + +static void test_user_raz_invariant(struct kvm_vcpu *vcpu) +{ + int i, r; + + for (i = 0; i < ARRAY_SIZE(raz_invariant_reg_ids); i++) { + uint64_t reg_id = raz_invariant_reg_ids[i]; + uint64_t val; + + val = vcpu_get_reg(vcpu, reg_id); + TEST_ASSERT_EQ(val, 0); + + r = __vcpu_set_reg(vcpu, reg_id, BAD_ID_REG_VAL); + TEST_ASSERT(r < 0 && errno == EINVAL, + "unexpected KVM_SET_ONE_REG error: r=%d, errno=%d", r, errno); + + val = vcpu_get_reg(vcpu, reg_id); + TEST_ASSERT_EQ(val, 0); + } +} + + + +static bool vcpu_aarch64_only(struct kvm_vcpu *vcpu) +{ + uint64_t val, el0; + + val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1)); + + el0 = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL0), val); + return el0 == ID_AA64PFR0_EL1_ELx_64BIT_ONLY; +} + +int main(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + + vm = vm_create_with_one_vcpu(&vcpu, guest_main); + + TEST_REQUIRE(vcpu_aarch64_only(vcpu)); + + test_user_raz_wi(vcpu); + test_user_raz_invariant(vcpu); + test_guest_raz(vcpu); + + kvm_vm_free(vm); +} diff --git a/tools/testing/selftests/kvm/arm64/arch_timer.c b/tools/testing/selftests/kvm/arm64/arch_timer.c new file mode 100644 index 000000000000..eeba1cc87ff8 --- /dev/null +++ b/tools/testing/selftests/kvm/arm64/arch_timer.c @@ -0,0 +1,220 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * The test validates both the virtual and physical timer IRQs using + * CVAL and TVAL registers. + * + * Copyright (c) 2021, Google LLC. 
+ */ +#include "arch_timer.h" +#include "delay.h" +#include "gic.h" +#include "processor.h" +#include "timer_test.h" +#include "ucall_common.h" +#include "vgic.h" + +enum guest_stage { + GUEST_STAGE_VTIMER_CVAL = 1, + GUEST_STAGE_VTIMER_TVAL, + GUEST_STAGE_PTIMER_CVAL, + GUEST_STAGE_PTIMER_TVAL, + GUEST_STAGE_MAX, +}; + +static int vtimer_irq, ptimer_irq; + +static void +guest_configure_timer_action(struct test_vcpu_shared_data *shared_data) +{ + switch (shared_data->guest_stage) { + case GUEST_STAGE_VTIMER_CVAL: + timer_set_next_cval_ms(VIRTUAL, test_args.timer_period_ms); + shared_data->xcnt = timer_get_cntct(VIRTUAL); + timer_set_ctl(VIRTUAL, CTL_ENABLE); + break; + case GUEST_STAGE_VTIMER_TVAL: + timer_set_next_tval_ms(VIRTUAL, test_args.timer_period_ms); + shared_data->xcnt = timer_get_cntct(VIRTUAL); + timer_set_ctl(VIRTUAL, CTL_ENABLE); + break; + case GUEST_STAGE_PTIMER_CVAL: + timer_set_next_cval_ms(PHYSICAL, test_args.timer_period_ms); + shared_data->xcnt = timer_get_cntct(PHYSICAL); + timer_set_ctl(PHYSICAL, CTL_ENABLE); + break; + case GUEST_STAGE_PTIMER_TVAL: + timer_set_next_tval_ms(PHYSICAL, test_args.timer_period_ms); + shared_data->xcnt = timer_get_cntct(PHYSICAL); + timer_set_ctl(PHYSICAL, CTL_ENABLE); + break; + default: + GUEST_ASSERT(0); + } +} + +static void guest_validate_irq(unsigned int intid, + struct test_vcpu_shared_data *shared_data) +{ + enum guest_stage stage = shared_data->guest_stage; + uint64_t xcnt = 0, xcnt_diff_us, cval = 0; + unsigned long xctl = 0; + unsigned int timer_irq = 0; + unsigned int accessor; + + if (intid == IAR_SPURIOUS) + return; + + switch (stage) { + case GUEST_STAGE_VTIMER_CVAL: + case GUEST_STAGE_VTIMER_TVAL: + accessor = VIRTUAL; + timer_irq = vtimer_irq; + break; + case GUEST_STAGE_PTIMER_CVAL: + case GUEST_STAGE_PTIMER_TVAL: + accessor = PHYSICAL; + timer_irq = ptimer_irq; + break; + default: + GUEST_ASSERT(0); + return; + } + + xctl = timer_get_ctl(accessor); + if ((xctl & CTL_IMASK) || !(xctl & CTL_ENABLE)) + return; + + timer_set_ctl(accessor, CTL_IMASK); + xcnt = timer_get_cntct(accessor); + cval = timer_get_cval(accessor); + + xcnt_diff_us = cycles_to_usec(xcnt - shared_data->xcnt); + + /* Make sure we are dealing with the correct timer IRQ */ + GUEST_ASSERT_EQ(intid, timer_irq); + + /* Basic 'timer condition met' check */ + __GUEST_ASSERT(xcnt >= cval, + "xcnt = 0x%lx, cval = 0x%lx, xcnt_diff_us = 0x%lx", + xcnt, cval, xcnt_diff_us); + __GUEST_ASSERT(xctl & CTL_ISTATUS, "xctl = 0x%lx", xctl); + + WRITE_ONCE(shared_data->nr_iter, shared_data->nr_iter + 1); +} + +static void guest_irq_handler(struct ex_regs *regs) +{ + unsigned int intid = gic_get_and_ack_irq(); + uint32_t cpu = guest_get_vcpuid(); + struct test_vcpu_shared_data *shared_data = &vcpu_shared_data[cpu]; + + guest_validate_irq(intid, shared_data); + + gic_set_eoi(intid); +} + +static void guest_run_stage(struct test_vcpu_shared_data *shared_data, + enum guest_stage stage) +{ + uint32_t irq_iter, config_iter; + + shared_data->guest_stage = stage; + shared_data->nr_iter = 0; + + for (config_iter = 0; config_iter < test_args.nr_iter; config_iter++) { + /* Setup the next interrupt */ + guest_configure_timer_action(shared_data); + + /* Setup a timeout for the interrupt to arrive */ + udelay(msecs_to_usecs(test_args.timer_period_ms) + + test_args.timer_err_margin_us); + + irq_iter = READ_ONCE(shared_data->nr_iter); + __GUEST_ASSERT(config_iter + 1 == irq_iter, + "config_iter + 1 = 0x%x, irq_iter = 0x%x.\n" + " Guest timer interrupt was not triggered within the 
specified\n" + " interval, try to increase the error margin by [-e] option.\n", + config_iter + 1, irq_iter); + } +} + +static void guest_code(void) +{ + uint32_t cpu = guest_get_vcpuid(); + struct test_vcpu_shared_data *shared_data = &vcpu_shared_data[cpu]; + + local_irq_disable(); + + gic_init(GIC_V3, test_args.nr_vcpus); + + timer_set_ctl(VIRTUAL, CTL_IMASK); + timer_set_ctl(PHYSICAL, CTL_IMASK); + + gic_irq_enable(vtimer_irq); + gic_irq_enable(ptimer_irq); + local_irq_enable(); + + guest_run_stage(shared_data, GUEST_STAGE_VTIMER_CVAL); + guest_run_stage(shared_data, GUEST_STAGE_VTIMER_TVAL); + guest_run_stage(shared_data, GUEST_STAGE_PTIMER_CVAL); + guest_run_stage(shared_data, GUEST_STAGE_PTIMER_TVAL); + + GUEST_DONE(); +} + +static void test_init_timer_irq(struct kvm_vm *vm) +{ + /* Timer initid should be same for all the vCPUs, so query only vCPU-0 */ + vcpu_device_attr_get(vcpus[0], KVM_ARM_VCPU_TIMER_CTRL, + KVM_ARM_VCPU_TIMER_IRQ_PTIMER, &ptimer_irq); + vcpu_device_attr_get(vcpus[0], KVM_ARM_VCPU_TIMER_CTRL, + KVM_ARM_VCPU_TIMER_IRQ_VTIMER, &vtimer_irq); + + sync_global_to_guest(vm, ptimer_irq); + sync_global_to_guest(vm, vtimer_irq); + + pr_debug("ptimer_irq: %d; vtimer_irq: %d\n", ptimer_irq, vtimer_irq); +} + +static int gic_fd; + +struct kvm_vm *test_vm_create(void) +{ + struct kvm_vm *vm; + unsigned int i; + int nr_vcpus = test_args.nr_vcpus; + + vm = vm_create_with_vcpus(nr_vcpus, guest_code, vcpus); + + vm_init_descriptor_tables(vm); + vm_install_exception_handler(vm, VECTOR_IRQ_CURRENT, guest_irq_handler); + + if (!test_args.reserved) { + if (kvm_has_cap(KVM_CAP_COUNTER_OFFSET)) { + struct kvm_arm_counter_offset offset = { + .counter_offset = test_args.counter_offset, + .reserved = 0, + }; + vm_ioctl(vm, KVM_ARM_SET_COUNTER_OFFSET, &offset); + } else + TEST_FAIL("no support for global offset"); + } + + for (i = 0; i < nr_vcpus; i++) + vcpu_init_descriptor_tables(vcpus[i]); + + test_init_timer_irq(vm); + gic_fd = vgic_v3_setup(vm, nr_vcpus, 64); + __TEST_REQUIRE(gic_fd >= 0, "Failed to create vgic-v3"); + + /* Make all the test's cmdline args visible to the guest */ + sync_global_to_guest(vm, test_args); + + return vm; +} + +void test_vm_cleanup(struct kvm_vm *vm) +{ + close(gic_fd); + kvm_vm_free(vm); +} diff --git a/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c b/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c new file mode 100644 index 000000000000..a36a7e2db434 --- /dev/null +++ b/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c @@ -0,0 +1,1062 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * arch_timer_edge_cases.c - Tests the aarch64 timer IRQ functionality. + * + * The test validates some edge cases related to the arch-timer: + * - timers above the max TVAL value. + * - timers in the past + * - moving counters ahead and behind pending timers. + * - reprograming timers. + * - timers fired multiple times. + * - masking/unmasking using the timer control mask. + * + * Copyright (c) 2021, Google LLC. + */ + +#define _GNU_SOURCE + +#include +#include + +#include "arch_timer.h" +#include "gic.h" +#include "vgic.h" + +static const uint64_t CVAL_MAX = ~0ULL; +/* tval is a signed 32-bit int. */ +static const int32_t TVAL_MAX = INT32_MAX; +static const int32_t TVAL_MIN = INT32_MIN; + +/* After how much time we say there is no IRQ. */ +static const uint32_t TIMEOUT_NO_IRQ_US = 50000; + +/* A nice counter value to use as the starting one for most tests. */ +static const uint64_t DEF_CNT = (CVAL_MAX / 2); + +/* Number of runs. 
*/ +static const uint32_t NR_TEST_ITERS_DEF = 5; + +/* Default wait test time in ms. */ +static const uint32_t WAIT_TEST_MS = 10; + +/* Default "long" wait test time in ms. */ +static const uint32_t LONG_WAIT_TEST_MS = 100; + +/* Shared with IRQ handler. */ +struct test_vcpu_shared_data { + atomic_t handled; + atomic_t spurious; +} shared_data; + +struct test_args { + /* Virtual or physical timer and counter tests. */ + enum arch_timer timer; + /* Delay used for most timer tests. */ + uint64_t wait_ms; + /* Delay used in the test_long_timer_delays test. */ + uint64_t long_wait_ms; + /* Number of iterations. */ + int iterations; + /* Whether to test the physical timer. */ + bool test_physical; + /* Whether to test the virtual timer. */ + bool test_virtual; +}; + +struct test_args test_args = { + .wait_ms = WAIT_TEST_MS, + .long_wait_ms = LONG_WAIT_TEST_MS, + .iterations = NR_TEST_ITERS_DEF, + .test_physical = true, + .test_virtual = true, +}; + +static int vtimer_irq, ptimer_irq; + +enum sync_cmd { + SET_COUNTER_VALUE, + USERSPACE_USLEEP, + USERSPACE_SCHED_YIELD, + USERSPACE_MIGRATE_SELF, + NO_USERSPACE_CMD, +}; + +typedef void (*sleep_method_t)(enum arch_timer timer, uint64_t usec); + +static void sleep_poll(enum arch_timer timer, uint64_t usec); +static void sleep_sched_poll(enum arch_timer timer, uint64_t usec); +static void sleep_in_userspace(enum arch_timer timer, uint64_t usec); +static void sleep_migrate(enum arch_timer timer, uint64_t usec); + +sleep_method_t sleep_method[] = { + sleep_poll, + sleep_sched_poll, + sleep_migrate, + sleep_in_userspace, +}; + +typedef void (*irq_wait_method_t)(void); + +static void wait_for_non_spurious_irq(void); +static void wait_poll_for_irq(void); +static void wait_sched_poll_for_irq(void); +static void wait_migrate_poll_for_irq(void); + +irq_wait_method_t irq_wait_method[] = { + wait_for_non_spurious_irq, + wait_poll_for_irq, + wait_sched_poll_for_irq, + wait_migrate_poll_for_irq, +}; + +enum timer_view { + TIMER_CVAL, + TIMER_TVAL, +}; + +static void assert_irqs_handled(uint32_t n) +{ + int h = atomic_read(&shared_data.handled); + + __GUEST_ASSERT(h == n, "Handled %d IRQS but expected %d", h, n); +} + +static void userspace_cmd(uint64_t cmd) +{ + GUEST_SYNC_ARGS(cmd, 0, 0, 0, 0); +} + +static void userspace_migrate_vcpu(void) +{ + userspace_cmd(USERSPACE_MIGRATE_SELF); +} + +static void userspace_sleep(uint64_t usecs) +{ + GUEST_SYNC_ARGS(USERSPACE_USLEEP, usecs, 0, 0, 0); +} + +static void set_counter(enum arch_timer timer, uint64_t counter) +{ + GUEST_SYNC_ARGS(SET_COUNTER_VALUE, counter, timer, 0, 0); +} + +static void guest_irq_handler(struct ex_regs *regs) +{ + unsigned int intid = gic_get_and_ack_irq(); + enum arch_timer timer; + uint64_t cnt, cval; + uint32_t ctl; + bool timer_condition, istatus; + + if (intid == IAR_SPURIOUS) { + atomic_inc(&shared_data.spurious); + goto out; + } + + if (intid == ptimer_irq) + timer = PHYSICAL; + else if (intid == vtimer_irq) + timer = VIRTUAL; + else + goto out; + + ctl = timer_get_ctl(timer); + cval = timer_get_cval(timer); + cnt = timer_get_cntct(timer); + timer_condition = cnt >= cval; + istatus = (ctl & CTL_ISTATUS) && (ctl & CTL_ENABLE); + GUEST_ASSERT_EQ(timer_condition, istatus); + + /* Disable and mask the timer. 
*/
+	timer_set_ctl(timer, CTL_IMASK);
+
+	atomic_inc(&shared_data.handled);
+
+out:
+	gic_set_eoi(intid);
+}
+
+static void set_cval_irq(enum arch_timer timer, uint64_t cval_cycles,
+			 uint32_t ctl)
+{
+	atomic_set(&shared_data.handled, 0);
+	atomic_set(&shared_data.spurious, 0);
+	timer_set_cval(timer, cval_cycles);
+	timer_set_ctl(timer, ctl);
+}
+
+static void set_tval_irq(enum arch_timer timer, uint64_t tval_cycles,
+			 uint32_t ctl)
+{
+	atomic_set(&shared_data.handled, 0);
+	atomic_set(&shared_data.spurious, 0);
+	timer_set_ctl(timer, ctl);
+	timer_set_tval(timer, tval_cycles);
+}
+
+static void set_xval_irq(enum arch_timer timer, uint64_t xval, uint32_t ctl,
+			 enum timer_view tv)
+{
+	switch (tv) {
+	case TIMER_CVAL:
+		set_cval_irq(timer, xval, ctl);
+		break;
+	case TIMER_TVAL:
+		set_tval_irq(timer, xval, ctl);
+		break;
+	default:
+		GUEST_FAIL("Could not get timer %d", timer);
+	}
+}
+
+/*
+ * Note that this can theoretically hang forever, so we rely on having
+ * a timeout mechanism in the "runner", like:
+ * tools/testing/selftests/kselftest/runner.sh.
+ */
+static void wait_for_non_spurious_irq(void)
+{
+	int h;
+
+	local_irq_disable();
+
+	for (h = atomic_read(&shared_data.handled); h == atomic_read(&shared_data.handled);) {
+		wfi();
+		local_irq_enable();
+		isb(); /* handle IRQ */
+		local_irq_disable();
+	}
+}
+
+/*
+ * Wait for a non-spurious IRQ by polling in the guest or in
+ * userspace (e.g. userspace_cmd=USERSPACE_SCHED_YIELD).
+ *
+ * Note that this can theoretically hang forever, so we rely on having
+ * a timeout mechanism in the "runner", like:
+ * tools/testing/selftests/kselftest/runner.sh.
+ */
+static void poll_for_non_spurious_irq(enum sync_cmd usp_cmd)
+{
+	int h;
+
+	local_irq_disable();
+
+	h = atomic_read(&shared_data.handled);
+
+	local_irq_enable();
+	while (h == atomic_read(&shared_data.handled)) {
+		if (usp_cmd == NO_USERSPACE_CMD)
+			cpu_relax();
+		else
+			userspace_cmd(usp_cmd);
+	}
+	local_irq_disable();
+}
+
+static void wait_poll_for_irq(void)
+{
+	poll_for_non_spurious_irq(NO_USERSPACE_CMD);
+}
+
+static void wait_sched_poll_for_irq(void)
+{
+	poll_for_non_spurious_irq(USERSPACE_SCHED_YIELD);
+}
+
+static void wait_migrate_poll_for_irq(void)
+{
+	poll_for_non_spurious_irq(USERSPACE_MIGRATE_SELF);
+}
+
+/*
+ * Sleep for usec microseconds by polling in the guest or in
+ * userspace (e.g. userspace_cmd=USERSPACE_SCHED_YIELD).
+ */
+static void guest_poll(enum arch_timer test_timer, uint64_t usec,
+		       enum sync_cmd usp_cmd)
+{
+	uint64_t cycles = usec_to_cycles(usec);
+	/* Whichever timer we are testing with, sleep with the other. */
+	enum arch_timer sleep_timer = 1 - test_timer;
+	uint64_t start = timer_get_cntct(sleep_timer);
+
+	while ((timer_get_cntct(sleep_timer) - start) < cycles) {
+		if (usp_cmd == NO_USERSPACE_CMD)
+			cpu_relax();
+		else
+			userspace_cmd(usp_cmd);
+	}
+}
+
+static void sleep_poll(enum arch_timer timer, uint64_t usec)
+{
+	guest_poll(timer, usec, NO_USERSPACE_CMD);
+}
+
+static void sleep_sched_poll(enum arch_timer timer, uint64_t usec)
+{
+	guest_poll(timer, usec, USERSPACE_SCHED_YIELD);
+}
+
+static void sleep_migrate(enum arch_timer timer, uint64_t usec)
+{
+	guest_poll(timer, usec, USERSPACE_MIGRATE_SELF);
+}
+
+static void sleep_in_userspace(enum arch_timer timer, uint64_t usec)
+{
+	userspace_sleep(usec);
+}
+
+/*
+ * Reset the timer state to some nice values like the counter not being close
+ * to the edge, and the control register masked and disabled.
+ */ +static void reset_timer_state(enum arch_timer timer, uint64_t cnt) +{ + set_counter(timer, cnt); + timer_set_ctl(timer, CTL_IMASK); +} + +static void test_timer_xval(enum arch_timer timer, uint64_t xval, + enum timer_view tv, irq_wait_method_t wm, bool reset_state, + uint64_t reset_cnt) +{ + local_irq_disable(); + + if (reset_state) + reset_timer_state(timer, reset_cnt); + + set_xval_irq(timer, xval, CTL_ENABLE, tv); + + /* This method re-enables IRQs to handle the one we're looking for. */ + wm(); + + assert_irqs_handled(1); + local_irq_enable(); +} + +/* + * The test_timer_* functions will program the timer, wait for it, and assert + * the firing of the correct IRQ. + * + * These functions don't have a timeout and return as soon as they receive an + * IRQ. They can hang (forever), so we rely on having a timeout mechanism in + * the "runner", like: tools/testing/selftests/kselftest/runner.sh. + */ + +static void test_timer_cval(enum arch_timer timer, uint64_t cval, + irq_wait_method_t wm, bool reset_state, + uint64_t reset_cnt) +{ + test_timer_xval(timer, cval, TIMER_CVAL, wm, reset_state, reset_cnt); +} + +static void test_timer_tval(enum arch_timer timer, int32_t tval, + irq_wait_method_t wm, bool reset_state, + uint64_t reset_cnt) +{ + test_timer_xval(timer, (uint64_t) tval, TIMER_TVAL, wm, reset_state, + reset_cnt); +} + +static void test_xval_check_no_irq(enum arch_timer timer, uint64_t xval, + uint64_t usec, enum timer_view timer_view, + sleep_method_t guest_sleep) +{ + local_irq_disable(); + + set_xval_irq(timer, xval, CTL_ENABLE | CTL_IMASK, timer_view); + guest_sleep(timer, usec); + + local_irq_enable(); + isb(); + + /* Assume success (no IRQ) after waiting usec microseconds */ + assert_irqs_handled(0); +} + +static void test_cval_no_irq(enum arch_timer timer, uint64_t cval, + uint64_t usec, sleep_method_t wm) +{ + test_xval_check_no_irq(timer, cval, usec, TIMER_CVAL, wm); +} + +static void test_tval_no_irq(enum arch_timer timer, int32_t tval, uint64_t usec, + sleep_method_t wm) +{ + /* tval will be cast to an int32_t in test_xval_check_no_irq */ + test_xval_check_no_irq(timer, (uint64_t) tval, usec, TIMER_TVAL, wm); +} + +/* Test masking/unmasking a timer using the timer mask (not the IRQ mask). */ +static void test_timer_control_mask_then_unmask(enum arch_timer timer) +{ + reset_timer_state(timer, DEF_CNT); + set_tval_irq(timer, -1, CTL_ENABLE | CTL_IMASK); + + /* Unmask the timer, and then get an IRQ. */ + local_irq_disable(); + timer_set_ctl(timer, CTL_ENABLE); + /* This method re-enables IRQs to handle the one we're looking for. */ + wait_for_non_spurious_irq(); + + assert_irqs_handled(1); + local_irq_enable(); +} + +/* Check that timer control masks actually mask a timer being fired. */ +static void test_timer_control_masks(enum arch_timer timer) +{ + reset_timer_state(timer, DEF_CNT); + + /* Local IRQs are not masked at this point. */ + + set_tval_irq(timer, -1, CTL_ENABLE | CTL_IMASK); + + /* Assume no IRQ after waiting TIMEOUT_NO_IRQ_US microseconds */ + sleep_poll(timer, TIMEOUT_NO_IRQ_US); + + assert_irqs_handled(0); + timer_set_ctl(timer, CTL_IMASK); +} + +static void test_fire_a_timer_multiple_times(enum arch_timer timer, + irq_wait_method_t wm, int num) +{ + int i; + + local_irq_disable(); + reset_timer_state(timer, DEF_CNT); + + set_tval_irq(timer, 0, CTL_ENABLE); + + for (i = 1; i <= num; i++) { + /* This method re-enables IRQs to handle the one we're looking for. */ + wm(); + + /* The IRQ handler masked and disabled the timer. 
+	 * Enable and unmask it again.
+	 */
+		timer_set_ctl(timer, CTL_ENABLE);
+
+		assert_irqs_handled(i);
+	}
+
+	local_irq_enable();
+}
+
+static void test_timers_fired_multiple_times(enum arch_timer timer)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(irq_wait_method); i++)
+		test_fire_a_timer_multiple_times(timer, irq_wait_method[i], 10);
+}
+
+/*
+ * Set a timer for tval=delta_1_ms then reprogram it to
+ * tval=delta_2_ms. Check that we get the timer fired. There is no
+ * timeout for the wait: we use the wfi instruction.
+ */
+static void test_reprogramming_timer(enum arch_timer timer, irq_wait_method_t wm,
+				     int32_t delta_1_ms, int32_t delta_2_ms)
+{
+	local_irq_disable();
+	reset_timer_state(timer, DEF_CNT);
+
+	/* Program the timer to DEF_CNT + delta_1_ms. */
+	set_tval_irq(timer, msec_to_cycles(delta_1_ms), CTL_ENABLE);
+
+	/* Reprogram the timer to DEF_CNT + delta_2_ms. */
+	timer_set_tval(timer, msec_to_cycles(delta_2_ms));
+
+	/* This method re-enables IRQs to handle the one we're looking for. */
+	wm();
+
+	/* The IRQ should arrive at DEF_CNT + delta_2_ms (or after). */
+	GUEST_ASSERT(timer_get_cntct(timer) >=
+		     DEF_CNT + msec_to_cycles(delta_2_ms));
+
+	local_irq_enable();
+	assert_irqs_handled(1);
+};
+
+static void test_reprogram_timers(enum arch_timer timer)
+{
+	int i;
+	uint64_t base_wait = test_args.wait_ms;
+
+	for (i = 0; i < ARRAY_SIZE(irq_wait_method); i++) {
+		/*
+		 * Ensure reprogramming works whether going from a
+		 * longer time to a shorter or vice versa.
+		 */
+		test_reprogramming_timer(timer, irq_wait_method[i], 2 * base_wait,
+					 base_wait);
+		test_reprogramming_timer(timer, irq_wait_method[i], base_wait,
+					 2 * base_wait);
+	}
+}
+
+static void test_basic_functionality(enum arch_timer timer)
+{
+	int32_t tval = (int32_t) msec_to_cycles(test_args.wait_ms);
+	uint64_t cval = DEF_CNT + msec_to_cycles(test_args.wait_ms);
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(irq_wait_method); i++) {
+		irq_wait_method_t wm = irq_wait_method[i];
+
+		test_timer_cval(timer, cval, wm, true, DEF_CNT);
+		test_timer_tval(timer, tval, wm, true, DEF_CNT);
+	}
+}
+
+/*
+ * This test checks basic timer behavior without actually firing timers, things
+ * like: the relationship between cval and tval, tval down-counting.
+ */
+static void timers_sanity_checks(enum arch_timer timer, bool use_sched)
+{
+	reset_timer_state(timer, DEF_CNT);
+
+	local_irq_disable();
+
+	/* cval in the past */
+	timer_set_cval(timer,
+		       timer_get_cntct(timer) -
+		       msec_to_cycles(test_args.wait_ms));
+	if (use_sched)
+		userspace_migrate_vcpu();
+	GUEST_ASSERT(timer_get_tval(timer) < 0);
+
+	/* tval in the past */
+	timer_set_tval(timer, -1);
+	if (use_sched)
+		userspace_migrate_vcpu();
+	GUEST_ASSERT(timer_get_cval(timer) < timer_get_cntct(timer));
+
+	/* tval larger than TVAL_MAX. This requires programming with
+	 * timer_set_cval instead so the value is expressible
+	 */
+	timer_set_cval(timer,
+		       timer_get_cntct(timer) + TVAL_MAX +
+		       msec_to_cycles(test_args.wait_ms));
+	if (use_sched)
+		userspace_migrate_vcpu();
+	GUEST_ASSERT(timer_get_tval(timer) <= 0);
+
+	/*
+	 * tval larger than 2 * TVAL_MAX.
+	 * Twice the TVAL_MAX completely loops around the TVAL.
+	 */
+	timer_set_cval(timer,
+		       timer_get_cntct(timer) + 2ULL * TVAL_MAX +
+		       msec_to_cycles(test_args.wait_ms));
+	if (use_sched)
+		userspace_migrate_vcpu();
+	GUEST_ASSERT(timer_get_tval(timer) <=
+		     msec_to_cycles(test_args.wait_ms));
+
+	/* negative tval that rolls over from 0.
*/ + set_counter(timer, msec_to_cycles(1)); + timer_set_tval(timer, -1 * msec_to_cycles(test_args.wait_ms)); + if (use_sched) + userspace_migrate_vcpu(); + GUEST_ASSERT(timer_get_cval(timer) >= (CVAL_MAX - msec_to_cycles(test_args.wait_ms))); + + /* tval should keep down-counting from 0 to -1. */ + timer_set_tval(timer, 0); + sleep_poll(timer, 1); + GUEST_ASSERT(timer_get_tval(timer) < 0); + + local_irq_enable(); + + /* Mask and disable any pending timer. */ + timer_set_ctl(timer, CTL_IMASK); +} + +static void test_timers_sanity_checks(enum arch_timer timer) +{ + timers_sanity_checks(timer, false); + /* Check how KVM saves/restores these edge-case values. */ + timers_sanity_checks(timer, true); +} + +static void test_set_cnt_after_tval_max(enum arch_timer timer, irq_wait_method_t wm) +{ + local_irq_disable(); + reset_timer_state(timer, DEF_CNT); + + set_cval_irq(timer, + (uint64_t) TVAL_MAX + + msec_to_cycles(test_args.wait_ms) / 2, CTL_ENABLE); + + set_counter(timer, TVAL_MAX); + + /* This method re-enables IRQs to handle the one we're looking for. */ + wm(); + + assert_irqs_handled(1); + local_irq_enable(); +} + +/* Test timers set for: cval = now + TVAL_MAX + wait_ms / 2 */ +static void test_timers_above_tval_max(enum arch_timer timer) +{ + uint64_t cval; + int i; + + /* + * Test that the system is not implementing cval in terms of + * tval. If that was the case, setting a cval to "cval = now + * + TVAL_MAX + wait_ms" would wrap to "cval = now + + * wait_ms", and the timer would fire immediately. Test that it + * doesn't. + */ + for (i = 0; i < ARRAY_SIZE(sleep_method); i++) { + reset_timer_state(timer, DEF_CNT); + cval = timer_get_cntct(timer) + TVAL_MAX + + msec_to_cycles(test_args.wait_ms); + test_cval_no_irq(timer, cval, + msecs_to_usecs(test_args.wait_ms) + + TIMEOUT_NO_IRQ_US, sleep_method[i]); + } + + for (i = 0; i < ARRAY_SIZE(irq_wait_method); i++) { + /* Get the IRQ by moving the counter forward. */ + test_set_cnt_after_tval_max(timer, irq_wait_method[i]); + } +} + +/* + * Template function to be used by the test_move_counter_ahead_* tests. It + * sets the counter to cnt_1, the [c|t]val, the counter to cnt_2, and + * then waits for an IRQ. + */ +static void test_set_cnt_after_xval(enum arch_timer timer, uint64_t cnt_1, + uint64_t xval, uint64_t cnt_2, + irq_wait_method_t wm, enum timer_view tv) +{ + local_irq_disable(); + + set_counter(timer, cnt_1); + timer_set_ctl(timer, CTL_IMASK); + + set_xval_irq(timer, xval, CTL_ENABLE, tv); + set_counter(timer, cnt_2); + /* This method re-enables IRQs to handle the one we're looking for. */ + wm(); + + assert_irqs_handled(1); + local_irq_enable(); +} + +/* + * Template function to be used by the test_move_counter_ahead_* tests. It + * sets the counter to cnt_1, the [c|t]val, the counter to cnt_2, and + * then waits for an IRQ. 
+ */ +static void test_set_cnt_after_xval_no_irq(enum arch_timer timer, + uint64_t cnt_1, uint64_t xval, + uint64_t cnt_2, + sleep_method_t guest_sleep, + enum timer_view tv) +{ + local_irq_disable(); + + set_counter(timer, cnt_1); + timer_set_ctl(timer, CTL_IMASK); + + set_xval_irq(timer, xval, CTL_ENABLE, tv); + set_counter(timer, cnt_2); + guest_sleep(timer, TIMEOUT_NO_IRQ_US); + + local_irq_enable(); + isb(); + + /* Assume no IRQ after waiting TIMEOUT_NO_IRQ_US microseconds */ + assert_irqs_handled(0); + timer_set_ctl(timer, CTL_IMASK); +} + +static void test_set_cnt_after_tval(enum arch_timer timer, uint64_t cnt_1, + int32_t tval, uint64_t cnt_2, + irq_wait_method_t wm) +{ + test_set_cnt_after_xval(timer, cnt_1, tval, cnt_2, wm, TIMER_TVAL); +} + +static void test_set_cnt_after_cval(enum arch_timer timer, uint64_t cnt_1, + uint64_t cval, uint64_t cnt_2, + irq_wait_method_t wm) +{ + test_set_cnt_after_xval(timer, cnt_1, cval, cnt_2, wm, TIMER_CVAL); +} + +static void test_set_cnt_after_tval_no_irq(enum arch_timer timer, + uint64_t cnt_1, int32_t tval, + uint64_t cnt_2, sleep_method_t wm) +{ + test_set_cnt_after_xval_no_irq(timer, cnt_1, tval, cnt_2, wm, + TIMER_TVAL); +} + +static void test_set_cnt_after_cval_no_irq(enum arch_timer timer, + uint64_t cnt_1, uint64_t cval, + uint64_t cnt_2, sleep_method_t wm) +{ + test_set_cnt_after_xval_no_irq(timer, cnt_1, cval, cnt_2, wm, + TIMER_CVAL); +} + +/* Set a timer and then move the counter ahead of it. */ +static void test_move_counters_ahead_of_timers(enum arch_timer timer) +{ + int i; + int32_t tval; + + for (i = 0; i < ARRAY_SIZE(irq_wait_method); i++) { + irq_wait_method_t wm = irq_wait_method[i]; + + test_set_cnt_after_cval(timer, 0, DEF_CNT, DEF_CNT + 1, wm); + test_set_cnt_after_cval(timer, CVAL_MAX, 1, 2, wm); + + /* Move counter ahead of negative tval. */ + test_set_cnt_after_tval(timer, 0, -1, DEF_CNT + 1, wm); + test_set_cnt_after_tval(timer, 0, -1, TVAL_MAX, wm); + tval = TVAL_MAX; + test_set_cnt_after_tval(timer, 0, tval, (uint64_t) tval + 1, + wm); + } + + for (i = 0; i < ARRAY_SIZE(sleep_method); i++) { + sleep_method_t sm = sleep_method[i]; + + test_set_cnt_after_cval_no_irq(timer, 0, DEF_CNT, CVAL_MAX, sm); + } +} + +/* + * Program a timer, mask it, and then change the tval or counter to cancel it. + * Unmask it and check that nothing fires. + */ +static void test_move_counters_behind_timers(enum arch_timer timer) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(sleep_method); i++) { + sleep_method_t sm = sleep_method[i]; + + test_set_cnt_after_cval_no_irq(timer, DEF_CNT, DEF_CNT - 1, 0, + sm); + test_set_cnt_after_tval_no_irq(timer, DEF_CNT, -1, 0, sm); + } +} + +static void test_timers_in_the_past(enum arch_timer timer) +{ + int32_t tval = -1 * (int32_t) msec_to_cycles(test_args.wait_ms); + uint64_t cval; + int i; + + for (i = 0; i < ARRAY_SIZE(irq_wait_method); i++) { + irq_wait_method_t wm = irq_wait_method[i]; + + /* set a timer wait_ms the past. */ + cval = DEF_CNT - msec_to_cycles(test_args.wait_ms); + test_timer_cval(timer, cval, wm, true, DEF_CNT); + test_timer_tval(timer, tval, wm, true, DEF_CNT); + + /* Set a timer to counter=0 (in the past) */ + test_timer_cval(timer, 0, wm, true, DEF_CNT); + + /* Set a time for tval=0 (now) */ + test_timer_tval(timer, 0, wm, true, DEF_CNT); + + /* Set a timer to as far in the past as possible */ + test_timer_tval(timer, TVAL_MIN, wm, true, DEF_CNT); + } + + /* + * Set the counter to wait_ms, and a tval to -wait_ms. There should be no + * IRQ as that tval means cval=CVAL_MAX-wait_ms. 
+ */ + for (i = 0; i < ARRAY_SIZE(sleep_method); i++) { + sleep_method_t sm = sleep_method[i]; + + set_counter(timer, msec_to_cycles(test_args.wait_ms)); + test_tval_no_irq(timer, tval, TIMEOUT_NO_IRQ_US, sm); + } +} + +static void test_long_timer_delays(enum arch_timer timer) +{ + int32_t tval = (int32_t) msec_to_cycles(test_args.long_wait_ms); + uint64_t cval = DEF_CNT + msec_to_cycles(test_args.long_wait_ms); + int i; + + for (i = 0; i < ARRAY_SIZE(irq_wait_method); i++) { + irq_wait_method_t wm = irq_wait_method[i]; + + test_timer_cval(timer, cval, wm, true, DEF_CNT); + test_timer_tval(timer, tval, wm, true, DEF_CNT); + } +} + +static void guest_run_iteration(enum arch_timer timer) +{ + test_basic_functionality(timer); + test_timers_sanity_checks(timer); + + test_timers_above_tval_max(timer); + test_timers_in_the_past(timer); + + test_move_counters_ahead_of_timers(timer); + test_move_counters_behind_timers(timer); + test_reprogram_timers(timer); + + test_timers_fired_multiple_times(timer); + + test_timer_control_mask_then_unmask(timer); + test_timer_control_masks(timer); +} + +static void guest_code(enum arch_timer timer) +{ + int i; + + local_irq_disable(); + + gic_init(GIC_V3, 1); + + timer_set_ctl(VIRTUAL, CTL_IMASK); + timer_set_ctl(PHYSICAL, CTL_IMASK); + + gic_irq_enable(vtimer_irq); + gic_irq_enable(ptimer_irq); + local_irq_enable(); + + for (i = 0; i < test_args.iterations; i++) { + GUEST_SYNC(i); + guest_run_iteration(timer); + } + + test_long_timer_delays(timer); + GUEST_DONE(); +} + +static uint32_t next_pcpu(void) +{ + uint32_t max = get_nprocs(); + uint32_t cur = sched_getcpu(); + uint32_t next = cur; + cpu_set_t cpuset; + + TEST_ASSERT(max > 1, "Need at least two physical cpus"); + + sched_getaffinity(0, sizeof(cpuset), &cpuset); + + do { + next = (next + 1) % CPU_SETSIZE; + } while (!CPU_ISSET(next, &cpuset)); + + return next; +} + +static void migrate_self(uint32_t new_pcpu) +{ + int ret; + cpu_set_t cpuset; + pthread_t thread; + + thread = pthread_self(); + + CPU_ZERO(&cpuset); + CPU_SET(new_pcpu, &cpuset); + + pr_debug("Migrating from %u to %u\n", sched_getcpu(), new_pcpu); + + ret = pthread_setaffinity_np(thread, sizeof(cpuset), &cpuset); + + TEST_ASSERT(ret == 0, "Failed to migrate to pCPU: %u; ret: %d\n", + new_pcpu, ret); +} + +static void kvm_set_cntxct(struct kvm_vcpu *vcpu, uint64_t cnt, + enum arch_timer timer) +{ + if (timer == PHYSICAL) + vcpu_set_reg(vcpu, KVM_REG_ARM_PTIMER_CNT, cnt); + else + vcpu_set_reg(vcpu, KVM_REG_ARM_TIMER_CNT, cnt); +} + +static void handle_sync(struct kvm_vcpu *vcpu, struct ucall *uc) +{ + enum sync_cmd cmd = uc->args[1]; + uint64_t val = uc->args[2]; + enum arch_timer timer = uc->args[3]; + + switch (cmd) { + case SET_COUNTER_VALUE: + kvm_set_cntxct(vcpu, val, timer); + break; + case USERSPACE_USLEEP: + usleep(val); + break; + case USERSPACE_SCHED_YIELD: + sched_yield(); + break; + case USERSPACE_MIGRATE_SELF: + migrate_self(next_pcpu()); + break; + default: + break; + } +} + +static void test_run(struct kvm_vm *vm, struct kvm_vcpu *vcpu) +{ + struct ucall uc; + + /* Start on CPU 0 */ + migrate_self(0); + + while (true) { + vcpu_run(vcpu); + switch (get_ucall(vcpu, &uc)) { + case UCALL_SYNC: + handle_sync(vcpu, &uc); + break; + case UCALL_DONE: + goto out; + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + goto out; + default: + TEST_FAIL("Unexpected guest exit\n"); + } + } + + out: + return; +} + +static void test_init_timer_irq(struct kvm_vm *vm, struct kvm_vcpu *vcpu) +{ + vcpu_device_attr_get(vcpu, KVM_ARM_VCPU_TIMER_CTRL, + 
KVM_ARM_VCPU_TIMER_IRQ_PTIMER, &ptimer_irq); + vcpu_device_attr_get(vcpu, KVM_ARM_VCPU_TIMER_CTRL, + KVM_ARM_VCPU_TIMER_IRQ_VTIMER, &vtimer_irq); + + sync_global_to_guest(vm, ptimer_irq); + sync_global_to_guest(vm, vtimer_irq); + + pr_debug("ptimer_irq: %d; vtimer_irq: %d\n", ptimer_irq, vtimer_irq); +} + +static void test_vm_create(struct kvm_vm **vm, struct kvm_vcpu **vcpu, + enum arch_timer timer) +{ + *vm = vm_create_with_one_vcpu(vcpu, guest_code); + TEST_ASSERT(*vm, "Failed to create the test VM\n"); + + vm_init_descriptor_tables(*vm); + vm_install_exception_handler(*vm, VECTOR_IRQ_CURRENT, + guest_irq_handler); + + vcpu_init_descriptor_tables(*vcpu); + vcpu_args_set(*vcpu, 1, timer); + + test_init_timer_irq(*vm, *vcpu); + vgic_v3_setup(*vm, 1, 64); + sync_global_to_guest(*vm, test_args); +} + +static void test_print_help(char *name) +{ + pr_info("Usage: %s [-h] [-b] [-i iterations] [-l long_wait_ms] [-p] [-v]\n" + , name); + pr_info("\t-i: Number of iterations (default: %u)\n", + NR_TEST_ITERS_DEF); + pr_info("\t-b: Test both physical and virtual timers (default: true)\n"); + pr_info("\t-l: Delta (in ms) used for long wait time test (default: %u)\n", + LONG_WAIT_TEST_MS); + pr_info("\t-l: Delta (in ms) used for wait times (default: %u)\n", + WAIT_TEST_MS); + pr_info("\t-p: Test physical timer (default: true)\n"); + pr_info("\t-v: Test virtual timer (default: true)\n"); + pr_info("\t-h: Print this help message\n"); +} + +static bool parse_args(int argc, char *argv[]) +{ + int opt; + + while ((opt = getopt(argc, argv, "bhi:l:pvw:")) != -1) { + switch (opt) { + case 'b': + test_args.test_physical = true; + test_args.test_virtual = true; + break; + case 'i': + test_args.iterations = + atoi_positive("Number of iterations", optarg); + break; + case 'l': + test_args.long_wait_ms = + atoi_positive("Long wait time", optarg); + break; + case 'p': + test_args.test_physical = true; + test_args.test_virtual = false; + break; + case 'v': + test_args.test_virtual = true; + test_args.test_physical = false; + break; + case 'w': + test_args.wait_ms = atoi_positive("Wait time", optarg); + break; + case 'h': + default: + goto err; + } + } + + return true; + + err: + test_print_help(argv[0]); + return false; +} + +int main(int argc, char *argv[]) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + + /* Tell stdout not to buffer its content */ + setbuf(stdout, NULL); + + if (!parse_args(argc, argv)) + exit(KSFT_SKIP); + + if (test_args.test_virtual) { + test_vm_create(&vm, &vcpu, VIRTUAL); + test_run(vm, vcpu); + kvm_vm_free(vm); + } + + if (test_args.test_physical) { + test_vm_create(&vm, &vcpu, PHYSICAL); + test_run(vm, vcpu); + kvm_vm_free(vm); + } + + return 0; +} diff --git a/tools/testing/selftests/kvm/arm64/debug-exceptions.c b/tools/testing/selftests/kvm/arm64/debug-exceptions.c new file mode 100644 index 000000000000..c7fb55c9135b --- /dev/null +++ b/tools/testing/selftests/kvm/arm64/debug-exceptions.c @@ -0,0 +1,607 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include + +#define MDSCR_KDE (1 << 13) +#define MDSCR_MDE (1 << 15) +#define MDSCR_SS (1 << 0) + +#define DBGBCR_LEN8 (0xff << 5) +#define DBGBCR_EXEC (0x0 << 3) +#define DBGBCR_EL1 (0x1 << 1) +#define DBGBCR_E (0x1 << 0) +#define DBGBCR_LBN_SHIFT 16 +#define DBGBCR_BT_SHIFT 20 +#define DBGBCR_BT_ADDR_LINK_CTX (0x1 << DBGBCR_BT_SHIFT) +#define DBGBCR_BT_CTX_LINK (0x3 << DBGBCR_BT_SHIFT) + +#define DBGWCR_LEN8 (0xff << 5) +#define DBGWCR_RD (0x1 << 3) +#define DBGWCR_WR (0x2 << 3) +#define DBGWCR_EL1 (0x1 << 1) 
+#define DBGWCR_E (0x1 << 0) +#define DBGWCR_LBN_SHIFT 16 +#define DBGWCR_WT_SHIFT 20 +#define DBGWCR_WT_LINK (0x1 << DBGWCR_WT_SHIFT) + +#define SPSR_D (1 << 9) +#define SPSR_SS (1 << 21) + +extern unsigned char sw_bp, sw_bp2, hw_bp, hw_bp2, bp_svc, bp_brk, hw_wp, ss_start, hw_bp_ctx; +extern unsigned char iter_ss_begin, iter_ss_end; +static volatile uint64_t sw_bp_addr, hw_bp_addr; +static volatile uint64_t wp_addr, wp_data_addr; +static volatile uint64_t svc_addr; +static volatile uint64_t ss_addr[4], ss_idx; +#define PC(v) ((uint64_t)&(v)) + +#define GEN_DEBUG_WRITE_REG(reg_name) \ +static void write_##reg_name(int num, uint64_t val) \ +{ \ + switch (num) { \ + case 0: \ + write_sysreg(val, reg_name##0_el1); \ + break; \ + case 1: \ + write_sysreg(val, reg_name##1_el1); \ + break; \ + case 2: \ + write_sysreg(val, reg_name##2_el1); \ + break; \ + case 3: \ + write_sysreg(val, reg_name##3_el1); \ + break; \ + case 4: \ + write_sysreg(val, reg_name##4_el1); \ + break; \ + case 5: \ + write_sysreg(val, reg_name##5_el1); \ + break; \ + case 6: \ + write_sysreg(val, reg_name##6_el1); \ + break; \ + case 7: \ + write_sysreg(val, reg_name##7_el1); \ + break; \ + case 8: \ + write_sysreg(val, reg_name##8_el1); \ + break; \ + case 9: \ + write_sysreg(val, reg_name##9_el1); \ + break; \ + case 10: \ + write_sysreg(val, reg_name##10_el1); \ + break; \ + case 11: \ + write_sysreg(val, reg_name##11_el1); \ + break; \ + case 12: \ + write_sysreg(val, reg_name##12_el1); \ + break; \ + case 13: \ + write_sysreg(val, reg_name##13_el1); \ + break; \ + case 14: \ + write_sysreg(val, reg_name##14_el1); \ + break; \ + case 15: \ + write_sysreg(val, reg_name##15_el1); \ + break; \ + default: \ + GUEST_ASSERT(0); \ + } \ +} + +/* Define write_dbgbcr()/write_dbgbvr()/write_dbgwcr()/write_dbgwvr() */ +GEN_DEBUG_WRITE_REG(dbgbcr) +GEN_DEBUG_WRITE_REG(dbgbvr) +GEN_DEBUG_WRITE_REG(dbgwcr) +GEN_DEBUG_WRITE_REG(dbgwvr) + +static void reset_debug_state(void) +{ + uint8_t brps, wrps, i; + uint64_t dfr0; + + asm volatile("msr daifset, #8"); + + write_sysreg(0, osdlr_el1); + write_sysreg(0, oslar_el1); + isb(); + + write_sysreg(0, mdscr_el1); + write_sysreg(0, contextidr_el1); + + /* Reset all bcr/bvr/wcr/wvr registers */ + dfr0 = read_sysreg(id_aa64dfr0_el1); + brps = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_BRPs), dfr0); + for (i = 0; i <= brps; i++) { + write_dbgbcr(i, 0); + write_dbgbvr(i, 0); + } + wrps = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_WRPs), dfr0); + for (i = 0; i <= wrps; i++) { + write_dbgwcr(i, 0); + write_dbgwvr(i, 0); + } + + isb(); +} + +static void enable_os_lock(void) +{ + write_sysreg(1, oslar_el1); + isb(); + + GUEST_ASSERT(read_sysreg(oslsr_el1) & 2); +} + +static void enable_monitor_debug_exceptions(void) +{ + uint32_t mdscr; + + asm volatile("msr daifclr, #8"); + + mdscr = read_sysreg(mdscr_el1) | MDSCR_KDE | MDSCR_MDE; + write_sysreg(mdscr, mdscr_el1); + isb(); +} + +static void install_wp(uint8_t wpn, uint64_t addr) +{ + uint32_t wcr; + + wcr = DBGWCR_LEN8 | DBGWCR_RD | DBGWCR_WR | DBGWCR_EL1 | DBGWCR_E; + write_dbgwcr(wpn, wcr); + write_dbgwvr(wpn, addr); + + isb(); + + enable_monitor_debug_exceptions(); +} + +static void install_hw_bp(uint8_t bpn, uint64_t addr) +{ + uint32_t bcr; + + bcr = DBGBCR_LEN8 | DBGBCR_EXEC | DBGBCR_EL1 | DBGBCR_E; + write_dbgbcr(bpn, bcr); + write_dbgbvr(bpn, addr); + isb(); + + enable_monitor_debug_exceptions(); +} + +static void install_wp_ctx(uint8_t addr_wp, uint8_t ctx_bp, uint64_t addr, + uint64_t ctx) +{ + uint32_t wcr; + uint64_t ctx_bcr; + + 
/* Setup a context-aware breakpoint for Linked Context ID Match */ + ctx_bcr = DBGBCR_LEN8 | DBGBCR_EXEC | DBGBCR_EL1 | DBGBCR_E | + DBGBCR_BT_CTX_LINK; + write_dbgbcr(ctx_bp, ctx_bcr); + write_dbgbvr(ctx_bp, ctx); + + /* Setup a linked watchpoint (linked to the context-aware breakpoint) */ + wcr = DBGWCR_LEN8 | DBGWCR_RD | DBGWCR_WR | DBGWCR_EL1 | DBGWCR_E | + DBGWCR_WT_LINK | ((uint32_t)ctx_bp << DBGWCR_LBN_SHIFT); + write_dbgwcr(addr_wp, wcr); + write_dbgwvr(addr_wp, addr); + isb(); + + enable_monitor_debug_exceptions(); +} + +void install_hw_bp_ctx(uint8_t addr_bp, uint8_t ctx_bp, uint64_t addr, + uint64_t ctx) +{ + uint32_t addr_bcr, ctx_bcr; + + /* Setup a context-aware breakpoint for Linked Context ID Match */ + ctx_bcr = DBGBCR_LEN8 | DBGBCR_EXEC | DBGBCR_EL1 | DBGBCR_E | + DBGBCR_BT_CTX_LINK; + write_dbgbcr(ctx_bp, ctx_bcr); + write_dbgbvr(ctx_bp, ctx); + + /* + * Setup a normal breakpoint for Linked Address Match, and link it + * to the context-aware breakpoint. + */ + addr_bcr = DBGBCR_LEN8 | DBGBCR_EXEC | DBGBCR_EL1 | DBGBCR_E | + DBGBCR_BT_ADDR_LINK_CTX | + ((uint32_t)ctx_bp << DBGBCR_LBN_SHIFT); + write_dbgbcr(addr_bp, addr_bcr); + write_dbgbvr(addr_bp, addr); + isb(); + + enable_monitor_debug_exceptions(); +} + +static void install_ss(void) +{ + uint32_t mdscr; + + asm volatile("msr daifclr, #8"); + + mdscr = read_sysreg(mdscr_el1) | MDSCR_KDE | MDSCR_SS; + write_sysreg(mdscr, mdscr_el1); + isb(); +} + +static volatile char write_data; + +static void guest_code(uint8_t bpn, uint8_t wpn, uint8_t ctx_bpn) +{ + uint64_t ctx = 0xabcdef; /* a random context number */ + + /* Software-breakpoint */ + reset_debug_state(); + asm volatile("sw_bp: brk #0"); + GUEST_ASSERT_EQ(sw_bp_addr, PC(sw_bp)); + + /* Hardware-breakpoint */ + reset_debug_state(); + install_hw_bp(bpn, PC(hw_bp)); + asm volatile("hw_bp: nop"); + GUEST_ASSERT_EQ(hw_bp_addr, PC(hw_bp)); + + /* Hardware-breakpoint + svc */ + reset_debug_state(); + install_hw_bp(bpn, PC(bp_svc)); + asm volatile("bp_svc: svc #0"); + GUEST_ASSERT_EQ(hw_bp_addr, PC(bp_svc)); + GUEST_ASSERT_EQ(svc_addr, PC(bp_svc) + 4); + + /* Hardware-breakpoint + software-breakpoint */ + reset_debug_state(); + install_hw_bp(bpn, PC(bp_brk)); + asm volatile("bp_brk: brk #0"); + GUEST_ASSERT_EQ(sw_bp_addr, PC(bp_brk)); + GUEST_ASSERT_EQ(hw_bp_addr, PC(bp_brk)); + + /* Watchpoint */ + reset_debug_state(); + install_wp(wpn, PC(write_data)); + write_data = 'x'; + GUEST_ASSERT_EQ(write_data, 'x'); + GUEST_ASSERT_EQ(wp_data_addr, PC(write_data)); + + /* Single-step */ + reset_debug_state(); + install_ss(); + ss_idx = 0; + asm volatile("ss_start:\n" + "mrs x0, esr_el1\n" + "add x0, x0, #1\n" + "msr daifset, #8\n" + : : : "x0"); + GUEST_ASSERT_EQ(ss_addr[0], PC(ss_start)); + GUEST_ASSERT_EQ(ss_addr[1], PC(ss_start) + 4); + GUEST_ASSERT_EQ(ss_addr[2], PC(ss_start) + 8); + + /* OS Lock does not block software-breakpoint */ + reset_debug_state(); + enable_os_lock(); + sw_bp_addr = 0; + asm volatile("sw_bp2: brk #0"); + GUEST_ASSERT_EQ(sw_bp_addr, PC(sw_bp2)); + + /* OS Lock blocking hardware-breakpoint */ + reset_debug_state(); + enable_os_lock(); + install_hw_bp(bpn, PC(hw_bp2)); + hw_bp_addr = 0; + asm volatile("hw_bp2: nop"); + GUEST_ASSERT_EQ(hw_bp_addr, 0); + + /* OS Lock blocking watchpoint */ + reset_debug_state(); + enable_os_lock(); + write_data = '\0'; + wp_data_addr = 0; + install_wp(wpn, PC(write_data)); + write_data = 'x'; + GUEST_ASSERT_EQ(write_data, 'x'); + GUEST_ASSERT_EQ(wp_data_addr, 0); + + /* OS Lock blocking single-step */ + reset_debug_state(); 
+ enable_os_lock(); + ss_addr[0] = 0; + install_ss(); + ss_idx = 0; + asm volatile("mrs x0, esr_el1\n\t" + "add x0, x0, #1\n\t" + "msr daifset, #8\n\t" + : : : "x0"); + GUEST_ASSERT_EQ(ss_addr[0], 0); + + /* Linked hardware-breakpoint */ + hw_bp_addr = 0; + reset_debug_state(); + install_hw_bp_ctx(bpn, ctx_bpn, PC(hw_bp_ctx), ctx); + /* Set context id */ + write_sysreg(ctx, contextidr_el1); + isb(); + asm volatile("hw_bp_ctx: nop"); + write_sysreg(0, contextidr_el1); + GUEST_ASSERT_EQ(hw_bp_addr, PC(hw_bp_ctx)); + + /* Linked watchpoint */ + reset_debug_state(); + install_wp_ctx(wpn, ctx_bpn, PC(write_data), ctx); + /* Set context id */ + write_sysreg(ctx, contextidr_el1); + isb(); + write_data = 'x'; + GUEST_ASSERT_EQ(write_data, 'x'); + GUEST_ASSERT_EQ(wp_data_addr, PC(write_data)); + + GUEST_DONE(); +} + +static void guest_sw_bp_handler(struct ex_regs *regs) +{ + sw_bp_addr = regs->pc; + regs->pc += 4; +} + +static void guest_hw_bp_handler(struct ex_regs *regs) +{ + hw_bp_addr = regs->pc; + regs->pstate |= SPSR_D; +} + +static void guest_wp_handler(struct ex_regs *regs) +{ + wp_data_addr = read_sysreg(far_el1); + wp_addr = regs->pc; + regs->pstate |= SPSR_D; +} + +static void guest_ss_handler(struct ex_regs *regs) +{ + __GUEST_ASSERT(ss_idx < 4, "Expected index < 4, got '%lu'", ss_idx); + ss_addr[ss_idx++] = regs->pc; + regs->pstate |= SPSR_SS; +} + +static void guest_svc_handler(struct ex_regs *regs) +{ + svc_addr = regs->pc; +} + +static void guest_code_ss(int test_cnt) +{ + uint64_t i; + uint64_t bvr, wvr, w_bvr, w_wvr; + + for (i = 0; i < test_cnt; i++) { + /* Bits [1:0] of dbg{b,w}vr are RES0 */ + w_bvr = i << 2; + w_wvr = i << 2; + + /* + * Enable Single Step execution. Note! This _must_ be a bare + * ucall as the ucall() path uses atomic operations to manage + * the ucall structures, and the built-in "atomics" are usually + * implemented via exclusive access instructions. The exlusive + * monitor is cleared on ERET, and so taking debug exceptions + * during a LDREX=>STREX sequence will prevent forward progress + * and hang the guest/test. + */ + GUEST_UCALL_NONE(); + + /* + * The userspace will verify that the pc is as expected during + * single step execution between iter_ss_begin and iter_ss_end. + */ + asm volatile("iter_ss_begin:nop\n"); + + write_sysreg(w_bvr, dbgbvr0_el1); + write_sysreg(w_wvr, dbgwvr0_el1); + bvr = read_sysreg(dbgbvr0_el1); + wvr = read_sysreg(dbgwvr0_el1); + + /* Userspace disables Single Step when the end is nigh. 
*/ + asm volatile("iter_ss_end:\n"); + + GUEST_ASSERT_EQ(bvr, w_bvr); + GUEST_ASSERT_EQ(wvr, w_wvr); + } + GUEST_DONE(); +} + +static int debug_version(uint64_t id_aa64dfr0) +{ + return FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_DebugVer), id_aa64dfr0); +} + +static void test_guest_debug_exceptions(uint8_t bpn, uint8_t wpn, uint8_t ctx_bpn) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + struct ucall uc; + + vm = vm_create_with_one_vcpu(&vcpu, guest_code); + + vm_init_descriptor_tables(vm); + vcpu_init_descriptor_tables(vcpu); + + vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT, + ESR_ELx_EC_BRK64, guest_sw_bp_handler); + vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT, + ESR_ELx_EC_BREAKPT_CUR, guest_hw_bp_handler); + vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT, + ESR_ELx_EC_WATCHPT_CUR, guest_wp_handler); + vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT, + ESR_ELx_EC_SOFTSTP_CUR, guest_ss_handler); + vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT, + ESR_ELx_EC_SVC64, guest_svc_handler); + + /* Specify bpn/wpn/ctx_bpn to be tested */ + vcpu_args_set(vcpu, 3, bpn, wpn, ctx_bpn); + pr_debug("Use bpn#%d, wpn#%d and ctx_bpn#%d\n", bpn, wpn, ctx_bpn); + + vcpu_run(vcpu); + switch (get_ucall(vcpu, &uc)) { + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + break; + case UCALL_DONE: + goto done; + default: + TEST_FAIL("Unknown ucall %lu", uc.cmd); + } + +done: + kvm_vm_free(vm); +} + +void test_single_step_from_userspace(int test_cnt) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + struct ucall uc; + struct kvm_run *run; + uint64_t pc, cmd; + uint64_t test_pc = 0; + bool ss_enable = false; + struct kvm_guest_debug debug = {}; + + vm = vm_create_with_one_vcpu(&vcpu, guest_code_ss); + run = vcpu->run; + vcpu_args_set(vcpu, 1, test_cnt); + + while (1) { + vcpu_run(vcpu); + if (run->exit_reason != KVM_EXIT_DEBUG) { + cmd = get_ucall(vcpu, &uc); + if (cmd == UCALL_ABORT) { + REPORT_GUEST_ASSERT(uc); + /* NOT REACHED */ + } else if (cmd == UCALL_DONE) { + break; + } + + TEST_ASSERT(cmd == UCALL_NONE, + "Unexpected ucall cmd 0x%lx", cmd); + + debug.control = KVM_GUESTDBG_ENABLE | + KVM_GUESTDBG_SINGLESTEP; + ss_enable = true; + vcpu_guest_debug_set(vcpu, &debug); + continue; + } + + TEST_ASSERT(ss_enable, "Unexpected KVM_EXIT_DEBUG"); + + /* Check if the current pc is expected. */ + pc = vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.pc)); + TEST_ASSERT(!test_pc || pc == test_pc, + "Unexpected pc 0x%lx (expected 0x%lx)", + pc, test_pc); + + if ((pc + 4) == (uint64_t)&iter_ss_end) { + test_pc = 0; + debug.control = KVM_GUESTDBG_ENABLE; + ss_enable = false; + vcpu_guest_debug_set(vcpu, &debug); + continue; + } + + /* + * If the current pc is between iter_ss_bgin and + * iter_ss_end, the pc for the next KVM_EXIT_DEBUG should + * be the current pc + 4. + */ + if ((pc >= (uint64_t)&iter_ss_begin) && + (pc < (uint64_t)&iter_ss_end)) + test_pc = pc + 4; + else + test_pc = 0; + } + + kvm_vm_free(vm); +} + +/* + * Run debug testing using the various breakpoint#, watchpoint# and + * context-aware breakpoint# with the given ID_AA64DFR0_EL1 configuration. 
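+ *
+ * For example, with ID_AA64DFR0_EL1.BRPs == 5 and CTX_CMPs == 1 there
+ * are six breakpoints of which two are context-aware; the context-aware
+ * comparators are the highest numbered ones, so the loops below cover
+ * b in [0..3], c in [4..5], and every supported watchpoint number.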
+ */ +void test_guest_debug_exceptions_all(uint64_t aa64dfr0) +{ + uint8_t brp_num, wrp_num, ctx_brp_num, normal_brp_num, ctx_brp_base; + int b, w, c; + + /* Number of breakpoints */ + brp_num = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_BRPs), aa64dfr0) + 1; + __TEST_REQUIRE(brp_num >= 2, "At least two breakpoints are required"); + + /* Number of watchpoints */ + wrp_num = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_WRPs), aa64dfr0) + 1; + + /* Number of context aware breakpoints */ + ctx_brp_num = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_CTX_CMPs), aa64dfr0) + 1; + + pr_debug("%s brp_num:%d, wrp_num:%d, ctx_brp_num:%d\n", __func__, + brp_num, wrp_num, ctx_brp_num); + + /* Number of normal (non-context aware) breakpoints */ + normal_brp_num = brp_num - ctx_brp_num; + + /* Lowest context aware breakpoint number */ + ctx_brp_base = normal_brp_num; + + /* Run tests with all supported breakpoints/watchpoints */ + for (c = ctx_brp_base; c < ctx_brp_base + ctx_brp_num; c++) { + for (b = 0; b < normal_brp_num; b++) { + for (w = 0; w < wrp_num; w++) + test_guest_debug_exceptions(b, w, c); + } + } +} + +static void help(char *name) +{ + puts(""); + printf("Usage: %s [-h] [-i iterations of the single step test]\n", name); + puts(""); + exit(0); +} + +int main(int argc, char *argv[]) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + int opt; + int ss_iteration = 10000; + uint64_t aa64dfr0; + + vm = vm_create_with_one_vcpu(&vcpu, guest_code); + aa64dfr0 = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64DFR0_EL1)); + __TEST_REQUIRE(debug_version(aa64dfr0) >= 6, + "Armv8 debug architecture not supported."); + kvm_vm_free(vm); + + while ((opt = getopt(argc, argv, "i:")) != -1) { + switch (opt) { + case 'i': + ss_iteration = atoi_positive("Number of iterations", optarg); + break; + case 'h': + default: + help(argv[0]); + break; + } + } + + test_guest_debug_exceptions_all(aa64dfr0); + test_single_step_from_userspace(ss_iteration); + + return 0; +} diff --git a/tools/testing/selftests/kvm/arm64/get-reg-list.c b/tools/testing/selftests/kvm/arm64/get-reg-list.c new file mode 100644 index 000000000000..d43fb3f49050 --- /dev/null +++ b/tools/testing/selftests/kvm/arm64/get-reg-list.c @@ -0,0 +1,771 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Check for KVM_GET_REG_LIST regressions. + * + * Copyright (C) 2020, Red Hat, Inc. + * + * While the blessed list should be created from the oldest possible + * kernel, we can't go older than v5.2, though, because that's the first + * release which includes df205b5c6328 ("KVM: arm64: Filter out invalid + * core register IDs in KVM_GET_REG_LIST"). Without that commit the core + * registers won't match expectations. 
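+ *
+ * When KVM starts exposing a register that is not in the lists below,
+ * the test flags it and print_reg() emits the new entry in the same
+ * "ARM64_SYS_REG(...)" / "KVM_REG_ARM64 | ..." form used here, so the
+ * line can be pasted into the relevant table once it has been reviewed.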
+ */ +#include +#include "kvm_util.h" +#include "test_util.h" +#include "processor.h" + +struct feature_id_reg { + __u64 reg; + __u64 id_reg; + __u64 feat_shift; + __u64 feat_min; +}; + +static struct feature_id_reg feat_id_regs[] = { + { + ARM64_SYS_REG(3, 0, 2, 0, 3), /* TCR2_EL1 */ + ARM64_SYS_REG(3, 0, 0, 7, 3), /* ID_AA64MMFR3_EL1 */ + 0, + 1 + }, + { + ARM64_SYS_REG(3, 0, 10, 2, 2), /* PIRE0_EL1 */ + ARM64_SYS_REG(3, 0, 0, 7, 3), /* ID_AA64MMFR3_EL1 */ + 8, + 1 + }, + { + ARM64_SYS_REG(3, 0, 10, 2, 3), /* PIR_EL1 */ + ARM64_SYS_REG(3, 0, 0, 7, 3), /* ID_AA64MMFR3_EL1 */ + 8, + 1 + }, + { + ARM64_SYS_REG(3, 0, 10, 2, 4), /* POR_EL1 */ + ARM64_SYS_REG(3, 0, 0, 7, 3), /* ID_AA64MMFR3_EL1 */ + 16, + 1 + }, + { + ARM64_SYS_REG(3, 3, 10, 2, 4), /* POR_EL0 */ + ARM64_SYS_REG(3, 0, 0, 7, 3), /* ID_AA64MMFR3_EL1 */ + 16, + 1 + } +}; + +bool filter_reg(__u64 reg) +{ + /* + * DEMUX register presence depends on the host's CLIDR_EL1. + * This means there's no set of them that we can bless. + */ + if ((reg & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_DEMUX) + return true; + + return false; +} + +static bool check_supported_feat_reg(struct kvm_vcpu *vcpu, __u64 reg) +{ + int i, ret; + __u64 data, feat_val; + + for (i = 0; i < ARRAY_SIZE(feat_id_regs); i++) { + if (feat_id_regs[i].reg == reg) { + ret = __vcpu_get_reg(vcpu, feat_id_regs[i].id_reg, &data); + if (ret < 0) + return false; + + feat_val = ((data >> feat_id_regs[i].feat_shift) & 0xf); + return feat_val >= feat_id_regs[i].feat_min; + } + } + + return true; +} + +bool check_supported_reg(struct kvm_vcpu *vcpu, __u64 reg) +{ + return check_supported_feat_reg(vcpu, reg); +} + +bool check_reject_set(int err) +{ + return err == EPERM; +} + +void finalize_vcpu(struct kvm_vcpu *vcpu, struct vcpu_reg_list *c) +{ + struct vcpu_reg_sublist *s; + int feature; + + for_each_sublist(c, s) { + if (s->finalize) { + feature = s->feature; + vcpu_ioctl(vcpu, KVM_ARM_VCPU_FINALIZE, &feature); + } + } +} + +#define REG_MASK (KVM_REG_ARCH_MASK | KVM_REG_SIZE_MASK | KVM_REG_ARM_COPROC_MASK) + +#define CORE_REGS_XX_NR_WORDS 2 +#define CORE_SPSR_XX_NR_WORDS 2 +#define CORE_FPREGS_XX_NR_WORDS 4 + +static const char *core_id_to_str(const char *prefix, __u64 id) +{ + __u64 core_off = id & ~REG_MASK, idx; + + /* + * core_off is the offset into struct kvm_regs + */ + switch (core_off) { + case KVM_REG_ARM_CORE_REG(regs.regs[0]) ... + KVM_REG_ARM_CORE_REG(regs.regs[30]): + idx = (core_off - KVM_REG_ARM_CORE_REG(regs.regs[0])) / CORE_REGS_XX_NR_WORDS; + TEST_ASSERT(idx < 31, "%s: Unexpected regs.regs index: %lld", prefix, idx); + return strdup_printf("KVM_REG_ARM_CORE_REG(regs.regs[%lld])", idx); + case KVM_REG_ARM_CORE_REG(regs.sp): + return "KVM_REG_ARM_CORE_REG(regs.sp)"; + case KVM_REG_ARM_CORE_REG(regs.pc): + return "KVM_REG_ARM_CORE_REG(regs.pc)"; + case KVM_REG_ARM_CORE_REG(regs.pstate): + return "KVM_REG_ARM_CORE_REG(regs.pstate)"; + case KVM_REG_ARM_CORE_REG(sp_el1): + return "KVM_REG_ARM_CORE_REG(sp_el1)"; + case KVM_REG_ARM_CORE_REG(elr_el1): + return "KVM_REG_ARM_CORE_REG(elr_el1)"; + case KVM_REG_ARM_CORE_REG(spsr[0]) ... + KVM_REG_ARM_CORE_REG(spsr[KVM_NR_SPSR - 1]): + idx = (core_off - KVM_REG_ARM_CORE_REG(spsr[0])) / CORE_SPSR_XX_NR_WORDS; + TEST_ASSERT(idx < KVM_NR_SPSR, "%s: Unexpected spsr index: %lld", prefix, idx); + return strdup_printf("KVM_REG_ARM_CORE_REG(spsr[%lld])", idx); + case KVM_REG_ARM_CORE_REG(fp_regs.vregs[0]) ... 
+ KVM_REG_ARM_CORE_REG(fp_regs.vregs[31]): + idx = (core_off - KVM_REG_ARM_CORE_REG(fp_regs.vregs[0])) / CORE_FPREGS_XX_NR_WORDS; + TEST_ASSERT(idx < 32, "%s: Unexpected fp_regs.vregs index: %lld", prefix, idx); + return strdup_printf("KVM_REG_ARM_CORE_REG(fp_regs.vregs[%lld])", idx); + case KVM_REG_ARM_CORE_REG(fp_regs.fpsr): + return "KVM_REG_ARM_CORE_REG(fp_regs.fpsr)"; + case KVM_REG_ARM_CORE_REG(fp_regs.fpcr): + return "KVM_REG_ARM_CORE_REG(fp_regs.fpcr)"; + } + + TEST_FAIL("%s: Unknown core reg id: 0x%llx", prefix, id); + return NULL; +} + +static const char *sve_id_to_str(const char *prefix, __u64 id) +{ + __u64 sve_off, n, i; + + if (id == KVM_REG_ARM64_SVE_VLS) + return "KVM_REG_ARM64_SVE_VLS"; + + sve_off = id & ~(REG_MASK | ((1ULL << 5) - 1)); + i = id & (KVM_ARM64_SVE_MAX_SLICES - 1); + + TEST_ASSERT(i == 0, "%s: Currently we don't expect slice > 0, reg id 0x%llx", prefix, id); + + switch (sve_off) { + case KVM_REG_ARM64_SVE_ZREG_BASE ... + KVM_REG_ARM64_SVE_ZREG_BASE + (1ULL << 5) * KVM_ARM64_SVE_NUM_ZREGS - 1: + n = (id >> 5) & (KVM_ARM64_SVE_NUM_ZREGS - 1); + TEST_ASSERT(id == KVM_REG_ARM64_SVE_ZREG(n, 0), + "%s: Unexpected bits set in SVE ZREG id: 0x%llx", prefix, id); + return strdup_printf("KVM_REG_ARM64_SVE_ZREG(%lld, 0)", n); + case KVM_REG_ARM64_SVE_PREG_BASE ... + KVM_REG_ARM64_SVE_PREG_BASE + (1ULL << 5) * KVM_ARM64_SVE_NUM_PREGS - 1: + n = (id >> 5) & (KVM_ARM64_SVE_NUM_PREGS - 1); + TEST_ASSERT(id == KVM_REG_ARM64_SVE_PREG(n, 0), + "%s: Unexpected bits set in SVE PREG id: 0x%llx", prefix, id); + return strdup_printf("KVM_REG_ARM64_SVE_PREG(%lld, 0)", n); + case KVM_REG_ARM64_SVE_FFR_BASE: + TEST_ASSERT(id == KVM_REG_ARM64_SVE_FFR(0), + "%s: Unexpected bits set in SVE FFR id: 0x%llx", prefix, id); + return "KVM_REG_ARM64_SVE_FFR(0)"; + } + + return NULL; +} + +void print_reg(const char *prefix, __u64 id) +{ + unsigned op0, op1, crn, crm, op2; + const char *reg_size = NULL; + + TEST_ASSERT((id & KVM_REG_ARCH_MASK) == KVM_REG_ARM64, + "%s: KVM_REG_ARM64 missing in reg id: 0x%llx", prefix, id); + + switch (id & KVM_REG_SIZE_MASK) { + case KVM_REG_SIZE_U8: + reg_size = "KVM_REG_SIZE_U8"; + break; + case KVM_REG_SIZE_U16: + reg_size = "KVM_REG_SIZE_U16"; + break; + case KVM_REG_SIZE_U32: + reg_size = "KVM_REG_SIZE_U32"; + break; + case KVM_REG_SIZE_U64: + reg_size = "KVM_REG_SIZE_U64"; + break; + case KVM_REG_SIZE_U128: + reg_size = "KVM_REG_SIZE_U128"; + break; + case KVM_REG_SIZE_U256: + reg_size = "KVM_REG_SIZE_U256"; + break; + case KVM_REG_SIZE_U512: + reg_size = "KVM_REG_SIZE_U512"; + break; + case KVM_REG_SIZE_U1024: + reg_size = "KVM_REG_SIZE_U1024"; + break; + case KVM_REG_SIZE_U2048: + reg_size = "KVM_REG_SIZE_U2048"; + break; + default: + TEST_FAIL("%s: Unexpected reg size: 0x%llx in reg id: 0x%llx", + prefix, (id & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT, id); + } + + switch (id & KVM_REG_ARM_COPROC_MASK) { + case KVM_REG_ARM_CORE: + printf("\tKVM_REG_ARM64 | %s | KVM_REG_ARM_CORE | %s,\n", reg_size, core_id_to_str(prefix, id)); + break; + case KVM_REG_ARM_DEMUX: + TEST_ASSERT(!(id & ~(REG_MASK | KVM_REG_ARM_DEMUX_ID_MASK | KVM_REG_ARM_DEMUX_VAL_MASK)), + "%s: Unexpected bits set in DEMUX reg id: 0x%llx", prefix, id); + printf("\tKVM_REG_ARM64 | %s | KVM_REG_ARM_DEMUX | KVM_REG_ARM_DEMUX_ID_CCSIDR | %lld,\n", + reg_size, id & KVM_REG_ARM_DEMUX_VAL_MASK); + break; + case KVM_REG_ARM64_SYSREG: + op0 = (id & KVM_REG_ARM64_SYSREG_OP0_MASK) >> KVM_REG_ARM64_SYSREG_OP0_SHIFT; + op1 = (id & KVM_REG_ARM64_SYSREG_OP1_MASK) >> KVM_REG_ARM64_SYSREG_OP1_SHIFT; + crn = 
(id & KVM_REG_ARM64_SYSREG_CRN_MASK) >> KVM_REG_ARM64_SYSREG_CRN_SHIFT; + crm = (id & KVM_REG_ARM64_SYSREG_CRM_MASK) >> KVM_REG_ARM64_SYSREG_CRM_SHIFT; + op2 = (id & KVM_REG_ARM64_SYSREG_OP2_MASK) >> KVM_REG_ARM64_SYSREG_OP2_SHIFT; + TEST_ASSERT(id == ARM64_SYS_REG(op0, op1, crn, crm, op2), + "%s: Unexpected bits set in SYSREG reg id: 0x%llx", prefix, id); + printf("\tARM64_SYS_REG(%d, %d, %d, %d, %d),\n", op0, op1, crn, crm, op2); + break; + case KVM_REG_ARM_FW: + TEST_ASSERT(id == KVM_REG_ARM_FW_REG(id & 0xffff), + "%s: Unexpected bits set in FW reg id: 0x%llx", prefix, id); + printf("\tKVM_REG_ARM_FW_REG(%lld),\n", id & 0xffff); + break; + case KVM_REG_ARM_FW_FEAT_BMAP: + TEST_ASSERT(id == KVM_REG_ARM_FW_FEAT_BMAP_REG(id & 0xffff), + "%s: Unexpected bits set in the bitmap feature FW reg id: 0x%llx", prefix, id); + printf("\tKVM_REG_ARM_FW_FEAT_BMAP_REG(%lld),\n", id & 0xffff); + break; + case KVM_REG_ARM64_SVE: + printf("\t%s,\n", sve_id_to_str(prefix, id)); + break; + default: + TEST_FAIL("%s: Unexpected coproc type: 0x%llx in reg id: 0x%llx", + prefix, (id & KVM_REG_ARM_COPROC_MASK) >> KVM_REG_ARM_COPROC_SHIFT, id); + } +} + +/* + * The original blessed list was primed with the output of kernel version + * v4.15 with --core-reg-fixup and then later updated with new registers. + * (The --core-reg-fixup option and it's fixup function have been removed + * from the test, as it's unlikely to use this type of test on a kernel + * older than v5.2.) + * + * The blessed list is up to date with kernel version v6.4 (or so we hope) + */ +static __u64 base_regs[] = { + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[0]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[1]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[2]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[3]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[4]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[5]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[6]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[7]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[8]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[9]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[10]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[11]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[12]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[13]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[14]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[15]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[16]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[17]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[18]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[19]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[20]), + KVM_REG_ARM64 | 
KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[21]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[22]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[23]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[24]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[25]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[26]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[27]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[28]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[29]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[30]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.sp), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.pc), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.pstate), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(sp_el1), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(elr_el1), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(spsr[0]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(spsr[1]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(spsr[2]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(spsr[3]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(spsr[4]), + KVM_REG_ARM64 | KVM_REG_SIZE_U32 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.fpsr), + KVM_REG_ARM64 | KVM_REG_SIZE_U32 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.fpcr), + KVM_REG_ARM_FW_REG(0), /* KVM_REG_ARM_PSCI_VERSION */ + KVM_REG_ARM_FW_REG(1), /* KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1 */ + KVM_REG_ARM_FW_REG(2), /* KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2 */ + KVM_REG_ARM_FW_REG(3), /* KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3 */ + KVM_REG_ARM_FW_FEAT_BMAP_REG(0), /* KVM_REG_ARM_STD_BMAP */ + KVM_REG_ARM_FW_FEAT_BMAP_REG(1), /* KVM_REG_ARM_STD_HYP_BMAP */ + KVM_REG_ARM_FW_FEAT_BMAP_REG(2), /* KVM_REG_ARM_VENDOR_HYP_BMAP */ + ARM64_SYS_REG(3, 3, 14, 3, 1), /* CNTV_CTL_EL0 */ + ARM64_SYS_REG(3, 3, 14, 3, 2), /* CNTV_CVAL_EL0 */ + ARM64_SYS_REG(3, 3, 14, 0, 2), + ARM64_SYS_REG(3, 0, 0, 0, 0), /* MIDR_EL1 */ + ARM64_SYS_REG(3, 0, 0, 0, 6), /* REVIDR_EL1 */ + ARM64_SYS_REG(3, 1, 0, 0, 1), /* CLIDR_EL1 */ + ARM64_SYS_REG(3, 1, 0, 0, 7), /* AIDR_EL1 */ + ARM64_SYS_REG(3, 3, 0, 0, 1), /* CTR_EL0 */ + ARM64_SYS_REG(2, 0, 0, 0, 4), + ARM64_SYS_REG(2, 0, 0, 0, 5), + ARM64_SYS_REG(2, 0, 0, 0, 6), + ARM64_SYS_REG(2, 0, 0, 0, 7), + ARM64_SYS_REG(2, 0, 0, 1, 4), + ARM64_SYS_REG(2, 0, 0, 1, 5), + ARM64_SYS_REG(2, 0, 0, 1, 6), + ARM64_SYS_REG(2, 0, 0, 1, 7), + ARM64_SYS_REG(2, 0, 0, 2, 0), /* MDCCINT_EL1 */ + ARM64_SYS_REG(2, 0, 0, 2, 2), /* MDSCR_EL1 */ + ARM64_SYS_REG(2, 0, 0, 2, 4), + ARM64_SYS_REG(2, 0, 0, 2, 5), + ARM64_SYS_REG(2, 0, 0, 2, 6), + ARM64_SYS_REG(2, 0, 0, 2, 7), + ARM64_SYS_REG(2, 0, 0, 3, 4), + ARM64_SYS_REG(2, 0, 0, 3, 5), + ARM64_SYS_REG(2, 0, 0, 3, 6), + ARM64_SYS_REG(2, 0, 0, 3, 7), + ARM64_SYS_REG(2, 0, 0, 4, 4), + ARM64_SYS_REG(2, 0, 0, 4, 5), + ARM64_SYS_REG(2, 0, 0, 4, 6), + ARM64_SYS_REG(2, 0, 0, 4, 7), + ARM64_SYS_REG(2, 
0, 0, 5, 4), + ARM64_SYS_REG(2, 0, 0, 5, 5), + ARM64_SYS_REG(2, 0, 0, 5, 6), + ARM64_SYS_REG(2, 0, 0, 5, 7), + ARM64_SYS_REG(2, 0, 0, 6, 4), + ARM64_SYS_REG(2, 0, 0, 6, 5), + ARM64_SYS_REG(2, 0, 0, 6, 6), + ARM64_SYS_REG(2, 0, 0, 6, 7), + ARM64_SYS_REG(2, 0, 0, 7, 4), + ARM64_SYS_REG(2, 0, 0, 7, 5), + ARM64_SYS_REG(2, 0, 0, 7, 6), + ARM64_SYS_REG(2, 0, 0, 7, 7), + ARM64_SYS_REG(2, 0, 0, 8, 4), + ARM64_SYS_REG(2, 0, 0, 8, 5), + ARM64_SYS_REG(2, 0, 0, 8, 6), + ARM64_SYS_REG(2, 0, 0, 8, 7), + ARM64_SYS_REG(2, 0, 0, 9, 4), + ARM64_SYS_REG(2, 0, 0, 9, 5), + ARM64_SYS_REG(2, 0, 0, 9, 6), + ARM64_SYS_REG(2, 0, 0, 9, 7), + ARM64_SYS_REG(2, 0, 0, 10, 4), + ARM64_SYS_REG(2, 0, 0, 10, 5), + ARM64_SYS_REG(2, 0, 0, 10, 6), + ARM64_SYS_REG(2, 0, 0, 10, 7), + ARM64_SYS_REG(2, 0, 0, 11, 4), + ARM64_SYS_REG(2, 0, 0, 11, 5), + ARM64_SYS_REG(2, 0, 0, 11, 6), + ARM64_SYS_REG(2, 0, 0, 11, 7), + ARM64_SYS_REG(2, 0, 0, 12, 4), + ARM64_SYS_REG(2, 0, 0, 12, 5), + ARM64_SYS_REG(2, 0, 0, 12, 6), + ARM64_SYS_REG(2, 0, 0, 12, 7), + ARM64_SYS_REG(2, 0, 0, 13, 4), + ARM64_SYS_REG(2, 0, 0, 13, 5), + ARM64_SYS_REG(2, 0, 0, 13, 6), + ARM64_SYS_REG(2, 0, 0, 13, 7), + ARM64_SYS_REG(2, 0, 0, 14, 4), + ARM64_SYS_REG(2, 0, 0, 14, 5), + ARM64_SYS_REG(2, 0, 0, 14, 6), + ARM64_SYS_REG(2, 0, 0, 14, 7), + ARM64_SYS_REG(2, 0, 0, 15, 4), + ARM64_SYS_REG(2, 0, 0, 15, 5), + ARM64_SYS_REG(2, 0, 0, 15, 6), + ARM64_SYS_REG(2, 0, 0, 15, 7), + ARM64_SYS_REG(2, 0, 1, 1, 4), /* OSLSR_EL1 */ + ARM64_SYS_REG(2, 4, 0, 7, 0), /* DBGVCR32_EL2 */ + ARM64_SYS_REG(3, 0, 0, 0, 5), /* MPIDR_EL1 */ + ARM64_SYS_REG(3, 0, 0, 1, 0), /* ID_PFR0_EL1 */ + ARM64_SYS_REG(3, 0, 0, 1, 1), /* ID_PFR1_EL1 */ + ARM64_SYS_REG(3, 0, 0, 1, 2), /* ID_DFR0_EL1 */ + ARM64_SYS_REG(3, 0, 0, 1, 3), /* ID_AFR0_EL1 */ + ARM64_SYS_REG(3, 0, 0, 1, 4), /* ID_MMFR0_EL1 */ + ARM64_SYS_REG(3, 0, 0, 1, 5), /* ID_MMFR1_EL1 */ + ARM64_SYS_REG(3, 0, 0, 1, 6), /* ID_MMFR2_EL1 */ + ARM64_SYS_REG(3, 0, 0, 1, 7), /* ID_MMFR3_EL1 */ + ARM64_SYS_REG(3, 0, 0, 2, 0), /* ID_ISAR0_EL1 */ + ARM64_SYS_REG(3, 0, 0, 2, 1), /* ID_ISAR1_EL1 */ + ARM64_SYS_REG(3, 0, 0, 2, 2), /* ID_ISAR2_EL1 */ + ARM64_SYS_REG(3, 0, 0, 2, 3), /* ID_ISAR3_EL1 */ + ARM64_SYS_REG(3, 0, 0, 2, 4), /* ID_ISAR4_EL1 */ + ARM64_SYS_REG(3, 0, 0, 2, 5), /* ID_ISAR5_EL1 */ + ARM64_SYS_REG(3, 0, 0, 2, 6), /* ID_MMFR4_EL1 */ + ARM64_SYS_REG(3, 0, 0, 2, 7), /* ID_ISAR6_EL1 */ + ARM64_SYS_REG(3, 0, 0, 3, 0), /* MVFR0_EL1 */ + ARM64_SYS_REG(3, 0, 0, 3, 1), /* MVFR1_EL1 */ + ARM64_SYS_REG(3, 0, 0, 3, 2), /* MVFR2_EL1 */ + ARM64_SYS_REG(3, 0, 0, 3, 3), + ARM64_SYS_REG(3, 0, 0, 3, 4), /* ID_PFR2_EL1 */ + ARM64_SYS_REG(3, 0, 0, 3, 5), /* ID_DFR1_EL1 */ + ARM64_SYS_REG(3, 0, 0, 3, 6), /* ID_MMFR5_EL1 */ + ARM64_SYS_REG(3, 0, 0, 3, 7), + ARM64_SYS_REG(3, 0, 0, 4, 0), /* ID_AA64PFR0_EL1 */ + ARM64_SYS_REG(3, 0, 0, 4, 1), /* ID_AA64PFR1_EL1 */ + ARM64_SYS_REG(3, 0, 0, 4, 2), /* ID_AA64PFR2_EL1 */ + ARM64_SYS_REG(3, 0, 0, 4, 3), + ARM64_SYS_REG(3, 0, 0, 4, 4), /* ID_AA64ZFR0_EL1 */ + ARM64_SYS_REG(3, 0, 0, 4, 5), /* ID_AA64SMFR0_EL1 */ + ARM64_SYS_REG(3, 0, 0, 4, 6), + ARM64_SYS_REG(3, 0, 0, 4, 7), + ARM64_SYS_REG(3, 0, 0, 5, 0), /* ID_AA64DFR0_EL1 */ + ARM64_SYS_REG(3, 0, 0, 5, 1), /* ID_AA64DFR1_EL1 */ + ARM64_SYS_REG(3, 0, 0, 5, 2), + ARM64_SYS_REG(3, 0, 0, 5, 3), + ARM64_SYS_REG(3, 0, 0, 5, 4), /* ID_AA64AFR0_EL1 */ + ARM64_SYS_REG(3, 0, 0, 5, 5), /* ID_AA64AFR1_EL1 */ + ARM64_SYS_REG(3, 0, 0, 5, 6), + ARM64_SYS_REG(3, 0, 0, 5, 7), + ARM64_SYS_REG(3, 0, 0, 6, 0), /* ID_AA64ISAR0_EL1 */ + ARM64_SYS_REG(3, 0, 0, 6, 1), /* ID_AA64ISAR1_EL1 */ + 
ARM64_SYS_REG(3, 0, 0, 6, 2), /* ID_AA64ISAR2_EL1 */ + ARM64_SYS_REG(3, 0, 0, 6, 3), + ARM64_SYS_REG(3, 0, 0, 6, 4), + ARM64_SYS_REG(3, 0, 0, 6, 5), + ARM64_SYS_REG(3, 0, 0, 6, 6), + ARM64_SYS_REG(3, 0, 0, 6, 7), + ARM64_SYS_REG(3, 0, 0, 7, 0), /* ID_AA64MMFR0_EL1 */ + ARM64_SYS_REG(3, 0, 0, 7, 1), /* ID_AA64MMFR1_EL1 */ + ARM64_SYS_REG(3, 0, 0, 7, 2), /* ID_AA64MMFR2_EL1 */ + ARM64_SYS_REG(3, 0, 0, 7, 3), /* ID_AA64MMFR3_EL1 */ + ARM64_SYS_REG(3, 0, 0, 7, 4), /* ID_AA64MMFR4_EL1 */ + ARM64_SYS_REG(3, 0, 0, 7, 5), + ARM64_SYS_REG(3, 0, 0, 7, 6), + ARM64_SYS_REG(3, 0, 0, 7, 7), + ARM64_SYS_REG(3, 0, 1, 0, 0), /* SCTLR_EL1 */ + ARM64_SYS_REG(3, 0, 1, 0, 1), /* ACTLR_EL1 */ + ARM64_SYS_REG(3, 0, 1, 0, 2), /* CPACR_EL1 */ + ARM64_SYS_REG(3, 0, 2, 0, 0), /* TTBR0_EL1 */ + ARM64_SYS_REG(3, 0, 2, 0, 1), /* TTBR1_EL1 */ + ARM64_SYS_REG(3, 0, 2, 0, 2), /* TCR_EL1 */ + ARM64_SYS_REG(3, 0, 2, 0, 3), /* TCR2_EL1 */ + ARM64_SYS_REG(3, 0, 5, 1, 0), /* AFSR0_EL1 */ + ARM64_SYS_REG(3, 0, 5, 1, 1), /* AFSR1_EL1 */ + ARM64_SYS_REG(3, 0, 5, 2, 0), /* ESR_EL1 */ + ARM64_SYS_REG(3, 0, 6, 0, 0), /* FAR_EL1 */ + ARM64_SYS_REG(3, 0, 7, 4, 0), /* PAR_EL1 */ + ARM64_SYS_REG(3, 0, 10, 2, 0), /* MAIR_EL1 */ + ARM64_SYS_REG(3, 0, 10, 2, 2), /* PIRE0_EL1 */ + ARM64_SYS_REG(3, 0, 10, 2, 3), /* PIR_EL1 */ + ARM64_SYS_REG(3, 0, 10, 2, 4), /* POR_EL1 */ + ARM64_SYS_REG(3, 0, 10, 3, 0), /* AMAIR_EL1 */ + ARM64_SYS_REG(3, 0, 12, 0, 0), /* VBAR_EL1 */ + ARM64_SYS_REG(3, 0, 12, 1, 1), /* DISR_EL1 */ + ARM64_SYS_REG(3, 0, 13, 0, 1), /* CONTEXTIDR_EL1 */ + ARM64_SYS_REG(3, 0, 13, 0, 4), /* TPIDR_EL1 */ + ARM64_SYS_REG(3, 0, 14, 1, 0), /* CNTKCTL_EL1 */ + ARM64_SYS_REG(3, 2, 0, 0, 0), /* CSSELR_EL1 */ + ARM64_SYS_REG(3, 3, 10, 2, 4), /* POR_EL0 */ + ARM64_SYS_REG(3, 3, 13, 0, 2), /* TPIDR_EL0 */ + ARM64_SYS_REG(3, 3, 13, 0, 3), /* TPIDRRO_EL0 */ + ARM64_SYS_REG(3, 3, 14, 0, 1), /* CNTPCT_EL0 */ + ARM64_SYS_REG(3, 3, 14, 2, 1), /* CNTP_CTL_EL0 */ + ARM64_SYS_REG(3, 3, 14, 2, 2), /* CNTP_CVAL_EL0 */ + ARM64_SYS_REG(3, 4, 3, 0, 0), /* DACR32_EL2 */ + ARM64_SYS_REG(3, 4, 5, 0, 1), /* IFSR32_EL2 */ + ARM64_SYS_REG(3, 4, 5, 3, 0), /* FPEXC32_EL2 */ +}; + +static __u64 pmu_regs[] = { + ARM64_SYS_REG(3, 0, 9, 14, 1), /* PMINTENSET_EL1 */ + ARM64_SYS_REG(3, 0, 9, 14, 2), /* PMINTENCLR_EL1 */ + ARM64_SYS_REG(3, 3, 9, 12, 0), /* PMCR_EL0 */ + ARM64_SYS_REG(3, 3, 9, 12, 1), /* PMCNTENSET_EL0 */ + ARM64_SYS_REG(3, 3, 9, 12, 2), /* PMCNTENCLR_EL0 */ + ARM64_SYS_REG(3, 3, 9, 12, 3), /* PMOVSCLR_EL0 */ + ARM64_SYS_REG(3, 3, 9, 12, 4), /* PMSWINC_EL0 */ + ARM64_SYS_REG(3, 3, 9, 12, 5), /* PMSELR_EL0 */ + ARM64_SYS_REG(3, 3, 9, 13, 0), /* PMCCNTR_EL0 */ + ARM64_SYS_REG(3, 3, 9, 14, 0), /* PMUSERENR_EL0 */ + ARM64_SYS_REG(3, 3, 9, 14, 3), /* PMOVSSET_EL0 */ + ARM64_SYS_REG(3, 3, 14, 8, 0), + ARM64_SYS_REG(3, 3, 14, 8, 1), + ARM64_SYS_REG(3, 3, 14, 8, 2), + ARM64_SYS_REG(3, 3, 14, 8, 3), + ARM64_SYS_REG(3, 3, 14, 8, 4), + ARM64_SYS_REG(3, 3, 14, 8, 5), + ARM64_SYS_REG(3, 3, 14, 8, 6), + ARM64_SYS_REG(3, 3, 14, 8, 7), + ARM64_SYS_REG(3, 3, 14, 9, 0), + ARM64_SYS_REG(3, 3, 14, 9, 1), + ARM64_SYS_REG(3, 3, 14, 9, 2), + ARM64_SYS_REG(3, 3, 14, 9, 3), + ARM64_SYS_REG(3, 3, 14, 9, 4), + ARM64_SYS_REG(3, 3, 14, 9, 5), + ARM64_SYS_REG(3, 3, 14, 9, 6), + ARM64_SYS_REG(3, 3, 14, 9, 7), + ARM64_SYS_REG(3, 3, 14, 10, 0), + ARM64_SYS_REG(3, 3, 14, 10, 1), + ARM64_SYS_REG(3, 3, 14, 10, 2), + ARM64_SYS_REG(3, 3, 14, 10, 3), + ARM64_SYS_REG(3, 3, 14, 10, 4), + ARM64_SYS_REG(3, 3, 14, 10, 5), + ARM64_SYS_REG(3, 3, 14, 10, 6), + ARM64_SYS_REG(3, 3, 14, 10, 7), + 
ARM64_SYS_REG(3, 3, 14, 11, 0), + ARM64_SYS_REG(3, 3, 14, 11, 1), + ARM64_SYS_REG(3, 3, 14, 11, 2), + ARM64_SYS_REG(3, 3, 14, 11, 3), + ARM64_SYS_REG(3, 3, 14, 11, 4), + ARM64_SYS_REG(3, 3, 14, 11, 5), + ARM64_SYS_REG(3, 3, 14, 11, 6), + ARM64_SYS_REG(3, 3, 14, 12, 0), + ARM64_SYS_REG(3, 3, 14, 12, 1), + ARM64_SYS_REG(3, 3, 14, 12, 2), + ARM64_SYS_REG(3, 3, 14, 12, 3), + ARM64_SYS_REG(3, 3, 14, 12, 4), + ARM64_SYS_REG(3, 3, 14, 12, 5), + ARM64_SYS_REG(3, 3, 14, 12, 6), + ARM64_SYS_REG(3, 3, 14, 12, 7), + ARM64_SYS_REG(3, 3, 14, 13, 0), + ARM64_SYS_REG(3, 3, 14, 13, 1), + ARM64_SYS_REG(3, 3, 14, 13, 2), + ARM64_SYS_REG(3, 3, 14, 13, 3), + ARM64_SYS_REG(3, 3, 14, 13, 4), + ARM64_SYS_REG(3, 3, 14, 13, 5), + ARM64_SYS_REG(3, 3, 14, 13, 6), + ARM64_SYS_REG(3, 3, 14, 13, 7), + ARM64_SYS_REG(3, 3, 14, 14, 0), + ARM64_SYS_REG(3, 3, 14, 14, 1), + ARM64_SYS_REG(3, 3, 14, 14, 2), + ARM64_SYS_REG(3, 3, 14, 14, 3), + ARM64_SYS_REG(3, 3, 14, 14, 4), + ARM64_SYS_REG(3, 3, 14, 14, 5), + ARM64_SYS_REG(3, 3, 14, 14, 6), + ARM64_SYS_REG(3, 3, 14, 14, 7), + ARM64_SYS_REG(3, 3, 14, 15, 0), + ARM64_SYS_REG(3, 3, 14, 15, 1), + ARM64_SYS_REG(3, 3, 14, 15, 2), + ARM64_SYS_REG(3, 3, 14, 15, 3), + ARM64_SYS_REG(3, 3, 14, 15, 4), + ARM64_SYS_REG(3, 3, 14, 15, 5), + ARM64_SYS_REG(3, 3, 14, 15, 6), + ARM64_SYS_REG(3, 3, 14, 15, 7), /* PMCCFILTR_EL0 */ +}; + +static __u64 vregs[] = { + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[0]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[1]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[2]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[3]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[4]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[5]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[6]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[7]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[8]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[9]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[10]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[11]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[12]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[13]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[14]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[15]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[16]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[17]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[18]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[19]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[20]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[21]), + KVM_REG_ARM64 | 
KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[22]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[23]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[24]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[25]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[26]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[27]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[28]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[29]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[30]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[31]), +}; + +static __u64 sve_regs[] = { + KVM_REG_ARM64_SVE_VLS, + KVM_REG_ARM64_SVE_ZREG(0, 0), + KVM_REG_ARM64_SVE_ZREG(1, 0), + KVM_REG_ARM64_SVE_ZREG(2, 0), + KVM_REG_ARM64_SVE_ZREG(3, 0), + KVM_REG_ARM64_SVE_ZREG(4, 0), + KVM_REG_ARM64_SVE_ZREG(5, 0), + KVM_REG_ARM64_SVE_ZREG(6, 0), + KVM_REG_ARM64_SVE_ZREG(7, 0), + KVM_REG_ARM64_SVE_ZREG(8, 0), + KVM_REG_ARM64_SVE_ZREG(9, 0), + KVM_REG_ARM64_SVE_ZREG(10, 0), + KVM_REG_ARM64_SVE_ZREG(11, 0), + KVM_REG_ARM64_SVE_ZREG(12, 0), + KVM_REG_ARM64_SVE_ZREG(13, 0), + KVM_REG_ARM64_SVE_ZREG(14, 0), + KVM_REG_ARM64_SVE_ZREG(15, 0), + KVM_REG_ARM64_SVE_ZREG(16, 0), + KVM_REG_ARM64_SVE_ZREG(17, 0), + KVM_REG_ARM64_SVE_ZREG(18, 0), + KVM_REG_ARM64_SVE_ZREG(19, 0), + KVM_REG_ARM64_SVE_ZREG(20, 0), + KVM_REG_ARM64_SVE_ZREG(21, 0), + KVM_REG_ARM64_SVE_ZREG(22, 0), + KVM_REG_ARM64_SVE_ZREG(23, 0), + KVM_REG_ARM64_SVE_ZREG(24, 0), + KVM_REG_ARM64_SVE_ZREG(25, 0), + KVM_REG_ARM64_SVE_ZREG(26, 0), + KVM_REG_ARM64_SVE_ZREG(27, 0), + KVM_REG_ARM64_SVE_ZREG(28, 0), + KVM_REG_ARM64_SVE_ZREG(29, 0), + KVM_REG_ARM64_SVE_ZREG(30, 0), + KVM_REG_ARM64_SVE_ZREG(31, 0), + KVM_REG_ARM64_SVE_PREG(0, 0), + KVM_REG_ARM64_SVE_PREG(1, 0), + KVM_REG_ARM64_SVE_PREG(2, 0), + KVM_REG_ARM64_SVE_PREG(3, 0), + KVM_REG_ARM64_SVE_PREG(4, 0), + KVM_REG_ARM64_SVE_PREG(5, 0), + KVM_REG_ARM64_SVE_PREG(6, 0), + KVM_REG_ARM64_SVE_PREG(7, 0), + KVM_REG_ARM64_SVE_PREG(8, 0), + KVM_REG_ARM64_SVE_PREG(9, 0), + KVM_REG_ARM64_SVE_PREG(10, 0), + KVM_REG_ARM64_SVE_PREG(11, 0), + KVM_REG_ARM64_SVE_PREG(12, 0), + KVM_REG_ARM64_SVE_PREG(13, 0), + KVM_REG_ARM64_SVE_PREG(14, 0), + KVM_REG_ARM64_SVE_PREG(15, 0), + KVM_REG_ARM64_SVE_FFR(0), + ARM64_SYS_REG(3, 0, 1, 2, 0), /* ZCR_EL1 */ +}; + +static __u64 sve_rejects_set[] = { + KVM_REG_ARM64_SVE_VLS, +}; + +static __u64 pauth_addr_regs[] = { + ARM64_SYS_REG(3, 0, 2, 1, 0), /* APIAKEYLO_EL1 */ + ARM64_SYS_REG(3, 0, 2, 1, 1), /* APIAKEYHI_EL1 */ + ARM64_SYS_REG(3, 0, 2, 1, 2), /* APIBKEYLO_EL1 */ + ARM64_SYS_REG(3, 0, 2, 1, 3), /* APIBKEYHI_EL1 */ + ARM64_SYS_REG(3, 0, 2, 2, 0), /* APDAKEYLO_EL1 */ + ARM64_SYS_REG(3, 0, 2, 2, 1), /* APDAKEYHI_EL1 */ + ARM64_SYS_REG(3, 0, 2, 2, 2), /* APDBKEYLO_EL1 */ + ARM64_SYS_REG(3, 0, 2, 2, 3) /* APDBKEYHI_EL1 */ +}; + +static __u64 pauth_generic_regs[] = { + ARM64_SYS_REG(3, 0, 2, 3, 0), /* APGAKEYLO_EL1 */ + ARM64_SYS_REG(3, 0, 2, 3, 1), /* APGAKEYHI_EL1 */ +}; + +#define BASE_SUBLIST \ + { "base", .regs = base_regs, .regs_n = ARRAY_SIZE(base_regs), } +#define VREGS_SUBLIST \ + { "vregs", .regs = vregs, .regs_n = ARRAY_SIZE(vregs), } +#define PMU_SUBLIST \ + { "pmu", 
.capability = KVM_CAP_ARM_PMU_V3, .feature = KVM_ARM_VCPU_PMU_V3, \ + .regs = pmu_regs, .regs_n = ARRAY_SIZE(pmu_regs), } +#define SVE_SUBLIST \ + { "sve", .capability = KVM_CAP_ARM_SVE, .feature = KVM_ARM_VCPU_SVE, .finalize = true, \ + .regs = sve_regs, .regs_n = ARRAY_SIZE(sve_regs), \ + .rejects_set = sve_rejects_set, .rejects_set_n = ARRAY_SIZE(sve_rejects_set), } +#define PAUTH_SUBLIST \ + { \ + .name = "pauth_address", \ + .capability = KVM_CAP_ARM_PTRAUTH_ADDRESS, \ + .feature = KVM_ARM_VCPU_PTRAUTH_ADDRESS, \ + .regs = pauth_addr_regs, \ + .regs_n = ARRAY_SIZE(pauth_addr_regs), \ + }, \ + { \ + .name = "pauth_generic", \ + .capability = KVM_CAP_ARM_PTRAUTH_GENERIC, \ + .feature = KVM_ARM_VCPU_PTRAUTH_GENERIC, \ + .regs = pauth_generic_regs, \ + .regs_n = ARRAY_SIZE(pauth_generic_regs), \ + } + +static struct vcpu_reg_list vregs_config = { + .sublists = { + BASE_SUBLIST, + VREGS_SUBLIST, + {0}, + }, +}; +static struct vcpu_reg_list vregs_pmu_config = { + .sublists = { + BASE_SUBLIST, + VREGS_SUBLIST, + PMU_SUBLIST, + {0}, + }, +}; +static struct vcpu_reg_list sve_config = { + .sublists = { + BASE_SUBLIST, + SVE_SUBLIST, + {0}, + }, +}; +static struct vcpu_reg_list sve_pmu_config = { + .sublists = { + BASE_SUBLIST, + SVE_SUBLIST, + PMU_SUBLIST, + {0}, + }, +}; +static struct vcpu_reg_list pauth_config = { + .sublists = { + BASE_SUBLIST, + VREGS_SUBLIST, + PAUTH_SUBLIST, + {0}, + }, +}; +static struct vcpu_reg_list pauth_pmu_config = { + .sublists = { + BASE_SUBLIST, + VREGS_SUBLIST, + PAUTH_SUBLIST, + PMU_SUBLIST, + {0}, + }, +}; + +struct vcpu_reg_list *vcpu_configs[] = { + &vregs_config, + &vregs_pmu_config, + &sve_config, + &sve_pmu_config, + &pauth_config, + &pauth_pmu_config, +}; +int vcpu_configs_n = ARRAY_SIZE(vcpu_configs); diff --git a/tools/testing/selftests/kvm/arm64/hypercalls.c b/tools/testing/selftests/kvm/arm64/hypercalls.c new file mode 100644 index 000000000000..ec54ec7726e9 --- /dev/null +++ b/tools/testing/selftests/kvm/arm64/hypercalls.c @@ -0,0 +1,308 @@ +// SPDX-License-Identifier: GPL-2.0-only + +/* hypercalls: Check the ARM64's psuedo-firmware bitmap register interface. + * + * The test validates the basic hypercall functionalities that are exposed + * via the psuedo-firmware bitmap register. This includes the registers' + * read/write behavior before and after the VM has started, and if the + * hypercalls are properly masked or unmasked to the guest when disabled or + * enabled from the KVM userspace, respectively. 
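+ *
+ * Each register is a feature bitmap; as an example, clearing bit 0 of
+ * KVM_REG_ARM_STD_BMAP (the TRNG v1.0 calls) via KVM_SET_ONE_REG is
+ * expected to make the ARM_SMCCC_TRNG_* hypercalls return
+ * SMCCC_RET_NOT_SUPPORTED in the guest.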
+ */ +#include +#include +#include +#include + +#include "processor.h" + +#define FW_REG_ULIMIT_VAL(max_feat_bit) (GENMASK(max_feat_bit, 0)) + +/* Last valid bits of the bitmapped firmware registers */ +#define KVM_REG_ARM_STD_BMAP_BIT_MAX 0 +#define KVM_REG_ARM_STD_HYP_BMAP_BIT_MAX 0 +#define KVM_REG_ARM_VENDOR_HYP_BMAP_BIT_MAX 1 + +struct kvm_fw_reg_info { + uint64_t reg; /* Register definition */ + uint64_t max_feat_bit; /* Bit that represents the upper limit of the feature-map */ +}; + +#define FW_REG_INFO(r) \ + { \ + .reg = r, \ + .max_feat_bit = r##_BIT_MAX, \ + } + +static const struct kvm_fw_reg_info fw_reg_info[] = { + FW_REG_INFO(KVM_REG_ARM_STD_BMAP), + FW_REG_INFO(KVM_REG_ARM_STD_HYP_BMAP), + FW_REG_INFO(KVM_REG_ARM_VENDOR_HYP_BMAP), +}; + +enum test_stage { + TEST_STAGE_REG_IFACE, + TEST_STAGE_HVC_IFACE_FEAT_DISABLED, + TEST_STAGE_HVC_IFACE_FEAT_ENABLED, + TEST_STAGE_HVC_IFACE_FALSE_INFO, + TEST_STAGE_END, +}; + +static int stage = TEST_STAGE_REG_IFACE; + +struct test_hvc_info { + uint32_t func_id; + uint64_t arg1; +}; + +#define TEST_HVC_INFO(f, a1) \ + { \ + .func_id = f, \ + .arg1 = a1, \ + } + +static const struct test_hvc_info hvc_info[] = { + /* KVM_REG_ARM_STD_BMAP */ + TEST_HVC_INFO(ARM_SMCCC_TRNG_VERSION, 0), + TEST_HVC_INFO(ARM_SMCCC_TRNG_FEATURES, ARM_SMCCC_TRNG_RND64), + TEST_HVC_INFO(ARM_SMCCC_TRNG_GET_UUID, 0), + TEST_HVC_INFO(ARM_SMCCC_TRNG_RND32, 0), + TEST_HVC_INFO(ARM_SMCCC_TRNG_RND64, 0), + + /* KVM_REG_ARM_STD_HYP_BMAP */ + TEST_HVC_INFO(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, ARM_SMCCC_HV_PV_TIME_FEATURES), + TEST_HVC_INFO(ARM_SMCCC_HV_PV_TIME_FEATURES, ARM_SMCCC_HV_PV_TIME_ST), + TEST_HVC_INFO(ARM_SMCCC_HV_PV_TIME_ST, 0), + + /* KVM_REG_ARM_VENDOR_HYP_BMAP */ + TEST_HVC_INFO(ARM_SMCCC_VENDOR_HYP_KVM_FEATURES_FUNC_ID, + ARM_SMCCC_VENDOR_HYP_KVM_PTP_FUNC_ID), + TEST_HVC_INFO(ARM_SMCCC_VENDOR_HYP_CALL_UID_FUNC_ID, 0), + TEST_HVC_INFO(ARM_SMCCC_VENDOR_HYP_KVM_PTP_FUNC_ID, KVM_PTP_VIRT_COUNTER), +}; + +/* Feed false hypercall info to test the KVM behavior */ +static const struct test_hvc_info false_hvc_info[] = { + /* Feature support check against a different family of hypercalls */ + TEST_HVC_INFO(ARM_SMCCC_TRNG_FEATURES, ARM_SMCCC_VENDOR_HYP_KVM_PTP_FUNC_ID), + TEST_HVC_INFO(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, ARM_SMCCC_TRNG_RND64), + TEST_HVC_INFO(ARM_SMCCC_HV_PV_TIME_FEATURES, ARM_SMCCC_TRNG_RND64), +}; + +static void guest_test_hvc(const struct test_hvc_info *hc_info) +{ + unsigned int i; + struct arm_smccc_res res; + unsigned int hvc_info_arr_sz; + + hvc_info_arr_sz = + hc_info == hvc_info ? 
ARRAY_SIZE(hvc_info) : ARRAY_SIZE(false_hvc_info); + + for (i = 0; i < hvc_info_arr_sz; i++, hc_info++) { + memset(&res, 0, sizeof(res)); + smccc_hvc(hc_info->func_id, hc_info->arg1, 0, 0, 0, 0, 0, 0, &res); + + switch (stage) { + case TEST_STAGE_HVC_IFACE_FEAT_DISABLED: + case TEST_STAGE_HVC_IFACE_FALSE_INFO: + __GUEST_ASSERT(res.a0 == SMCCC_RET_NOT_SUPPORTED, + "a0 = 0x%lx, func_id = 0x%x, arg1 = 0x%lx, stage = %u", + res.a0, hc_info->func_id, hc_info->arg1, stage); + break; + case TEST_STAGE_HVC_IFACE_FEAT_ENABLED: + __GUEST_ASSERT(res.a0 != SMCCC_RET_NOT_SUPPORTED, + "a0 = 0x%lx, func_id = 0x%x, arg1 = 0x%lx, stage = %u", + res.a0, hc_info->func_id, hc_info->arg1, stage); + break; + default: + GUEST_FAIL("Unexpected stage = %u", stage); + } + } +} + +static void guest_code(void) +{ + while (stage != TEST_STAGE_END) { + switch (stage) { + case TEST_STAGE_REG_IFACE: + break; + case TEST_STAGE_HVC_IFACE_FEAT_DISABLED: + case TEST_STAGE_HVC_IFACE_FEAT_ENABLED: + guest_test_hvc(hvc_info); + break; + case TEST_STAGE_HVC_IFACE_FALSE_INFO: + guest_test_hvc(false_hvc_info); + break; + default: + GUEST_FAIL("Unexpected stage = %u", stage); + } + + GUEST_SYNC(stage); + } + + GUEST_DONE(); +} + +struct st_time { + uint32_t rev; + uint32_t attr; + uint64_t st_time; +}; + +#define STEAL_TIME_SIZE ((sizeof(struct st_time) + 63) & ~63) +#define ST_GPA_BASE (1 << 30) + +static void steal_time_init(struct kvm_vcpu *vcpu) +{ + uint64_t st_ipa = (ulong)ST_GPA_BASE; + unsigned int gpages; + + gpages = vm_calc_num_guest_pages(VM_MODE_DEFAULT, STEAL_TIME_SIZE); + vm_userspace_mem_region_add(vcpu->vm, VM_MEM_SRC_ANONYMOUS, ST_GPA_BASE, 1, gpages, 0); + + vcpu_device_attr_set(vcpu, KVM_ARM_VCPU_PVTIME_CTRL, + KVM_ARM_VCPU_PVTIME_IPA, &st_ipa); +} + +static void test_fw_regs_before_vm_start(struct kvm_vcpu *vcpu) +{ + uint64_t val; + unsigned int i; + int ret; + + for (i = 0; i < ARRAY_SIZE(fw_reg_info); i++) { + const struct kvm_fw_reg_info *reg_info = &fw_reg_info[i]; + + /* First 'read' should be an upper limit of the features supported */ + val = vcpu_get_reg(vcpu, reg_info->reg); + TEST_ASSERT(val == FW_REG_ULIMIT_VAL(reg_info->max_feat_bit), + "Expected all the features to be set for reg: 0x%lx; expected: 0x%lx; read: 0x%lx", + reg_info->reg, FW_REG_ULIMIT_VAL(reg_info->max_feat_bit), val); + + /* Test a 'write' by disabling all the features of the register map */ + ret = __vcpu_set_reg(vcpu, reg_info->reg, 0); + TEST_ASSERT(ret == 0, + "Failed to clear all the features of reg: 0x%lx; ret: %d", + reg_info->reg, errno); + + val = vcpu_get_reg(vcpu, reg_info->reg); + TEST_ASSERT(val == 0, + "Expected all the features to be cleared for reg: 0x%lx", reg_info->reg); + + /* + * Test enabling a feature that's not supported. + * Avoid this check if all the bits are occupied. + */ + if (reg_info->max_feat_bit < 63) { + ret = __vcpu_set_reg(vcpu, reg_info->reg, BIT(reg_info->max_feat_bit + 1)); + TEST_ASSERT(ret != 0 && errno == EINVAL, + "Unexpected behavior or return value (%d) while setting an unsupported feature for reg: 0x%lx", + errno, reg_info->reg); + } + } +} + +static void test_fw_regs_after_vm_start(struct kvm_vcpu *vcpu) +{ + uint64_t val; + unsigned int i; + int ret; + + for (i = 0; i < ARRAY_SIZE(fw_reg_info); i++) { + const struct kvm_fw_reg_info *reg_info = &fw_reg_info[i]; + + /* + * Before starting the VM, the test clears all the bits. + * Check if that's still the case. 
+ */ + val = vcpu_get_reg(vcpu, reg_info->reg); + TEST_ASSERT(val == 0, + "Expected all the features to be cleared for reg: 0x%lx", + reg_info->reg); + + /* + * Since the VM has run at least once, KVM shouldn't allow modification of + * the registers and should return EBUSY. Set the registers and check for + * the expected errno. + */ + ret = __vcpu_set_reg(vcpu, reg_info->reg, FW_REG_ULIMIT_VAL(reg_info->max_feat_bit)); + TEST_ASSERT(ret != 0 && errno == EBUSY, + "Unexpected behavior or return value (%d) while setting a feature while VM is running for reg: 0x%lx", + errno, reg_info->reg); + } +} + +static struct kvm_vm *test_vm_create(struct kvm_vcpu **vcpu) +{ + struct kvm_vm *vm; + + vm = vm_create_with_one_vcpu(vcpu, guest_code); + + steal_time_init(*vcpu); + + return vm; +} + +static void test_guest_stage(struct kvm_vm **vm, struct kvm_vcpu **vcpu) +{ + int prev_stage = stage; + + pr_debug("Stage: %d\n", prev_stage); + + /* Sync the stage early, the VM might be freed below. */ + stage++; + sync_global_to_guest(*vm, stage); + + switch (prev_stage) { + case TEST_STAGE_REG_IFACE: + test_fw_regs_after_vm_start(*vcpu); + break; + case TEST_STAGE_HVC_IFACE_FEAT_DISABLED: + /* Start a new VM so that all the features are now enabled by default */ + kvm_vm_free(*vm); + *vm = test_vm_create(vcpu); + break; + case TEST_STAGE_HVC_IFACE_FEAT_ENABLED: + case TEST_STAGE_HVC_IFACE_FALSE_INFO: + break; + default: + TEST_FAIL("Unknown test stage: %d", prev_stage); + } +} + +static void test_run(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + struct ucall uc; + bool guest_done = false; + + vm = test_vm_create(&vcpu); + + test_fw_regs_before_vm_start(vcpu); + + while (!guest_done) { + vcpu_run(vcpu); + + switch (get_ucall(vcpu, &uc)) { + case UCALL_SYNC: + test_guest_stage(&vm, &vcpu); + break; + case UCALL_DONE: + guest_done = true; + break; + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + break; + default: + TEST_FAIL("Unexpected guest exit"); + } + } + + kvm_vm_free(vm); +} + +int main(void) +{ + test_run(); + return 0; +} diff --git a/tools/testing/selftests/kvm/arm64/mmio_abort.c b/tools/testing/selftests/kvm/arm64/mmio_abort.c new file mode 100644 index 000000000000..8b7a80a51b1c --- /dev/null +++ b/tools/testing/selftests/kvm/arm64/mmio_abort.c @@ -0,0 +1,159 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * mmio_abort - Tests for userspace MMIO abort injection + * + * Copyright (c) 2024 Google LLC + */ +#include "processor.h" +#include "test_util.h" + +#define MMIO_ADDR 0x8000000ULL + +static u64 expected_abort_pc; + +static void expect_sea_handler(struct ex_regs *regs) +{ + u64 esr = read_sysreg(esr_el1); + + GUEST_ASSERT_EQ(regs->pc, expected_abort_pc); + GUEST_ASSERT_EQ(ESR_ELx_EC(esr), ESR_ELx_EC_DABT_CUR); + GUEST_ASSERT_EQ(esr & ESR_ELx_FSC_TYPE, ESR_ELx_FSC_EXTABT); + + GUEST_DONE(); +} + +static void unexpected_dabt_handler(struct ex_regs *regs) +{ + GUEST_FAIL("Unexpected data abort at PC: %lx\n", regs->pc); +} + +static struct kvm_vm *vm_create_with_dabt_handler(struct kvm_vcpu **vcpu, void *guest_code, + handler_fn dabt_handler) +{ + struct kvm_vm *vm = vm_create_with_one_vcpu(vcpu, guest_code); + + vm_init_descriptor_tables(vm); + vcpu_init_descriptor_tables(*vcpu); + vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT, ESR_ELx_EC_DABT_CUR, dabt_handler); + + virt_map(vm, MMIO_ADDR, MMIO_ADDR, 1); + + return vm; +} + +static void vcpu_inject_extabt(struct kvm_vcpu *vcpu) +{ + struct kvm_vcpu_events events = {}; + + events.exception.ext_dabt_pending = true; + 
vcpu_events_set(vcpu, &events); +} + +static void vcpu_run_expect_done(struct kvm_vcpu *vcpu) +{ + struct ucall uc; + + vcpu_run(vcpu); + switch (get_ucall(vcpu, &uc)) { + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + break; + case UCALL_DONE: + break; + default: + TEST_FAIL("Unexpected ucall: %lu", uc.cmd); + } +} + +extern char test_mmio_abort_insn; + +static void test_mmio_abort_guest(void) +{ + WRITE_ONCE(expected_abort_pc, (u64)&test_mmio_abort_insn); + + asm volatile("test_mmio_abort_insn:\n\t" + "ldr x0, [%0]\n\t" + : : "r" (MMIO_ADDR) : "x0", "memory"); + + GUEST_FAIL("MMIO instruction should not retire"); +} + +/* + * Test that KVM doesn't complete MMIO emulation when userspace has made an + * external abort pending for the instruction. + */ +static void test_mmio_abort(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_mmio_abort_guest, + expect_sea_handler); + struct kvm_run *run = vcpu->run; + + vcpu_run(vcpu); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_MMIO); + TEST_ASSERT_EQ(run->mmio.phys_addr, MMIO_ADDR); + TEST_ASSERT_EQ(run->mmio.len, sizeof(unsigned long)); + TEST_ASSERT(!run->mmio.is_write, "Expected MMIO read"); + + vcpu_inject_extabt(vcpu); + vcpu_run_expect_done(vcpu); + kvm_vm_free(vm); +} + +extern char test_mmio_nisv_insn; + +static void test_mmio_nisv_guest(void) +{ + WRITE_ONCE(expected_abort_pc, (u64)&test_mmio_nisv_insn); + + asm volatile("test_mmio_nisv_insn:\n\t" + "ldr x0, [%0], #8\n\t" + : : "r" (MMIO_ADDR) : "x0", "memory"); + + GUEST_FAIL("MMIO instruction should not retire"); +} + +/* + * Test that the KVM_RUN ioctl fails for ESR_EL2.ISV=0 MMIO aborts if userspace + * hasn't enabled KVM_CAP_ARM_NISV_TO_USER. + */ +static void test_mmio_nisv(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_mmio_nisv_guest, + unexpected_dabt_handler); + + TEST_ASSERT(_vcpu_run(vcpu), "Expected nonzero return code from KVM_RUN"); + TEST_ASSERT_EQ(errno, ENOSYS); + + kvm_vm_free(vm); +} + +/* + * Test that ESR_EL2.ISV=0 MMIO aborts reach userspace and that an injected SEA + * reaches the guest. + */ +static void test_mmio_nisv_abort(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_mmio_nisv_guest, + expect_sea_handler); + struct kvm_run *run = vcpu->run; + + vm_enable_cap(vm, KVM_CAP_ARM_NISV_TO_USER, 1); + + vcpu_run(vcpu); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_ARM_NISV); + TEST_ASSERT_EQ(run->arm_nisv.fault_ipa, MMIO_ADDR); + + vcpu_inject_extabt(vcpu); + vcpu_run_expect_done(vcpu); + kvm_vm_free(vm); +} + +int main(void) +{ + test_mmio_abort(); + test_mmio_nisv(); + test_mmio_nisv_abort(); +} diff --git a/tools/testing/selftests/kvm/arm64/no-vgic-v3.c b/tools/testing/selftests/kvm/arm64/no-vgic-v3.c new file mode 100644 index 000000000000..ebd70430c89d --- /dev/null +++ b/tools/testing/selftests/kvm/arm64/no-vgic-v3.c @@ -0,0 +1,175 @@ +// SPDX-License-Identifier: GPL-2.0 + +// Check that, on a GICv3 system, not configuring GICv3 correctly +// results in all of the sysregs generating an UNDEF exception. 
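+//
+// "Configuring GICv3 correctly" here means creating an in-kernel vGICv3
+// for the VM (KVM_CREATE_DEVICE with KVM_DEV_TYPE_ARM_VGIC_V3, as the
+// selftest vgic_v3_setup() helper does); without one, KVM reports
+// ID_AA64PFR0_EL1.GIC == 0 and the ICC_* system registers trap to UNDEF.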
+ +#include +#include +#include + +static volatile bool handled; + +#define __check_sr_read(r) \ + ({ \ + uint64_t val; \ + \ + handled = false; \ + dsb(sy); \ + val = read_sysreg_s(SYS_ ## r); \ + val; \ + }) + +#define __check_sr_write(r) \ + do { \ + handled = false; \ + dsb(sy); \ + write_sysreg_s(0, SYS_ ## r); \ + isb(); \ + } while(0) + +/* Fatal checks */ +#define check_sr_read(r) \ + do { \ + __check_sr_read(r); \ + __GUEST_ASSERT(handled, #r " no read trap"); \ + } while(0) + +#define check_sr_write(r) \ + do { \ + __check_sr_write(r); \ + __GUEST_ASSERT(handled, #r " no write trap"); \ + } while(0) + +#define check_sr_rw(r) \ + do { \ + check_sr_read(r); \ + check_sr_write(r); \ + } while(0) + +static void guest_code(void) +{ + uint64_t val; + + /* + * Check that we advertise that ID_AA64PFR0_EL1.GIC == 0, having + * hidden the feature at runtime without any other userspace action. + */ + __GUEST_ASSERT(FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_GIC), + read_sysreg(id_aa64pfr0_el1)) == 0, + "GICv3 wrongly advertised"); + + /* + * Access all GICv3 registers, and fail if we don't get an UNDEF. + * Note that we happily access all the APxRn registers without + * checking their existance, as all we want to see is a failure. + */ + check_sr_rw(ICC_PMR_EL1); + check_sr_read(ICC_IAR0_EL1); + check_sr_write(ICC_EOIR0_EL1); + check_sr_rw(ICC_HPPIR0_EL1); + check_sr_rw(ICC_BPR0_EL1); + check_sr_rw(ICC_AP0R0_EL1); + check_sr_rw(ICC_AP0R1_EL1); + check_sr_rw(ICC_AP0R2_EL1); + check_sr_rw(ICC_AP0R3_EL1); + check_sr_rw(ICC_AP1R0_EL1); + check_sr_rw(ICC_AP1R1_EL1); + check_sr_rw(ICC_AP1R2_EL1); + check_sr_rw(ICC_AP1R3_EL1); + check_sr_write(ICC_DIR_EL1); + check_sr_read(ICC_RPR_EL1); + check_sr_write(ICC_SGI1R_EL1); + check_sr_write(ICC_ASGI1R_EL1); + check_sr_write(ICC_SGI0R_EL1); + check_sr_read(ICC_IAR1_EL1); + check_sr_write(ICC_EOIR1_EL1); + check_sr_rw(ICC_HPPIR1_EL1); + check_sr_rw(ICC_BPR1_EL1); + check_sr_rw(ICC_CTLR_EL1); + check_sr_rw(ICC_IGRPEN0_EL1); + check_sr_rw(ICC_IGRPEN1_EL1); + + /* + * ICC_SRE_EL1 may not be trappable, as ICC_SRE_EL2.Enable can + * be RAO/WI. Engage in non-fatal accesses, starting with a + * write of 0 to try and disable SRE, and let's see if it + * sticks. + */ + __check_sr_write(ICC_SRE_EL1); + if (!handled) + GUEST_PRINTF("ICC_SRE_EL1 write not trapping (OK)\n"); + + val = __check_sr_read(ICC_SRE_EL1); + if (!handled) { + __GUEST_ASSERT((val & BIT(0)), + "ICC_SRE_EL1 not trapped but ICC_SRE_EL1.SRE not set\n"); + GUEST_PRINTF("ICC_SRE_EL1 read not trapping (OK)\n"); + } + + GUEST_DONE(); +} + +static void guest_undef_handler(struct ex_regs *regs) +{ + /* Success, we've gracefully exploded! 
*/ + handled = true; + regs->pc += 4; +} + +static void test_run_vcpu(struct kvm_vcpu *vcpu) +{ + struct ucall uc; + + do { + vcpu_run(vcpu); + + switch (get_ucall(vcpu, &uc)) { + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + break; + case UCALL_PRINTF: + printf("%s", uc.buffer); + break; + case UCALL_DONE: + break; + default: + TEST_FAIL("Unknown ucall %lu", uc.cmd); + } + } while (uc.cmd != UCALL_DONE); +} + +static void test_guest_no_gicv3(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + + /* Create a VM without a GICv3 */ + vm = vm_create_with_one_vcpu(&vcpu, guest_code); + + vm_init_descriptor_tables(vm); + vcpu_init_descriptor_tables(vcpu); + + vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT, + ESR_ELx_EC_UNKNOWN, guest_undef_handler); + + test_run_vcpu(vcpu); + + kvm_vm_free(vm); +} + +int main(int argc, char *argv[]) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + uint64_t pfr0; + + vm = vm_create_with_one_vcpu(&vcpu, NULL); + pfr0 = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1)); + __TEST_REQUIRE(FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_GIC), pfr0), + "GICv3 not supported."); + kvm_vm_free(vm); + + test_guest_no_gicv3(); + + return 0; +} diff --git a/tools/testing/selftests/kvm/arm64/page_fault_test.c b/tools/testing/selftests/kvm/arm64/page_fault_test.c new file mode 100644 index 000000000000..ec33a8f9c908 --- /dev/null +++ b/tools/testing/selftests/kvm/arm64/page_fault_test.c @@ -0,0 +1,1135 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * page_fault_test.c - Test stage 2 faults. + * + * This test tries different combinations of guest accesses (e.g., write, + * S1PTW), backing source type (e.g., anon) and types of faults (e.g., read on + * hugetlbfs with a hole). It checks that the expected handling method is + * called (e.g., uffd faults with the right address and write/read flag). + */ +#include +#include +#include +#include +#include +#include +#include +#include "guest_modes.h" +#include "userfaultfd_util.h" + +/* Guest virtual addresses that point to the test page and its PTE. */ +#define TEST_GVA 0xc0000000 +#define TEST_EXEC_GVA (TEST_GVA + 0x8) +#define TEST_PTE_GVA 0xb0000000 +#define TEST_DATA 0x0123456789ABCDEF + +static uint64_t *guest_test_memory = (uint64_t *)TEST_GVA; + +#define CMD_NONE (0) +#define CMD_SKIP_TEST (1ULL << 1) +#define CMD_HOLE_PT (1ULL << 2) +#define CMD_HOLE_DATA (1ULL << 3) +#define CMD_CHECK_WRITE_IN_DIRTY_LOG (1ULL << 4) +#define CMD_CHECK_S1PTW_WR_IN_DIRTY_LOG (1ULL << 5) +#define CMD_CHECK_NO_WRITE_IN_DIRTY_LOG (1ULL << 6) +#define CMD_CHECK_NO_S1PTW_WR_IN_DIRTY_LOG (1ULL << 7) +#define CMD_SET_PTE_AF (1ULL << 8) + +#define PREPARE_FN_NR 10 +#define CHECK_FN_NR 10 + +static struct event_cnt { + int mmio_exits; + int fail_vcpu_runs; + int uffd_faults; + /* uffd_faults is incremented from multiple threads. 
*/ + pthread_mutex_t uffd_faults_mutex; +} events; + +struct test_desc { + const char *name; + uint64_t mem_mark_cmd; + /* Skip the test if any prepare function returns false */ + bool (*guest_prepare[PREPARE_FN_NR])(void); + void (*guest_test)(void); + void (*guest_test_check[CHECK_FN_NR])(void); + uffd_handler_t uffd_pt_handler; + uffd_handler_t uffd_data_handler; + void (*dabt_handler)(struct ex_regs *regs); + void (*iabt_handler)(struct ex_regs *regs); + void (*mmio_handler)(struct kvm_vm *vm, struct kvm_run *run); + void (*fail_vcpu_run_handler)(int ret); + uint32_t pt_memslot_flags; + uint32_t data_memslot_flags; + bool skip; + struct event_cnt expected_events; +}; + +struct test_params { + enum vm_mem_backing_src_type src_type; + struct test_desc *test_desc; +}; + +static inline void flush_tlb_page(uint64_t vaddr) +{ + uint64_t page = vaddr >> 12; + + dsb(ishst); + asm volatile("tlbi vaae1is, %0" :: "r" (page)); + dsb(ish); + isb(); +} + +static void guest_write64(void) +{ + uint64_t val; + + WRITE_ONCE(*guest_test_memory, TEST_DATA); + val = READ_ONCE(*guest_test_memory); + GUEST_ASSERT_EQ(val, TEST_DATA); +} + +/* Check the system for atomic instructions. */ +static bool guest_check_lse(void) +{ + uint64_t isar0 = read_sysreg(id_aa64isar0_el1); + uint64_t atomic; + + atomic = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_ATOMIC), isar0); + return atomic >= 2; +} + +static bool guest_check_dc_zva(void) +{ + uint64_t dczid = read_sysreg(dczid_el0); + uint64_t dzp = FIELD_GET(ARM64_FEATURE_MASK(DCZID_EL0_DZP), dczid); + + return dzp == 0; +} + +/* Compare and swap instruction. */ +static void guest_cas(void) +{ + uint64_t val; + + GUEST_ASSERT(guest_check_lse()); + asm volatile(".arch_extension lse\n" + "casal %0, %1, [%2]\n" + :: "r" (0ul), "r" (TEST_DATA), "r" (guest_test_memory)); + val = READ_ONCE(*guest_test_memory); + GUEST_ASSERT_EQ(val, TEST_DATA); +} + +static void guest_read64(void) +{ + uint64_t val; + + val = READ_ONCE(*guest_test_memory); + GUEST_ASSERT_EQ(val, 0); +} + +/* Address translation instruction */ +static void guest_at(void) +{ + uint64_t par; + + asm volatile("at s1e1r, %0" :: "r" (guest_test_memory)); + isb(); + par = read_sysreg(par_el1); + + /* Bit 1 indicates whether the AT was successful */ + GUEST_ASSERT_EQ(par & 1, 0); +} + +/* + * The size of the block written by "dc zva" is guaranteed to be between (2 << + * 0) and (2 << 9), which is safe in our case as we need the write to happen + * for at least a word, and not more than a page. + */ +static void guest_dc_zva(void) +{ + uint16_t val; + + asm volatile("dc zva, %0" :: "r" (guest_test_memory)); + dsb(ish); + val = READ_ONCE(*guest_test_memory); + GUEST_ASSERT_EQ(val, 0); +} + +/* + * Pre-indexing loads and stores don't have a valid syndrome (ESR_EL2.ISV==0). + * And that's special because KVM must take special care with those: they + * should still count as accesses for dirty logging or user-faulting, but + * should be handled differently on mmio. + */ +static void guest_ld_preidx(void) +{ + uint64_t val; + uint64_t addr = TEST_GVA - 8; + + /* + * This ends up accessing "TEST_GVA + 8 - 8", where "TEST_GVA - 8" is + * in a gap between memslots not backing by anything. + */ + asm volatile("ldr %0, [%1, #8]!" + : "=r" (val), "+r" (addr)); + GUEST_ASSERT_EQ(val, 0); + GUEST_ASSERT_EQ(addr, TEST_GVA); +} + +static void guest_st_preidx(void) +{ + uint64_t val = TEST_DATA; + uint64_t addr = TEST_GVA - 8; + + asm volatile("str %0, [%1, #8]!" 
+ : "+r" (val), "+r" (addr)); + + GUEST_ASSERT_EQ(addr, TEST_GVA); + val = READ_ONCE(*guest_test_memory); +} + +static bool guest_set_ha(void) +{ + uint64_t mmfr1 = read_sysreg(id_aa64mmfr1_el1); + uint64_t hadbs, tcr; + + /* Skip if HA is not supported. */ + hadbs = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_HAFDBS), mmfr1); + if (hadbs == 0) + return false; + + tcr = read_sysreg(tcr_el1) | TCR_EL1_HA; + write_sysreg(tcr, tcr_el1); + isb(); + + return true; +} + +static bool guest_clear_pte_af(void) +{ + *((uint64_t *)TEST_PTE_GVA) &= ~PTE_AF; + flush_tlb_page(TEST_GVA); + + return true; +} + +static void guest_check_pte_af(void) +{ + dsb(ish); + GUEST_ASSERT_EQ(*((uint64_t *)TEST_PTE_GVA) & PTE_AF, PTE_AF); +} + +static void guest_check_write_in_dirty_log(void) +{ + GUEST_SYNC(CMD_CHECK_WRITE_IN_DIRTY_LOG); +} + +static void guest_check_no_write_in_dirty_log(void) +{ + GUEST_SYNC(CMD_CHECK_NO_WRITE_IN_DIRTY_LOG); +} + +static void guest_check_s1ptw_wr_in_dirty_log(void) +{ + GUEST_SYNC(CMD_CHECK_S1PTW_WR_IN_DIRTY_LOG); +} + +static void guest_check_no_s1ptw_wr_in_dirty_log(void) +{ + GUEST_SYNC(CMD_CHECK_NO_S1PTW_WR_IN_DIRTY_LOG); +} + +static void guest_exec(void) +{ + int (*code)(void) = (int (*)(void))TEST_EXEC_GVA; + int ret; + + ret = code(); + GUEST_ASSERT_EQ(ret, 0x77); +} + +static bool guest_prepare(struct test_desc *test) +{ + bool (*prepare_fn)(void); + int i; + + for (i = 0; i < PREPARE_FN_NR; i++) { + prepare_fn = test->guest_prepare[i]; + if (prepare_fn && !prepare_fn()) + return false; + } + + return true; +} + +static void guest_test_check(struct test_desc *test) +{ + void (*check_fn)(void); + int i; + + for (i = 0; i < CHECK_FN_NR; i++) { + check_fn = test->guest_test_check[i]; + if (check_fn) + check_fn(); + } +} + +static void guest_code(struct test_desc *test) +{ + if (!guest_prepare(test)) + GUEST_SYNC(CMD_SKIP_TEST); + + GUEST_SYNC(test->mem_mark_cmd); + + if (test->guest_test) + test->guest_test(); + + guest_test_check(test); + GUEST_DONE(); +} + +static void no_dabt_handler(struct ex_regs *regs) +{ + GUEST_FAIL("Unexpected dabt, far_el1 = 0x%lx", read_sysreg(far_el1)); +} + +static void no_iabt_handler(struct ex_regs *regs) +{ + GUEST_FAIL("Unexpected iabt, pc = 0x%lx", regs->pc); +} + +static struct uffd_args { + char *copy; + void *hva; + uint64_t paging_size; +} pt_args, data_args; + +/* Returns true to continue the test, and false if it should be skipped. 
*/ +static int uffd_generic_handler(int uffd_mode, int uffd, struct uffd_msg *msg, + struct uffd_args *args) +{ + uint64_t addr = msg->arg.pagefault.address; + uint64_t flags = msg->arg.pagefault.flags; + struct uffdio_copy copy; + int ret; + + TEST_ASSERT(uffd_mode == UFFDIO_REGISTER_MODE_MISSING, + "The only expected UFFD mode is MISSING"); + TEST_ASSERT_EQ(addr, (uint64_t)args->hva); + + pr_debug("uffd fault: addr=%p write=%d\n", + (void *)addr, !!(flags & UFFD_PAGEFAULT_FLAG_WRITE)); + + copy.src = (uint64_t)args->copy; + copy.dst = addr; + copy.len = args->paging_size; + copy.mode = 0; + + ret = ioctl(uffd, UFFDIO_COPY, &copy); + if (ret == -1) { + pr_info("Failed UFFDIO_COPY in 0x%lx with errno: %d\n", + addr, errno); + return ret; + } + + pthread_mutex_lock(&events.uffd_faults_mutex); + events.uffd_faults += 1; + pthread_mutex_unlock(&events.uffd_faults_mutex); + return 0; +} + +static int uffd_pt_handler(int mode, int uffd, struct uffd_msg *msg) +{ + return uffd_generic_handler(mode, uffd, msg, &pt_args); +} + +static int uffd_data_handler(int mode, int uffd, struct uffd_msg *msg) +{ + return uffd_generic_handler(mode, uffd, msg, &data_args); +} + +static void setup_uffd_args(struct userspace_mem_region *region, + struct uffd_args *args) +{ + args->hva = (void *)region->region.userspace_addr; + args->paging_size = region->region.memory_size; + + args->copy = malloc(args->paging_size); + TEST_ASSERT(args->copy, "Failed to allocate data copy."); + memcpy(args->copy, args->hva, args->paging_size); +} + +static void setup_uffd(struct kvm_vm *vm, struct test_params *p, + struct uffd_desc **pt_uffd, struct uffd_desc **data_uffd) +{ + struct test_desc *test = p->test_desc; + int uffd_mode = UFFDIO_REGISTER_MODE_MISSING; + + setup_uffd_args(vm_get_mem_region(vm, MEM_REGION_PT), &pt_args); + setup_uffd_args(vm_get_mem_region(vm, MEM_REGION_TEST_DATA), &data_args); + + *pt_uffd = NULL; + if (test->uffd_pt_handler) + *pt_uffd = uffd_setup_demand_paging(uffd_mode, 0, + pt_args.hva, + pt_args.paging_size, + 1, test->uffd_pt_handler); + + *data_uffd = NULL; + if (test->uffd_data_handler) + *data_uffd = uffd_setup_demand_paging(uffd_mode, 0, + data_args.hva, + data_args.paging_size, + 1, test->uffd_data_handler); +} + +static void free_uffd(struct test_desc *test, struct uffd_desc *pt_uffd, + struct uffd_desc *data_uffd) +{ + if (test->uffd_pt_handler) + uffd_stop_demand_paging(pt_uffd); + if (test->uffd_data_handler) + uffd_stop_demand_paging(data_uffd); + + free(pt_args.copy); + free(data_args.copy); +} + +static int uffd_no_handler(int mode, int uffd, struct uffd_msg *msg) +{ + TEST_FAIL("There was no UFFD fault expected."); + return -1; +} + +/* Returns false if the test should be skipped. 
*/ +static bool punch_hole_in_backing_store(struct kvm_vm *vm, + struct userspace_mem_region *region) +{ + void *hva = (void *)region->region.userspace_addr; + uint64_t paging_size = region->region.memory_size; + int ret, fd = region->fd; + + if (fd != -1) { + ret = fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, + 0, paging_size); + TEST_ASSERT(ret == 0, "fallocate failed"); + } else { + ret = madvise(hva, paging_size, MADV_DONTNEED); + TEST_ASSERT(ret == 0, "madvise failed"); + } + + return true; +} + +static void mmio_on_test_gpa_handler(struct kvm_vm *vm, struct kvm_run *run) +{ + struct userspace_mem_region *region; + void *hva; + + region = vm_get_mem_region(vm, MEM_REGION_TEST_DATA); + hva = (void *)region->region.userspace_addr; + + TEST_ASSERT_EQ(run->mmio.phys_addr, region->region.guest_phys_addr); + + memcpy(hva, run->mmio.data, run->mmio.len); + events.mmio_exits += 1; +} + +static void mmio_no_handler(struct kvm_vm *vm, struct kvm_run *run) +{ + uint64_t data; + + memcpy(&data, run->mmio.data, sizeof(data)); + pr_debug("addr=%lld len=%d w=%d data=%lx\n", + run->mmio.phys_addr, run->mmio.len, + run->mmio.is_write, data); + TEST_FAIL("There was no MMIO exit expected."); +} + +static bool check_write_in_dirty_log(struct kvm_vm *vm, + struct userspace_mem_region *region, + uint64_t host_pg_nr) +{ + unsigned long *bmap; + bool first_page_dirty; + uint64_t size = region->region.memory_size; + + /* getpage_size() is not always equal to vm->page_size */ + bmap = bitmap_zalloc(size / getpagesize()); + kvm_vm_get_dirty_log(vm, region->region.slot, bmap); + first_page_dirty = test_bit(host_pg_nr, bmap); + free(bmap); + return first_page_dirty; +} + +/* Returns true to continue the test, and false if it should be skipped. */ +static bool handle_cmd(struct kvm_vm *vm, int cmd) +{ + struct userspace_mem_region *data_region, *pt_region; + bool continue_test = true; + uint64_t pte_gpa, pte_pg; + + data_region = vm_get_mem_region(vm, MEM_REGION_TEST_DATA); + pt_region = vm_get_mem_region(vm, MEM_REGION_PT); + pte_gpa = addr_hva2gpa(vm, virt_get_pte_hva(vm, TEST_GVA)); + pte_pg = (pte_gpa - pt_region->region.guest_phys_addr) / getpagesize(); + + if (cmd == CMD_SKIP_TEST) + continue_test = false; + + if (cmd & CMD_HOLE_PT) + continue_test = punch_hole_in_backing_store(vm, pt_region); + if (cmd & CMD_HOLE_DATA) + continue_test = punch_hole_in_backing_store(vm, data_region); + if (cmd & CMD_CHECK_WRITE_IN_DIRTY_LOG) + TEST_ASSERT(check_write_in_dirty_log(vm, data_region, 0), + "Missing write in dirty log"); + if (cmd & CMD_CHECK_S1PTW_WR_IN_DIRTY_LOG) + TEST_ASSERT(check_write_in_dirty_log(vm, pt_region, pte_pg), + "Missing s1ptw write in dirty log"); + if (cmd & CMD_CHECK_NO_WRITE_IN_DIRTY_LOG) + TEST_ASSERT(!check_write_in_dirty_log(vm, data_region, 0), + "Unexpected write in dirty log"); + if (cmd & CMD_CHECK_NO_S1PTW_WR_IN_DIRTY_LOG) + TEST_ASSERT(!check_write_in_dirty_log(vm, pt_region, pte_pg), + "Unexpected s1ptw write in dirty log"); + + return continue_test; +} + +void fail_vcpu_run_no_handler(int ret) +{ + TEST_FAIL("Unexpected vcpu run failure"); +} + +void fail_vcpu_run_mmio_no_syndrome_handler(int ret) +{ + TEST_ASSERT(errno == ENOSYS, + "The mmio handler should have returned not implemented."); + events.fail_vcpu_runs += 1; +} + +typedef uint32_t aarch64_insn_t; +extern aarch64_insn_t __exec_test[2]; + +noinline void __return_0x77(void) +{ + asm volatile("__exec_test: mov x0, #0x77\n" + "ret\n"); +} + +/* + * Note that this function runs on the host before the test VM 
starts: there's + * no need to sync the D$ and I$ caches. + */ +static void load_exec_code_for_test(struct kvm_vm *vm) +{ + uint64_t *code; + struct userspace_mem_region *region; + void *hva; + + region = vm_get_mem_region(vm, MEM_REGION_TEST_DATA); + hva = (void *)region->region.userspace_addr; + + assert(TEST_EXEC_GVA > TEST_GVA); + code = hva + TEST_EXEC_GVA - TEST_GVA; + memcpy(code, __exec_test, sizeof(__exec_test)); +} + +static void setup_abort_handlers(struct kvm_vm *vm, struct kvm_vcpu *vcpu, + struct test_desc *test) +{ + vm_init_descriptor_tables(vm); + vcpu_init_descriptor_tables(vcpu); + + vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT, + ESR_ELx_EC_DABT_CUR, no_dabt_handler); + vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT, + ESR_ELx_EC_IABT_CUR, no_iabt_handler); +} + +static void setup_gva_maps(struct kvm_vm *vm) +{ + struct userspace_mem_region *region; + uint64_t pte_gpa; + + region = vm_get_mem_region(vm, MEM_REGION_TEST_DATA); + /* Map TEST_GVA first. This will install a new PTE. */ + virt_pg_map(vm, TEST_GVA, region->region.guest_phys_addr); + /* Then map TEST_PTE_GVA to the above PTE. */ + pte_gpa = addr_hva2gpa(vm, virt_get_pte_hva(vm, TEST_GVA)); + virt_pg_map(vm, TEST_PTE_GVA, pte_gpa); +} + +enum pf_test_memslots { + CODE_AND_DATA_MEMSLOT, + PAGE_TABLE_MEMSLOT, + TEST_DATA_MEMSLOT, +}; + +/* + * Create a memslot for code and data at pfn=0, and test-data and PT ones + * at max_gfn. + */ +static void setup_memslots(struct kvm_vm *vm, struct test_params *p) +{ + uint64_t backing_src_pagesz = get_backing_src_pagesz(p->src_type); + uint64_t guest_page_size = vm->page_size; + uint64_t max_gfn = vm_compute_max_gfn(vm); + /* Enough for 2M of code when using 4K guest pages. */ + uint64_t code_npages = 512; + uint64_t pt_size, data_size, data_gpa; + + /* + * This test requires 1 pgd, 2 pud, 4 pmd, and 6 pte pages when using + * VM_MODE_P48V48_4K. Note that the .text takes ~1.6MBs. That's 13 + * pages. VM_MODE_P48V48_4K is the mode with most PT pages; let's use + * twice that just in case. 
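+	 * That is (1 + 2 + 4 + 6) * 2 = 26 pages, matching the pt_size used below.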
+ */ + pt_size = 26 * guest_page_size; + + /* memslot sizes and gpa's must be aligned to the backing page size */ + pt_size = align_up(pt_size, backing_src_pagesz); + data_size = align_up(guest_page_size, backing_src_pagesz); + data_gpa = (max_gfn * guest_page_size) - data_size; + data_gpa = align_down(data_gpa, backing_src_pagesz); + + vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, 0, + CODE_AND_DATA_MEMSLOT, code_npages, 0); + vm->memslots[MEM_REGION_CODE] = CODE_AND_DATA_MEMSLOT; + vm->memslots[MEM_REGION_DATA] = CODE_AND_DATA_MEMSLOT; + + vm_userspace_mem_region_add(vm, p->src_type, data_gpa - pt_size, + PAGE_TABLE_MEMSLOT, pt_size / guest_page_size, + p->test_desc->pt_memslot_flags); + vm->memslots[MEM_REGION_PT] = PAGE_TABLE_MEMSLOT; + + vm_userspace_mem_region_add(vm, p->src_type, data_gpa, TEST_DATA_MEMSLOT, + data_size / guest_page_size, + p->test_desc->data_memslot_flags); + vm->memslots[MEM_REGION_TEST_DATA] = TEST_DATA_MEMSLOT; +} + +static void setup_ucall(struct kvm_vm *vm) +{ + struct userspace_mem_region *region = vm_get_mem_region(vm, MEM_REGION_TEST_DATA); + + ucall_init(vm, region->region.guest_phys_addr + region->region.memory_size); +} + +static void setup_default_handlers(struct test_desc *test) +{ + if (!test->mmio_handler) + test->mmio_handler = mmio_no_handler; + + if (!test->fail_vcpu_run_handler) + test->fail_vcpu_run_handler = fail_vcpu_run_no_handler; +} + +static void check_event_counts(struct test_desc *test) +{ + TEST_ASSERT_EQ(test->expected_events.uffd_faults, events.uffd_faults); + TEST_ASSERT_EQ(test->expected_events.mmio_exits, events.mmio_exits); + TEST_ASSERT_EQ(test->expected_events.fail_vcpu_runs, events.fail_vcpu_runs); +} + +static void print_test_banner(enum vm_guest_mode mode, struct test_params *p) +{ + struct test_desc *test = p->test_desc; + + pr_debug("Test: %s\n", test->name); + pr_debug("Testing guest mode: %s\n", vm_guest_mode_string(mode)); + pr_debug("Testing memory backing src type: %s\n", + vm_mem_backing_src_alias(p->src_type)->name); +} + +static void reset_event_counts(void) +{ + memset(&events, 0, sizeof(events)); +} + +/* + * This function either succeeds, skips the test (after setting test->skip), or + * fails with a TEST_FAIL that aborts all tests. + */ +static void vcpu_run_loop(struct kvm_vm *vm, struct kvm_vcpu *vcpu, + struct test_desc *test) +{ + struct kvm_run *run; + struct ucall uc; + int ret; + + run = vcpu->run; + + for (;;) { + ret = _vcpu_run(vcpu); + if (ret) { + test->fail_vcpu_run_handler(ret); + goto done; + } + + switch (get_ucall(vcpu, &uc)) { + case UCALL_SYNC: + if (!handle_cmd(vm, uc.args[1])) { + test->skip = true; + goto done; + } + break; + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + break; + case UCALL_DONE: + goto done; + case UCALL_NONE: + if (run->exit_reason == KVM_EXIT_MMIO) + test->mmio_handler(vm, run); + break; + default: + TEST_FAIL("Unknown ucall %lu", uc.cmd); + } + } + +done: + pr_debug(test->skip ? 
"Skipped.\n" : "Done.\n"); +} + +static void run_test(enum vm_guest_mode mode, void *arg) +{ + struct test_params *p = (struct test_params *)arg; + struct test_desc *test = p->test_desc; + struct kvm_vm *vm; + struct kvm_vcpu *vcpu; + struct uffd_desc *pt_uffd, *data_uffd; + + print_test_banner(mode, p); + + vm = ____vm_create(VM_SHAPE(mode)); + setup_memslots(vm, p); + kvm_vm_elf_load(vm, program_invocation_name); + setup_ucall(vm); + vcpu = vm_vcpu_add(vm, 0, guest_code); + + setup_gva_maps(vm); + + reset_event_counts(); + + /* + * Set some code in the data memslot for the guest to execute (only + * applicable to the EXEC tests). This has to be done before + * setup_uffd() as that function copies the memslot data for the uffd + * handler. + */ + load_exec_code_for_test(vm); + setup_uffd(vm, p, &pt_uffd, &data_uffd); + setup_abort_handlers(vm, vcpu, test); + setup_default_handlers(test); + vcpu_args_set(vcpu, 1, test); + + vcpu_run_loop(vm, vcpu, test); + + kvm_vm_free(vm); + free_uffd(test, pt_uffd, data_uffd); + + /* + * Make sure we check the events after the uffd threads have exited, + * which means they updated their respective event counters. + */ + if (!test->skip) + check_event_counts(test); +} + +static void help(char *name) +{ + puts(""); + printf("usage: %s [-h] [-s mem-type]\n", name); + puts(""); + guest_modes_help(); + backing_src_help("-s"); + puts(""); +} + +#define SNAME(s) #s +#define SCAT2(a, b) SNAME(a ## _ ## b) +#define SCAT3(a, b, c) SCAT2(a, SCAT2(b, c)) +#define SCAT4(a, b, c, d) SCAT2(a, SCAT3(b, c, d)) + +#define _CHECK(_test) _CHECK_##_test +#define _PREPARE(_test) _PREPARE_##_test +#define _PREPARE_guest_read64 NULL +#define _PREPARE_guest_ld_preidx NULL +#define _PREPARE_guest_write64 NULL +#define _PREPARE_guest_st_preidx NULL +#define _PREPARE_guest_exec NULL +#define _PREPARE_guest_at NULL +#define _PREPARE_guest_dc_zva guest_check_dc_zva +#define _PREPARE_guest_cas guest_check_lse + +/* With or without access flag checks */ +#define _PREPARE_with_af guest_set_ha, guest_clear_pte_af +#define _PREPARE_no_af NULL +#define _CHECK_with_af guest_check_pte_af +#define _CHECK_no_af NULL + +/* Performs an access and checks that no faults were triggered. 
*/ +#define TEST_ACCESS(_access, _with_af, _mark_cmd) \ +{ \ + .name = SCAT3(_access, _with_af, #_mark_cmd), \ + .guest_prepare = { _PREPARE(_with_af), \ + _PREPARE(_access) }, \ + .mem_mark_cmd = _mark_cmd, \ + .guest_test = _access, \ + .guest_test_check = { _CHECK(_with_af) }, \ + .expected_events = { 0 }, \ +} + +#define TEST_UFFD(_access, _with_af, _mark_cmd, \ + _uffd_data_handler, _uffd_pt_handler, _uffd_faults) \ +{ \ + .name = SCAT4(uffd, _access, _with_af, #_mark_cmd), \ + .guest_prepare = { _PREPARE(_with_af), \ + _PREPARE(_access) }, \ + .guest_test = _access, \ + .mem_mark_cmd = _mark_cmd, \ + .guest_test_check = { _CHECK(_with_af) }, \ + .uffd_data_handler = _uffd_data_handler, \ + .uffd_pt_handler = _uffd_pt_handler, \ + .expected_events = { .uffd_faults = _uffd_faults, }, \ +} + +#define TEST_DIRTY_LOG(_access, _with_af, _test_check, _pt_check) \ +{ \ + .name = SCAT3(dirty_log, _access, _with_af), \ + .data_memslot_flags = KVM_MEM_LOG_DIRTY_PAGES, \ + .pt_memslot_flags = KVM_MEM_LOG_DIRTY_PAGES, \ + .guest_prepare = { _PREPARE(_with_af), \ + _PREPARE(_access) }, \ + .guest_test = _access, \ + .guest_test_check = { _CHECK(_with_af), _test_check, _pt_check }, \ + .expected_events = { 0 }, \ +} + +#define TEST_UFFD_AND_DIRTY_LOG(_access, _with_af, _uffd_data_handler, \ + _uffd_faults, _test_check, _pt_check) \ +{ \ + .name = SCAT3(uffd_and_dirty_log, _access, _with_af), \ + .data_memslot_flags = KVM_MEM_LOG_DIRTY_PAGES, \ + .pt_memslot_flags = KVM_MEM_LOG_DIRTY_PAGES, \ + .guest_prepare = { _PREPARE(_with_af), \ + _PREPARE(_access) }, \ + .guest_test = _access, \ + .mem_mark_cmd = CMD_HOLE_DATA | CMD_HOLE_PT, \ + .guest_test_check = { _CHECK(_with_af), _test_check, _pt_check }, \ + .uffd_data_handler = _uffd_data_handler, \ + .uffd_pt_handler = uffd_pt_handler, \ + .expected_events = { .uffd_faults = _uffd_faults, }, \ +} + +#define TEST_RO_MEMSLOT(_access, _mmio_handler, _mmio_exits) \ +{ \ + .name = SCAT2(ro_memslot, _access), \ + .data_memslot_flags = KVM_MEM_READONLY, \ + .pt_memslot_flags = KVM_MEM_READONLY, \ + .guest_prepare = { _PREPARE(_access) }, \ + .guest_test = _access, \ + .mmio_handler = _mmio_handler, \ + .expected_events = { .mmio_exits = _mmio_exits }, \ +} + +#define TEST_RO_MEMSLOT_NO_SYNDROME(_access) \ +{ \ + .name = SCAT2(ro_memslot_no_syndrome, _access), \ + .data_memslot_flags = KVM_MEM_READONLY, \ + .pt_memslot_flags = KVM_MEM_READONLY, \ + .guest_prepare = { _PREPARE(_access) }, \ + .guest_test = _access, \ + .fail_vcpu_run_handler = fail_vcpu_run_mmio_no_syndrome_handler, \ + .expected_events = { .fail_vcpu_runs = 1 }, \ +} + +#define TEST_RO_MEMSLOT_AND_DIRTY_LOG(_access, _mmio_handler, _mmio_exits, \ + _test_check) \ +{ \ + .name = SCAT2(ro_memslot, _access), \ + .data_memslot_flags = KVM_MEM_READONLY | KVM_MEM_LOG_DIRTY_PAGES, \ + .pt_memslot_flags = KVM_MEM_READONLY | KVM_MEM_LOG_DIRTY_PAGES, \ + .guest_prepare = { _PREPARE(_access) }, \ + .guest_test = _access, \ + .guest_test_check = { _test_check }, \ + .mmio_handler = _mmio_handler, \ + .expected_events = { .mmio_exits = _mmio_exits}, \ +} + +#define TEST_RO_MEMSLOT_NO_SYNDROME_AND_DIRTY_LOG(_access, _test_check) \ +{ \ + .name = SCAT2(ro_memslot_no_syn_and_dlog, _access), \ + .data_memslot_flags = KVM_MEM_READONLY | KVM_MEM_LOG_DIRTY_PAGES, \ + .pt_memslot_flags = KVM_MEM_READONLY | KVM_MEM_LOG_DIRTY_PAGES, \ + .guest_prepare = { _PREPARE(_access) }, \ + .guest_test = _access, \ + .guest_test_check = { _test_check }, \ + .fail_vcpu_run_handler = fail_vcpu_run_mmio_no_syndrome_handler, \ + 
.expected_events = { .fail_vcpu_runs = 1 }, \ +} + +#define TEST_RO_MEMSLOT_AND_UFFD(_access, _mmio_handler, _mmio_exits, \ + _uffd_data_handler, _uffd_faults) \ +{ \ + .name = SCAT2(ro_memslot_uffd, _access), \ + .data_memslot_flags = KVM_MEM_READONLY, \ + .pt_memslot_flags = KVM_MEM_READONLY, \ + .mem_mark_cmd = CMD_HOLE_DATA | CMD_HOLE_PT, \ + .guest_prepare = { _PREPARE(_access) }, \ + .guest_test = _access, \ + .uffd_data_handler = _uffd_data_handler, \ + .uffd_pt_handler = uffd_pt_handler, \ + .mmio_handler = _mmio_handler, \ + .expected_events = { .mmio_exits = _mmio_exits, \ + .uffd_faults = _uffd_faults }, \ +} + +#define TEST_RO_MEMSLOT_NO_SYNDROME_AND_UFFD(_access, _uffd_data_handler, \ + _uffd_faults) \ +{ \ + .name = SCAT2(ro_memslot_no_syndrome, _access), \ + .data_memslot_flags = KVM_MEM_READONLY, \ + .pt_memslot_flags = KVM_MEM_READONLY, \ + .mem_mark_cmd = CMD_HOLE_DATA | CMD_HOLE_PT, \ + .guest_prepare = { _PREPARE(_access) }, \ + .guest_test = _access, \ + .uffd_data_handler = _uffd_data_handler, \ + .uffd_pt_handler = uffd_pt_handler, \ + .fail_vcpu_run_handler = fail_vcpu_run_mmio_no_syndrome_handler, \ + .expected_events = { .fail_vcpu_runs = 1, \ + .uffd_faults = _uffd_faults }, \ +} + +static struct test_desc tests[] = { + + /* Check that HW is setting the Access Flag (AF) (sanity checks). */ + TEST_ACCESS(guest_read64, with_af, CMD_NONE), + TEST_ACCESS(guest_ld_preidx, with_af, CMD_NONE), + TEST_ACCESS(guest_cas, with_af, CMD_NONE), + TEST_ACCESS(guest_write64, with_af, CMD_NONE), + TEST_ACCESS(guest_st_preidx, with_af, CMD_NONE), + TEST_ACCESS(guest_dc_zva, with_af, CMD_NONE), + TEST_ACCESS(guest_exec, with_af, CMD_NONE), + + /* + * Punch a hole in the data backing store, and then try multiple + * accesses: reads should return zeroes, and writes should + * re-populate the page. Moreover, the test also checks that no + * exception was generated in the guest. Note that this + * reading/writing behavior is the same as reading/writing a + * punched page (with fallocate(FALLOC_FL_PUNCH_HOLE)) from + * userspace. + */ + TEST_ACCESS(guest_read64, no_af, CMD_HOLE_DATA), + TEST_ACCESS(guest_cas, no_af, CMD_HOLE_DATA), + TEST_ACCESS(guest_ld_preidx, no_af, CMD_HOLE_DATA), + TEST_ACCESS(guest_write64, no_af, CMD_HOLE_DATA), + TEST_ACCESS(guest_st_preidx, no_af, CMD_HOLE_DATA), + TEST_ACCESS(guest_at, no_af, CMD_HOLE_DATA), + TEST_ACCESS(guest_dc_zva, no_af, CMD_HOLE_DATA), + + /* + * Punch holes in the data and PT backing stores and mark them for + * userfaultfd handling. This should result in 2 faults: the access + * on the data backing store, and its respective S1 page table walk + * (S1PTW). + */ + TEST_UFFD(guest_read64, with_af, CMD_HOLE_DATA | CMD_HOLE_PT, + uffd_data_handler, uffd_pt_handler, 2), + TEST_UFFD(guest_read64, no_af, CMD_HOLE_DATA | CMD_HOLE_PT, + uffd_data_handler, uffd_pt_handler, 2), + TEST_UFFD(guest_cas, with_af, CMD_HOLE_DATA | CMD_HOLE_PT, + uffd_data_handler, uffd_pt_handler, 2), + /* + * Can't test guest_at with_af as it's IMPDEF whether the AF is set. + * The S1PTW fault should still be marked as a write. 
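+	 * Only the S1PTW on the PT backing store is expected for AT, which is
+	 * why the entry below uses uffd_no_handler for data and expects 1 fault.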
+ */ + TEST_UFFD(guest_at, no_af, CMD_HOLE_DATA | CMD_HOLE_PT, + uffd_no_handler, uffd_pt_handler, 1), + TEST_UFFD(guest_ld_preidx, with_af, CMD_HOLE_DATA | CMD_HOLE_PT, + uffd_data_handler, uffd_pt_handler, 2), + TEST_UFFD(guest_write64, with_af, CMD_HOLE_DATA | CMD_HOLE_PT, + uffd_data_handler, uffd_pt_handler, 2), + TEST_UFFD(guest_dc_zva, with_af, CMD_HOLE_DATA | CMD_HOLE_PT, + uffd_data_handler, uffd_pt_handler, 2), + TEST_UFFD(guest_st_preidx, with_af, CMD_HOLE_DATA | CMD_HOLE_PT, + uffd_data_handler, uffd_pt_handler, 2), + TEST_UFFD(guest_exec, with_af, CMD_HOLE_DATA | CMD_HOLE_PT, + uffd_data_handler, uffd_pt_handler, 2), + + /* + * Try accesses when the data and PT memory regions are both + * tracked for dirty logging. + */ + TEST_DIRTY_LOG(guest_read64, with_af, guest_check_no_write_in_dirty_log, + guest_check_s1ptw_wr_in_dirty_log), + TEST_DIRTY_LOG(guest_read64, no_af, guest_check_no_write_in_dirty_log, + guest_check_no_s1ptw_wr_in_dirty_log), + TEST_DIRTY_LOG(guest_ld_preidx, with_af, + guest_check_no_write_in_dirty_log, + guest_check_s1ptw_wr_in_dirty_log), + TEST_DIRTY_LOG(guest_at, no_af, guest_check_no_write_in_dirty_log, + guest_check_no_s1ptw_wr_in_dirty_log), + TEST_DIRTY_LOG(guest_exec, with_af, guest_check_no_write_in_dirty_log, + guest_check_s1ptw_wr_in_dirty_log), + TEST_DIRTY_LOG(guest_write64, with_af, guest_check_write_in_dirty_log, + guest_check_s1ptw_wr_in_dirty_log), + TEST_DIRTY_LOG(guest_cas, with_af, guest_check_write_in_dirty_log, + guest_check_s1ptw_wr_in_dirty_log), + TEST_DIRTY_LOG(guest_dc_zva, with_af, guest_check_write_in_dirty_log, + guest_check_s1ptw_wr_in_dirty_log), + TEST_DIRTY_LOG(guest_st_preidx, with_af, guest_check_write_in_dirty_log, + guest_check_s1ptw_wr_in_dirty_log), + + /* + * Access when the data and PT memory regions are both marked for + * dirty logging and UFFD at the same time. The expected result is + * that writes should mark the dirty log and trigger a userfaultfd + * write fault. Reads/execs should result in a read userfaultfd + * fault, and nothing in the dirty log. Any S1PTW should result in + * a write in the dirty log and a userfaultfd write. 
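+	 * (This is why most entries below expect two uffd faults: one for the
+	 * data access and one for the S1PTW; guest_at expects only the S1PTW.)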
+ */ + TEST_UFFD_AND_DIRTY_LOG(guest_read64, with_af, + uffd_data_handler, 2, + guest_check_no_write_in_dirty_log, + guest_check_s1ptw_wr_in_dirty_log), + TEST_UFFD_AND_DIRTY_LOG(guest_read64, no_af, + uffd_data_handler, 2, + guest_check_no_write_in_dirty_log, + guest_check_no_s1ptw_wr_in_dirty_log), + TEST_UFFD_AND_DIRTY_LOG(guest_ld_preidx, with_af, + uffd_data_handler, + 2, guest_check_no_write_in_dirty_log, + guest_check_s1ptw_wr_in_dirty_log), + TEST_UFFD_AND_DIRTY_LOG(guest_at, with_af, uffd_no_handler, 1, + guest_check_no_write_in_dirty_log, + guest_check_s1ptw_wr_in_dirty_log), + TEST_UFFD_AND_DIRTY_LOG(guest_exec, with_af, + uffd_data_handler, 2, + guest_check_no_write_in_dirty_log, + guest_check_s1ptw_wr_in_dirty_log), + TEST_UFFD_AND_DIRTY_LOG(guest_write64, with_af, + uffd_data_handler, + 2, guest_check_write_in_dirty_log, + guest_check_s1ptw_wr_in_dirty_log), + TEST_UFFD_AND_DIRTY_LOG(guest_cas, with_af, + uffd_data_handler, 2, + guest_check_write_in_dirty_log, + guest_check_s1ptw_wr_in_dirty_log), + TEST_UFFD_AND_DIRTY_LOG(guest_dc_zva, with_af, + uffd_data_handler, + 2, guest_check_write_in_dirty_log, + guest_check_s1ptw_wr_in_dirty_log), + TEST_UFFD_AND_DIRTY_LOG(guest_st_preidx, with_af, + uffd_data_handler, 2, + guest_check_write_in_dirty_log, + guest_check_s1ptw_wr_in_dirty_log), + /* + * Access when both the PT and data regions are marked read-only + * (with KVM_MEM_READONLY). Writes with a syndrome result in an + * MMIO exit, writes with no syndrome (e.g., CAS) result in a + * failed vcpu run, and reads/execs with and without syndroms do + * not fault. + */ + TEST_RO_MEMSLOT(guest_read64, 0, 0), + TEST_RO_MEMSLOT(guest_ld_preidx, 0, 0), + TEST_RO_MEMSLOT(guest_at, 0, 0), + TEST_RO_MEMSLOT(guest_exec, 0, 0), + TEST_RO_MEMSLOT(guest_write64, mmio_on_test_gpa_handler, 1), + TEST_RO_MEMSLOT_NO_SYNDROME(guest_dc_zva), + TEST_RO_MEMSLOT_NO_SYNDROME(guest_cas), + TEST_RO_MEMSLOT_NO_SYNDROME(guest_st_preidx), + + /* + * The PT and data regions are both read-only and marked + * for dirty logging at the same time. The expected result is that + * for writes there should be no write in the dirty log. The + * readonly handling is the same as if the memslot was not marked + * for dirty logging: writes with a syndrome result in an MMIO + * exit, and writes with no syndrome result in a failed vcpu run. + */ + TEST_RO_MEMSLOT_AND_DIRTY_LOG(guest_read64, 0, 0, + guest_check_no_write_in_dirty_log), + TEST_RO_MEMSLOT_AND_DIRTY_LOG(guest_ld_preidx, 0, 0, + guest_check_no_write_in_dirty_log), + TEST_RO_MEMSLOT_AND_DIRTY_LOG(guest_at, 0, 0, + guest_check_no_write_in_dirty_log), + TEST_RO_MEMSLOT_AND_DIRTY_LOG(guest_exec, 0, 0, + guest_check_no_write_in_dirty_log), + TEST_RO_MEMSLOT_AND_DIRTY_LOG(guest_write64, mmio_on_test_gpa_handler, + 1, guest_check_no_write_in_dirty_log), + TEST_RO_MEMSLOT_NO_SYNDROME_AND_DIRTY_LOG(guest_dc_zva, + guest_check_no_write_in_dirty_log), + TEST_RO_MEMSLOT_NO_SYNDROME_AND_DIRTY_LOG(guest_cas, + guest_check_no_write_in_dirty_log), + TEST_RO_MEMSLOT_NO_SYNDROME_AND_DIRTY_LOG(guest_st_preidx, + guest_check_no_write_in_dirty_log), + + /* + * The PT and data regions are both read-only and punched with + * holes tracked with userfaultfd. The expected result is the + * union of both userfaultfd and read-only behaviors. For example, + * write accesses result in a userfaultfd write fault and an MMIO + * exit. Writes with no syndrome result in a failed vcpu run and + * no userfaultfd write fault. Reads result in userfaultfd getting + * triggered. 
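+	 * The expected events below therefore combine uffd_faults with either
+	 * mmio_exits or fail_vcpu_runs.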
+ */ + TEST_RO_MEMSLOT_AND_UFFD(guest_read64, 0, 0, uffd_data_handler, 2), + TEST_RO_MEMSLOT_AND_UFFD(guest_ld_preidx, 0, 0, uffd_data_handler, 2), + TEST_RO_MEMSLOT_AND_UFFD(guest_at, 0, 0, uffd_no_handler, 1), + TEST_RO_MEMSLOT_AND_UFFD(guest_exec, 0, 0, uffd_data_handler, 2), + TEST_RO_MEMSLOT_AND_UFFD(guest_write64, mmio_on_test_gpa_handler, 1, + uffd_data_handler, 2), + TEST_RO_MEMSLOT_NO_SYNDROME_AND_UFFD(guest_cas, uffd_data_handler, 2), + TEST_RO_MEMSLOT_NO_SYNDROME_AND_UFFD(guest_dc_zva, uffd_no_handler, 1), + TEST_RO_MEMSLOT_NO_SYNDROME_AND_UFFD(guest_st_preidx, uffd_no_handler, 1), + + { 0 } +}; + +static void for_each_test_and_guest_mode(enum vm_mem_backing_src_type src_type) +{ + struct test_desc *t; + + for (t = &tests[0]; t->name; t++) { + if (t->skip) + continue; + + struct test_params p = { + .src_type = src_type, + .test_desc = t, + }; + + for_each_guest_mode(run_test, &p); + } +} + +int main(int argc, char *argv[]) +{ + enum vm_mem_backing_src_type src_type; + int opt; + + src_type = DEFAULT_VM_MEM_SRC; + + while ((opt = getopt(argc, argv, "hm:s:")) != -1) { + switch (opt) { + case 'm': + guest_modes_cmdline(optarg); + break; + case 's': + src_type = parse_backing_src_type(optarg); + break; + case 'h': + default: + help(argv[0]); + exit(0); + } + } + + for_each_test_and_guest_mode(src_type); + return 0; +} diff --git a/tools/testing/selftests/kvm/arm64/psci_test.c b/tools/testing/selftests/kvm/arm64/psci_test.c new file mode 100644 index 000000000000..ab491ee9e5f7 --- /dev/null +++ b/tools/testing/selftests/kvm/arm64/psci_test.c @@ -0,0 +1,290 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * psci_test - Tests relating to KVM's PSCI implementation. + * + * Copyright (c) 2021 Google LLC. + * + * This test includes: + * - A regression test for a race between KVM servicing the PSCI CPU_ON call + * and userspace reading the targeted vCPU's registers. + * - A test for KVM's handling of PSCI SYSTEM_SUSPEND and the associated + * KVM_SYSTEM_EVENT_SUSPEND UAPI. 
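+ * - A test for KVM's handling of PSCI SYSTEM_OFF2 and the PSCI_OFF2
+ *   shutdown event flag.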
+ */ + +#include +#include +#include + +#include "kvm_util.h" +#include "processor.h" +#include "test_util.h" + +#define CPU_ON_ENTRY_ADDR 0xfeedf00dul +#define CPU_ON_CONTEXT_ID 0xdeadc0deul + +static uint64_t psci_cpu_on(uint64_t target_cpu, uint64_t entry_addr, + uint64_t context_id) +{ + struct arm_smccc_res res; + + smccc_hvc(PSCI_0_2_FN64_CPU_ON, target_cpu, entry_addr, context_id, + 0, 0, 0, 0, &res); + + return res.a0; +} + +static uint64_t psci_affinity_info(uint64_t target_affinity, + uint64_t lowest_affinity_level) +{ + struct arm_smccc_res res; + + smccc_hvc(PSCI_0_2_FN64_AFFINITY_INFO, target_affinity, lowest_affinity_level, + 0, 0, 0, 0, 0, &res); + + return res.a0; +} + +static uint64_t psci_system_suspend(uint64_t entry_addr, uint64_t context_id) +{ + struct arm_smccc_res res; + + smccc_hvc(PSCI_1_0_FN64_SYSTEM_SUSPEND, entry_addr, context_id, + 0, 0, 0, 0, 0, &res); + + return res.a0; +} + +static uint64_t psci_system_off2(uint64_t type, uint64_t cookie) +{ + struct arm_smccc_res res; + + smccc_hvc(PSCI_1_3_FN64_SYSTEM_OFF2, type, cookie, 0, 0, 0, 0, 0, &res); + + return res.a0; +} + +static uint64_t psci_features(uint32_t func_id) +{ + struct arm_smccc_res res; + + smccc_hvc(PSCI_1_0_FN_PSCI_FEATURES, func_id, 0, 0, 0, 0, 0, 0, &res); + + return res.a0; +} + +static void vcpu_power_off(struct kvm_vcpu *vcpu) +{ + struct kvm_mp_state mp_state = { + .mp_state = KVM_MP_STATE_STOPPED, + }; + + vcpu_mp_state_set(vcpu, &mp_state); +} + +static struct kvm_vm *setup_vm(void *guest_code, struct kvm_vcpu **source, + struct kvm_vcpu **target) +{ + struct kvm_vcpu_init init; + struct kvm_vm *vm; + + vm = vm_create(2); + + vm_ioctl(vm, KVM_ARM_PREFERRED_TARGET, &init); + init.features[0] |= (1 << KVM_ARM_VCPU_PSCI_0_2); + + *source = aarch64_vcpu_add(vm, 0, &init, guest_code); + *target = aarch64_vcpu_add(vm, 1, &init, guest_code); + + return vm; +} + +static void enter_guest(struct kvm_vcpu *vcpu) +{ + struct ucall uc; + + vcpu_run(vcpu); + if (get_ucall(vcpu, &uc) == UCALL_ABORT) + REPORT_GUEST_ASSERT(uc); +} + +static void assert_vcpu_reset(struct kvm_vcpu *vcpu) +{ + uint64_t obs_pc, obs_x0; + + obs_pc = vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.pc)); + obs_x0 = vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.regs[0])); + + TEST_ASSERT(obs_pc == CPU_ON_ENTRY_ADDR, + "unexpected target cpu pc: %lx (expected: %lx)", + obs_pc, CPU_ON_ENTRY_ADDR); + TEST_ASSERT(obs_x0 == CPU_ON_CONTEXT_ID, + "unexpected target context id: %lx (expected: %lx)", + obs_x0, CPU_ON_CONTEXT_ID); +} + +static void guest_test_cpu_on(uint64_t target_cpu) +{ + uint64_t target_state; + + GUEST_ASSERT(!psci_cpu_on(target_cpu, CPU_ON_ENTRY_ADDR, CPU_ON_CONTEXT_ID)); + + do { + target_state = psci_affinity_info(target_cpu, 0); + + GUEST_ASSERT((target_state == PSCI_0_2_AFFINITY_LEVEL_ON) || + (target_state == PSCI_0_2_AFFINITY_LEVEL_OFF)); + } while (target_state != PSCI_0_2_AFFINITY_LEVEL_ON); + + GUEST_DONE(); +} + +static void host_test_cpu_on(void) +{ + struct kvm_vcpu *source, *target; + uint64_t target_mpidr; + struct kvm_vm *vm; + struct ucall uc; + + vm = setup_vm(guest_test_cpu_on, &source, &target); + + /* + * make sure the target is already off when executing the test. 
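+	 * The guest will then issue CPU_ON and poll AFFINITY_INFO until the
+	 * target comes online.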
+ */ + vcpu_power_off(target); + + target_mpidr = vcpu_get_reg(target, KVM_ARM64_SYS_REG(SYS_MPIDR_EL1)); + vcpu_args_set(source, 1, target_mpidr & MPIDR_HWID_BITMASK); + enter_guest(source); + + if (get_ucall(source, &uc) != UCALL_DONE) + TEST_FAIL("Unhandled ucall: %lu", uc.cmd); + + assert_vcpu_reset(target); + kvm_vm_free(vm); +} + +static void guest_test_system_suspend(void) +{ + uint64_t ret; + + /* assert that SYSTEM_SUSPEND is discoverable */ + GUEST_ASSERT(!psci_features(PSCI_1_0_FN_SYSTEM_SUSPEND)); + GUEST_ASSERT(!psci_features(PSCI_1_0_FN64_SYSTEM_SUSPEND)); + + ret = psci_system_suspend(CPU_ON_ENTRY_ADDR, CPU_ON_CONTEXT_ID); + GUEST_SYNC(ret); +} + +static void host_test_system_suspend(void) +{ + struct kvm_vcpu *source, *target; + struct kvm_run *run; + struct kvm_vm *vm; + + vm = setup_vm(guest_test_system_suspend, &source, &target); + vm_enable_cap(vm, KVM_CAP_ARM_SYSTEM_SUSPEND, 0); + + vcpu_power_off(target); + run = source->run; + + enter_guest(source); + + TEST_ASSERT_KVM_EXIT_REASON(source, KVM_EXIT_SYSTEM_EVENT); + TEST_ASSERT(run->system_event.type == KVM_SYSTEM_EVENT_SUSPEND, + "Unhandled system event: %u (expected: %u)", + run->system_event.type, KVM_SYSTEM_EVENT_SUSPEND); + + kvm_vm_free(vm); +} + +static void guest_test_system_off2(void) +{ + uint64_t ret; + + /* assert that SYSTEM_OFF2 is discoverable */ + GUEST_ASSERT(psci_features(PSCI_1_3_FN_SYSTEM_OFF2) & + PSCI_1_3_OFF_TYPE_HIBERNATE_OFF); + GUEST_ASSERT(psci_features(PSCI_1_3_FN64_SYSTEM_OFF2) & + PSCI_1_3_OFF_TYPE_HIBERNATE_OFF); + + /* With non-zero 'cookie' field, it should fail */ + ret = psci_system_off2(PSCI_1_3_OFF_TYPE_HIBERNATE_OFF, 1); + GUEST_ASSERT(ret == PSCI_RET_INVALID_PARAMS); + + /* + * This would normally never return, so KVM sets the return value + * to PSCI_RET_INTERNAL_FAILURE. The test case *does* return, so + * that it can test both values for HIBERNATE_OFF. + */ + ret = psci_system_off2(PSCI_1_3_OFF_TYPE_HIBERNATE_OFF, 0); + GUEST_ASSERT(ret == PSCI_RET_INTERNAL_FAILURE); + + /* + * Revision F.b of the PSCI v1.3 specification documents zero as an + * alias for HIBERNATE_OFF, since that's the value used in earlier + * revisions of the spec and some implementations in the field. + */ + ret = psci_system_off2(0, 1); + GUEST_ASSERT(ret == PSCI_RET_INVALID_PARAMS); + + ret = psci_system_off2(0, 0); + GUEST_ASSERT(ret == PSCI_RET_INTERNAL_FAILURE); + + GUEST_DONE(); +} + +static void host_test_system_off2(void) +{ + struct kvm_vcpu *source, *target; + struct kvm_mp_state mps; + uint64_t psci_version = 0; + int nr_shutdowns = 0; + struct kvm_run *run; + struct ucall uc; + + setup_vm(guest_test_system_off2, &source, &target); + + psci_version = vcpu_get_reg(target, KVM_REG_ARM_PSCI_VERSION); + + TEST_ASSERT(psci_version >= PSCI_VERSION(1, 3), + "Unexpected PSCI version %lu.%lu", + PSCI_VERSION_MAJOR(psci_version), + PSCI_VERSION_MINOR(psci_version)); + + vcpu_power_off(target); + run = source->run; + + enter_guest(source); + while (run->exit_reason == KVM_EXIT_SYSTEM_EVENT) { + TEST_ASSERT(run->system_event.type == KVM_SYSTEM_EVENT_SHUTDOWN, + "Unhandled system event: %u (expected: %u)", + run->system_event.type, KVM_SYSTEM_EVENT_SHUTDOWN); + TEST_ASSERT(run->system_event.ndata >= 1, + "Unexpected amount of system event data: %u (expected, >= 1)", + run->system_event.ndata); + TEST_ASSERT(run->system_event.data[0] & KVM_SYSTEM_EVENT_SHUTDOWN_FLAG_PSCI_OFF2, + "PSCI_OFF2 flag not set. 
Flags %llu (expected %llu)", + run->system_event.data[0], KVM_SYSTEM_EVENT_SHUTDOWN_FLAG_PSCI_OFF2); + + nr_shutdowns++; + + /* Restart the vCPU */ + mps.mp_state = KVM_MP_STATE_RUNNABLE; + vcpu_mp_state_set(source, &mps); + + enter_guest(source); + } + + TEST_ASSERT(get_ucall(source, &uc) == UCALL_DONE, "Guest did not exit cleanly"); + TEST_ASSERT(nr_shutdowns == 2, "Two shutdown events were expected, but saw %d", nr_shutdowns); +} + +int main(void) +{ + TEST_REQUIRE(kvm_has_cap(KVM_CAP_ARM_SYSTEM_SUSPEND)); + + host_test_cpu_on(); + host_test_system_suspend(); + host_test_system_off2(); + return 0; +} diff --git a/tools/testing/selftests/kvm/arm64/set_id_regs.c b/tools/testing/selftests/kvm/arm64/set_id_regs.c new file mode 100644 index 000000000000..bc6cf50e5135 --- /dev/null +++ b/tools/testing/selftests/kvm/arm64/set_id_regs.c @@ -0,0 +1,695 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * set_id_regs - Test for setting ID registers from userspace. + * + * Copyright (c) 2023 Google LLC. + * + * + * Test that KVM supports setting ID registers from userspace and handles the + * feature set correctly. + */ + +#include +#include "kvm_util.h" +#include "processor.h" +#include "test_util.h" +#include + +enum ftr_type { + FTR_EXACT, /* Use a predefined safe value */ + FTR_LOWER_SAFE, /* Smaller value is safe */ + FTR_HIGHER_SAFE, /* Bigger value is safe */ + FTR_HIGHER_OR_ZERO_SAFE, /* Bigger value is safe, but 0 is biggest */ + FTR_END, /* Mark the last ftr bits */ +}; + +#define FTR_SIGNED true /* Value should be treated as signed */ +#define FTR_UNSIGNED false /* Value should be treated as unsigned */ + +struct reg_ftr_bits { + char *name; + bool sign; + enum ftr_type type; + uint8_t shift; + uint64_t mask; + /* + * For FTR_EXACT, safe_val is used as the exact safe value. + * For FTR_LOWER_SAFE, safe_val is used as the minimal safe value. 
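+	 * The FTR_HIGHER_SAFE and FTR_HIGHER_OR_ZERO_SAFE types do not use
+	 * safe_val.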
+ */ + int64_t safe_val; +}; + +struct test_feature_reg { + uint32_t reg; + const struct reg_ftr_bits *ftr_bits; +}; + +#define __REG_FTR_BITS(NAME, SIGNED, TYPE, SHIFT, MASK, SAFE_VAL) \ + { \ + .name = #NAME, \ + .sign = SIGNED, \ + .type = TYPE, \ + .shift = SHIFT, \ + .mask = MASK, \ + .safe_val = SAFE_VAL, \ + } + +#define REG_FTR_BITS(type, reg, field, safe_val) \ + __REG_FTR_BITS(reg##_##field, FTR_UNSIGNED, type, reg##_##field##_SHIFT, \ + reg##_##field##_MASK, safe_val) + +#define S_REG_FTR_BITS(type, reg, field, safe_val) \ + __REG_FTR_BITS(reg##_##field, FTR_SIGNED, type, reg##_##field##_SHIFT, \ + reg##_##field##_MASK, safe_val) + +#define REG_FTR_END \ + { \ + .type = FTR_END, \ + } + +static const struct reg_ftr_bits ftr_id_aa64dfr0_el1[] = { + S_REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64DFR0_EL1, DoubleLock, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64DFR0_EL1, WRPs, 0), + S_REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64DFR0_EL1, PMUVer, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64DFR0_EL1, DebugVer, ID_AA64DFR0_EL1_DebugVer_IMP), + REG_FTR_END, +}; + +static const struct reg_ftr_bits ftr_id_dfr0_el1[] = { + S_REG_FTR_BITS(FTR_LOWER_SAFE, ID_DFR0_EL1, PerfMon, ID_DFR0_EL1_PerfMon_PMUv3), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_DFR0_EL1, CopDbg, ID_DFR0_EL1_CopDbg_Armv8), + REG_FTR_END, +}; + +static const struct reg_ftr_bits ftr_id_aa64isar0_el1[] = { + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, RNDR, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, TLB, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, TS, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, FHM, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, DP, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, SM4, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, SM3, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, SHA3, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, RDM, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, TME, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, ATOMIC, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, CRC32, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, SHA2, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, SHA1, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, AES, 0), + REG_FTR_END, +}; + +static const struct reg_ftr_bits ftr_id_aa64isar1_el1[] = { + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, LS64, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, XS, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, I8MM, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, DGH, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, BF16, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, SPECRES, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, SB, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, FRINTTS, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, LRCPC, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, FCMA, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, JSCVT, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, DPB, 0), + REG_FTR_END, +}; + +static const struct reg_ftr_bits ftr_id_aa64isar2_el1[] = { + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR2_EL1, BC, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR2_EL1, RPRES, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR2_EL1, WFxT, 0), + REG_FTR_END, +}; + +static const struct reg_ftr_bits ftr_id_aa64pfr0_el1[] = { + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, CSV3, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, CSV2, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, DIT, 0), + 
REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, SEL2, 0), + REG_FTR_BITS(FTR_EXACT, ID_AA64PFR0_EL1, GIC, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL3, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL2, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL1, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL0, 0), + REG_FTR_END, +}; + +static const struct reg_ftr_bits ftr_id_aa64pfr1_el1[] = { + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR1_EL1, CSV2_frac, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR1_EL1, SSBS, ID_AA64PFR1_EL1_SSBS_NI), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR1_EL1, BT, 0), + REG_FTR_END, +}; + +static const struct reg_ftr_bits ftr_id_aa64mmfr0_el1[] = { + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, ECV, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, EXS, 0), + S_REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, TGRAN4, 0), + S_REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, TGRAN64, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, TGRAN16, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, BIGENDEL0, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, SNSMEM, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, BIGEND, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, ASIDBITS, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, PARANGE, 0), + REG_FTR_END, +}; + +static const struct reg_ftr_bits ftr_id_aa64mmfr1_el1[] = { + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, TIDCP1, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, AFP, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, ETS, 0), + REG_FTR_BITS(FTR_HIGHER_SAFE, ID_AA64MMFR1_EL1, SpecSEI, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, PAN, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, LO, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, HPDS, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, HAFDBS, 0), + REG_FTR_END, +}; + +static const struct reg_ftr_bits ftr_id_aa64mmfr2_el1[] = { + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR2_EL1, E0PD, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR2_EL1, BBM, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR2_EL1, TTL, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR2_EL1, AT, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR2_EL1, ST, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR2_EL1, VARange, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR2_EL1, IESB, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR2_EL1, LSM, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR2_EL1, UAO, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR2_EL1, CnP, 0), + REG_FTR_END, +}; + +static const struct reg_ftr_bits ftr_id_aa64zfr0_el1[] = { + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, F64MM, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, F32MM, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, I8MM, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, SM4, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, SHA3, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, BF16, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, BitPerm, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, AES, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, SVEver, 0), + REG_FTR_END, +}; + +#define TEST_REG(id, table) \ + { \ + .reg = id, \ + .ftr_bits = &((table)[0]), \ + } + +static struct test_feature_reg test_regs[] = { + TEST_REG(SYS_ID_AA64DFR0_EL1, ftr_id_aa64dfr0_el1), + TEST_REG(SYS_ID_DFR0_EL1, ftr_id_dfr0_el1), + TEST_REG(SYS_ID_AA64ISAR0_EL1, ftr_id_aa64isar0_el1), + TEST_REG(SYS_ID_AA64ISAR1_EL1, ftr_id_aa64isar1_el1), + TEST_REG(SYS_ID_AA64ISAR2_EL1, 
ftr_id_aa64isar2_el1), + TEST_REG(SYS_ID_AA64PFR0_EL1, ftr_id_aa64pfr0_el1), + TEST_REG(SYS_ID_AA64PFR1_EL1, ftr_id_aa64pfr1_el1), + TEST_REG(SYS_ID_AA64MMFR0_EL1, ftr_id_aa64mmfr0_el1), + TEST_REG(SYS_ID_AA64MMFR1_EL1, ftr_id_aa64mmfr1_el1), + TEST_REG(SYS_ID_AA64MMFR2_EL1, ftr_id_aa64mmfr2_el1), + TEST_REG(SYS_ID_AA64ZFR0_EL1, ftr_id_aa64zfr0_el1), +}; + +#define GUEST_REG_SYNC(id) GUEST_SYNC_ARGS(0, id, read_sysreg_s(id), 0, 0); + +static void guest_code(void) +{ + GUEST_REG_SYNC(SYS_ID_AA64DFR0_EL1); + GUEST_REG_SYNC(SYS_ID_DFR0_EL1); + GUEST_REG_SYNC(SYS_ID_AA64ISAR0_EL1); + GUEST_REG_SYNC(SYS_ID_AA64ISAR1_EL1); + GUEST_REG_SYNC(SYS_ID_AA64ISAR2_EL1); + GUEST_REG_SYNC(SYS_ID_AA64PFR0_EL1); + GUEST_REG_SYNC(SYS_ID_AA64MMFR0_EL1); + GUEST_REG_SYNC(SYS_ID_AA64MMFR1_EL1); + GUEST_REG_SYNC(SYS_ID_AA64MMFR2_EL1); + GUEST_REG_SYNC(SYS_ID_AA64ZFR0_EL1); + GUEST_REG_SYNC(SYS_CTR_EL0); + + GUEST_DONE(); +} + +/* Return a safe value to a given ftr_bits an ftr value */ +uint64_t get_safe_value(const struct reg_ftr_bits *ftr_bits, uint64_t ftr) +{ + uint64_t ftr_max = GENMASK_ULL(ARM64_FEATURE_FIELD_BITS - 1, 0); + + if (ftr_bits->sign == FTR_UNSIGNED) { + switch (ftr_bits->type) { + case FTR_EXACT: + ftr = ftr_bits->safe_val; + break; + case FTR_LOWER_SAFE: + if (ftr > ftr_bits->safe_val) + ftr--; + break; + case FTR_HIGHER_SAFE: + if (ftr < ftr_max) + ftr++; + break; + case FTR_HIGHER_OR_ZERO_SAFE: + if (ftr == ftr_max) + ftr = 0; + else if (ftr != 0) + ftr++; + break; + default: + break; + } + } else if (ftr != ftr_max) { + switch (ftr_bits->type) { + case FTR_EXACT: + ftr = ftr_bits->safe_val; + break; + case FTR_LOWER_SAFE: + if (ftr > ftr_bits->safe_val) + ftr--; + break; + case FTR_HIGHER_SAFE: + if (ftr < ftr_max - 1) + ftr++; + break; + case FTR_HIGHER_OR_ZERO_SAFE: + if (ftr != 0 && ftr != ftr_max - 1) + ftr++; + break; + default: + break; + } + } + + return ftr; +} + +/* Return an invalid value to a given ftr_bits an ftr value */ +uint64_t get_invalid_value(const struct reg_ftr_bits *ftr_bits, uint64_t ftr) +{ + uint64_t ftr_max = GENMASK_ULL(ARM64_FEATURE_FIELD_BITS - 1, 0); + + if (ftr_bits->sign == FTR_UNSIGNED) { + switch (ftr_bits->type) { + case FTR_EXACT: + ftr = max((uint64_t)ftr_bits->safe_val + 1, ftr + 1); + break; + case FTR_LOWER_SAFE: + ftr++; + break; + case FTR_HIGHER_SAFE: + ftr--; + break; + case FTR_HIGHER_OR_ZERO_SAFE: + if (ftr == 0) + ftr = ftr_max; + else + ftr--; + break; + default: + break; + } + } else if (ftr != ftr_max) { + switch (ftr_bits->type) { + case FTR_EXACT: + ftr = max((uint64_t)ftr_bits->safe_val + 1, ftr + 1); + break; + case FTR_LOWER_SAFE: + ftr++; + break; + case FTR_HIGHER_SAFE: + ftr--; + break; + case FTR_HIGHER_OR_ZERO_SAFE: + if (ftr == 0) + ftr = ftr_max - 1; + else + ftr--; + break; + default: + break; + } + } else { + ftr = 0; + } + + return ftr; +} + +static uint64_t test_reg_set_success(struct kvm_vcpu *vcpu, uint64_t reg, + const struct reg_ftr_bits *ftr_bits) +{ + uint8_t shift = ftr_bits->shift; + uint64_t mask = ftr_bits->mask; + uint64_t val, new_val, ftr; + + val = vcpu_get_reg(vcpu, reg); + ftr = (val & mask) >> shift; + + ftr = get_safe_value(ftr_bits, ftr); + + ftr <<= shift; + val &= ~mask; + val |= ftr; + + vcpu_set_reg(vcpu, reg, val); + new_val = vcpu_get_reg(vcpu, reg); + TEST_ASSERT_EQ(new_val, val); + + return new_val; +} + +static void test_reg_set_fail(struct kvm_vcpu *vcpu, uint64_t reg, + const struct reg_ftr_bits *ftr_bits) +{ + uint8_t shift = ftr_bits->shift; + uint64_t mask = ftr_bits->mask; + uint64_t val, 
old_val, ftr; + int r; + + val = vcpu_get_reg(vcpu, reg); + ftr = (val & mask) >> shift; + + ftr = get_invalid_value(ftr_bits, ftr); + + old_val = val; + ftr <<= shift; + val &= ~mask; + val |= ftr; + + r = __vcpu_set_reg(vcpu, reg, val); + TEST_ASSERT(r < 0 && errno == EINVAL, + "Unexpected KVM_SET_ONE_REG error: r=%d, errno=%d", r, errno); + + val = vcpu_get_reg(vcpu, reg); + TEST_ASSERT_EQ(val, old_val); +} + +static uint64_t test_reg_vals[KVM_ARM_FEATURE_ID_RANGE_SIZE]; + +#define encoding_to_range_idx(encoding) \ + KVM_ARM_FEATURE_ID_RANGE_IDX(sys_reg_Op0(encoding), sys_reg_Op1(encoding), \ + sys_reg_CRn(encoding), sys_reg_CRm(encoding), \ + sys_reg_Op2(encoding)) + + +static void test_vm_ftr_id_regs(struct kvm_vcpu *vcpu, bool aarch64_only) +{ + uint64_t masks[KVM_ARM_FEATURE_ID_RANGE_SIZE]; + struct reg_mask_range range = { + .addr = (__u64)masks, + }; + int ret; + + /* KVM should return error when reserved field is not zero */ + range.reserved[0] = 1; + ret = __vm_ioctl(vcpu->vm, KVM_ARM_GET_REG_WRITABLE_MASKS, &range); + TEST_ASSERT(ret, "KVM doesn't check invalid parameters."); + + /* Get writable masks for feature ID registers */ + memset(range.reserved, 0, sizeof(range.reserved)); + vm_ioctl(vcpu->vm, KVM_ARM_GET_REG_WRITABLE_MASKS, &range); + + for (int i = 0; i < ARRAY_SIZE(test_regs); i++) { + const struct reg_ftr_bits *ftr_bits = test_regs[i].ftr_bits; + uint32_t reg_id = test_regs[i].reg; + uint64_t reg = KVM_ARM64_SYS_REG(reg_id); + int idx; + + /* Get the index to masks array for the idreg */ + idx = encoding_to_range_idx(reg_id); + + for (int j = 0; ftr_bits[j].type != FTR_END; j++) { + /* Skip aarch32 reg on aarch64 only system, since they are RAZ/WI. */ + if (aarch64_only && sys_reg_CRm(reg_id) < 4) { + ksft_test_result_skip("%s on AARCH64 only system\n", + ftr_bits[j].name); + continue; + } + + /* Make sure the feature field is writable */ + TEST_ASSERT_EQ(masks[idx] & ftr_bits[j].mask, ftr_bits[j].mask); + + test_reg_set_fail(vcpu, reg, &ftr_bits[j]); + + test_reg_vals[idx] = test_reg_set_success(vcpu, reg, + &ftr_bits[j]); + + ksft_test_result_pass("%s\n", ftr_bits[j].name); + } + } +} + +#define MPAM_IDREG_TEST 6 +static void test_user_set_mpam_reg(struct kvm_vcpu *vcpu) +{ + uint64_t masks[KVM_ARM_FEATURE_ID_RANGE_SIZE]; + struct reg_mask_range range = { + .addr = (__u64)masks, + }; + uint64_t val; + int idx, err; + + /* + * If ID_AA64PFR0.MPAM is _not_ officially modifiable and is zero, + * check that if it can be set to 1, (i.e. it is supported by the + * hardware), that it can't be set to other values. + */ + + /* Get writable masks for feature ID registers */ + memset(range.reserved, 0, sizeof(range.reserved)); + vm_ioctl(vcpu->vm, KVM_ARM_GET_REG_WRITABLE_MASKS, &range); + + /* Writeable? Nothing to test! */ + idx = encoding_to_range_idx(SYS_ID_AA64PFR0_EL1); + if ((masks[idx] & ID_AA64PFR0_EL1_MPAM_MASK) == ID_AA64PFR0_EL1_MPAM_MASK) { + ksft_test_result_skip("ID_AA64PFR0_EL1.MPAM is officially writable, nothing to test\n"); + return; + } + + /* Get the id register value */ + val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1)); + + /* Try to set MPAM=0. This should always be possible. 
*/ + val &= ~ID_AA64PFR0_EL1_MPAM_MASK; + val |= FIELD_PREP(ID_AA64PFR0_EL1_MPAM_MASK, 0); + err = __vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1), val); + if (err) + ksft_test_result_fail("ID_AA64PFR0_EL1.MPAM=0 was not accepted\n"); + else + ksft_test_result_pass("ID_AA64PFR0_EL1.MPAM=0 worked\n"); + + /* Try to set MPAM=1 */ + val &= ~ID_AA64PFR0_EL1_MPAM_MASK; + val |= FIELD_PREP(ID_AA64PFR0_EL1_MPAM_MASK, 1); + err = __vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1), val); + if (err) + ksft_test_result_skip("ID_AA64PFR0_EL1.MPAM is not writable, nothing to test\n"); + else + ksft_test_result_pass("ID_AA64PFR0_EL1.MPAM=1 was writable\n"); + + /* Try to set MPAM=2 */ + val &= ~ID_AA64PFR0_EL1_MPAM_MASK; + val |= FIELD_PREP(ID_AA64PFR0_EL1_MPAM_MASK, 2); + err = __vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1), val); + if (err) + ksft_test_result_pass("ID_AA64PFR0_EL1.MPAM not arbitrarily modifiable\n"); + else + ksft_test_result_fail("ID_AA64PFR0_EL1.MPAM value should not be ignored\n"); + + /* And again for ID_AA64PFR1_EL1.MPAM_frac */ + idx = encoding_to_range_idx(SYS_ID_AA64PFR1_EL1); + if ((masks[idx] & ID_AA64PFR1_EL1_MPAM_frac_MASK) == ID_AA64PFR1_EL1_MPAM_frac_MASK) { + ksft_test_result_skip("ID_AA64PFR1_EL1.MPAM_frac is officially writable, nothing to test\n"); + return; + } + + /* Get the id register value */ + val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR1_EL1)); + + /* Try to set MPAM_frac=0. This should always be possible. */ + val &= ~ID_AA64PFR1_EL1_MPAM_frac_MASK; + val |= FIELD_PREP(ID_AA64PFR1_EL1_MPAM_frac_MASK, 0); + err = __vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR1_EL1), val); + if (err) + ksft_test_result_fail("ID_AA64PFR0_EL1.MPAM_frac=0 was not accepted\n"); + else + ksft_test_result_pass("ID_AA64PFR0_EL1.MPAM_frac=0 worked\n"); + + /* Try to set MPAM_frac=1 */ + val &= ~ID_AA64PFR1_EL1_MPAM_frac_MASK; + val |= FIELD_PREP(ID_AA64PFR1_EL1_MPAM_frac_MASK, 1); + err = __vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR1_EL1), val); + if (err) + ksft_test_result_skip("ID_AA64PFR1_EL1.MPAM_frac is not writable, nothing to test\n"); + else + ksft_test_result_pass("ID_AA64PFR0_EL1.MPAM_frac=1 was writable\n"); + + /* Try to set MPAM_frac=2 */ + val &= ~ID_AA64PFR1_EL1_MPAM_frac_MASK; + val |= FIELD_PREP(ID_AA64PFR1_EL1_MPAM_frac_MASK, 2); + err = __vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR1_EL1), val); + if (err) + ksft_test_result_pass("ID_AA64PFR1_EL1.MPAM_frac not arbitrarily modifiable\n"); + else + ksft_test_result_fail("ID_AA64PFR1_EL1.MPAM_frac value should not be ignored\n"); +} + +static void test_guest_reg_read(struct kvm_vcpu *vcpu) +{ + bool done = false; + struct ucall uc; + + while (!done) { + vcpu_run(vcpu); + + switch (get_ucall(vcpu, &uc)) { + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + break; + case UCALL_SYNC: + /* Make sure the written values are seen by guest */ + TEST_ASSERT_EQ(test_reg_vals[encoding_to_range_idx(uc.args[2])], + uc.args[3]); + break; + case UCALL_DONE: + done = true; + break; + default: + TEST_FAIL("Unexpected ucall: %lu", uc.cmd); + } + } +} + +/* Politely lifted from arch/arm64/include/asm/cache.h */ +/* Ctypen, bits[3(n - 1) + 2 : 3(n - 1)], for n = 1 to 7 */ +#define CLIDR_CTYPE_SHIFT(level) (3 * (level - 1)) +#define CLIDR_CTYPE_MASK(level) (7 << CLIDR_CTYPE_SHIFT(level)) +#define CLIDR_CTYPE(clidr, level) \ + (((clidr) & CLIDR_CTYPE_MASK(level)) >> CLIDR_CTYPE_SHIFT(level)) + +static void test_clidr(struct kvm_vcpu *vcpu) +{ + uint64_t clidr; + int level; + 
+ clidr = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_CLIDR_EL1)); + + /* find the first empty level in the cache hierarchy */ + for (level = 1; level < 7; level++) { + if (!CLIDR_CTYPE(clidr, level)) + break; + } + + /* + * If you have a mind-boggling 7 levels of cache, congratulations, you + * get to fix this. + */ + TEST_ASSERT(level <= 7, "can't find an empty level in cache hierarchy"); + + /* stick in a unified cache level */ + clidr |= BIT(2) << CLIDR_CTYPE_SHIFT(level); + + vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_CLIDR_EL1), clidr); + test_reg_vals[encoding_to_range_idx(SYS_CLIDR_EL1)] = clidr; +} + +static void test_ctr(struct kvm_vcpu *vcpu) +{ + u64 ctr; + + ctr = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_CTR_EL0)); + ctr &= ~CTR_EL0_DIC_MASK; + if (ctr & CTR_EL0_IminLine_MASK) + ctr--; + + vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_CTR_EL0), ctr); + test_reg_vals[encoding_to_range_idx(SYS_CTR_EL0)] = ctr; +} + +static void test_vcpu_ftr_id_regs(struct kvm_vcpu *vcpu) +{ + u64 val; + + test_clidr(vcpu); + test_ctr(vcpu); + + val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_MPIDR_EL1)); + val++; + vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_MPIDR_EL1), val); + + test_reg_vals[encoding_to_range_idx(SYS_MPIDR_EL1)] = val; + ksft_test_result_pass("%s\n", __func__); +} + +static void test_assert_id_reg_unchanged(struct kvm_vcpu *vcpu, uint32_t encoding) +{ + size_t idx = encoding_to_range_idx(encoding); + uint64_t observed; + + observed = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(encoding)); + TEST_ASSERT_EQ(test_reg_vals[idx], observed); +} + +static void test_reset_preserves_id_regs(struct kvm_vcpu *vcpu) +{ + /* + * Calls KVM_ARM_VCPU_INIT behind the scenes, which will do an + * architectural reset of the vCPU. + */ + aarch64_vcpu_setup(vcpu, NULL); + + for (int i = 0; i < ARRAY_SIZE(test_regs); i++) + test_assert_id_reg_unchanged(vcpu, test_regs[i].reg); + + test_assert_id_reg_unchanged(vcpu, SYS_MPIDR_EL1); + test_assert_id_reg_unchanged(vcpu, SYS_CLIDR_EL1); + test_assert_id_reg_unchanged(vcpu, SYS_CTR_EL0); + + ksft_test_result_pass("%s\n", __func__); +} + +int main(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + bool aarch64_only; + uint64_t val, el0; + int test_cnt; + + TEST_REQUIRE(kvm_has_cap(KVM_CAP_ARM_SUPPORTED_REG_MASK_RANGES)); + + vm = vm_create_with_one_vcpu(&vcpu, guest_code); + + /* Check for AARCH64 only system */ + val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1)); + el0 = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL0), val); + aarch64_only = (el0 == ID_AA64PFR0_EL1_ELx_64BIT_ONLY); + + ksft_print_header(); + + test_cnt = ARRAY_SIZE(ftr_id_aa64dfr0_el1) + ARRAY_SIZE(ftr_id_dfr0_el1) + + ARRAY_SIZE(ftr_id_aa64isar0_el1) + ARRAY_SIZE(ftr_id_aa64isar1_el1) + + ARRAY_SIZE(ftr_id_aa64isar2_el1) + ARRAY_SIZE(ftr_id_aa64pfr0_el1) + + ARRAY_SIZE(ftr_id_aa64pfr1_el1) + ARRAY_SIZE(ftr_id_aa64mmfr0_el1) + + ARRAY_SIZE(ftr_id_aa64mmfr1_el1) + ARRAY_SIZE(ftr_id_aa64mmfr2_el1) + + ARRAY_SIZE(ftr_id_aa64zfr0_el1) - ARRAY_SIZE(test_regs) + 2 + + MPAM_IDREG_TEST; + + ksft_set_plan(test_cnt); + + test_vm_ftr_id_regs(vcpu, aarch64_only); + test_vcpu_ftr_id_regs(vcpu); + test_user_set_mpam_reg(vcpu); + + test_guest_reg_read(vcpu); + + test_reset_preserves_id_regs(vcpu); + + kvm_vm_free(vm); + + ksft_finished(); +} diff --git a/tools/testing/selftests/kvm/arm64/smccc_filter.c b/tools/testing/selftests/kvm/arm64/smccc_filter.c new file mode 100644 index 000000000000..2d189f3da228 --- /dev/null +++ b/tools/testing/selftests/kvm/arm64/smccc_filter.c @@ -0,0 +1,268 @@ +// 
SPDX-License-Identifier: GPL-2.0-only +/* + * smccc_filter - Tests for the SMCCC filter UAPI. + * + * Copyright (c) 2023 Google LLC + * + * This test includes: + * - Tests that the UAPI constraints are upheld by KVM. For example, userspace + * is prevented from filtering the architecture range of SMCCC calls. + * - Test that the filter actions (DENIED, FWD_TO_USER) work as intended. + */ + +#include +#include +#include + +#include "processor.h" +#include "test_util.h" + +enum smccc_conduit { + HVC_INSN, + SMC_INSN, +}; + +#define for_each_conduit(conduit) \ + for (conduit = HVC_INSN; conduit <= SMC_INSN; conduit++) + +static void guest_main(uint32_t func_id, enum smccc_conduit conduit) +{ + struct arm_smccc_res res; + + if (conduit == SMC_INSN) + smccc_smc(func_id, 0, 0, 0, 0, 0, 0, 0, &res); + else + smccc_hvc(func_id, 0, 0, 0, 0, 0, 0, 0, &res); + + GUEST_SYNC(res.a0); +} + +static int __set_smccc_filter(struct kvm_vm *vm, uint32_t start, uint32_t nr_functions, + enum kvm_smccc_filter_action action) +{ + struct kvm_smccc_filter filter = { + .base = start, + .nr_functions = nr_functions, + .action = action, + }; + + return __kvm_device_attr_set(vm->fd, KVM_ARM_VM_SMCCC_CTRL, + KVM_ARM_VM_SMCCC_FILTER, &filter); +} + +static void set_smccc_filter(struct kvm_vm *vm, uint32_t start, uint32_t nr_functions, + enum kvm_smccc_filter_action action) +{ + int ret = __set_smccc_filter(vm, start, nr_functions, action); + + TEST_ASSERT(!ret, "failed to configure SMCCC filter: %d", ret); +} + +static struct kvm_vm *setup_vm(struct kvm_vcpu **vcpu) +{ + struct kvm_vcpu_init init; + struct kvm_vm *vm; + + vm = vm_create(1); + vm_ioctl(vm, KVM_ARM_PREFERRED_TARGET, &init); + + /* + * Enable in-kernel emulation of PSCI to ensure that calls are denied + * due to the SMCCC filter, not because of KVM. 
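+	 * With PSCI 0.2 emulation enabled, the PSCI function IDs used below are
+	 * normally handled in-kernel, so SMCCC_RET_NOT_SUPPORTED can only come
+	 * from the filter.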
+ */ + init.features[0] |= (1 << KVM_ARM_VCPU_PSCI_0_2); + + *vcpu = aarch64_vcpu_add(vm, 0, &init, guest_main); + return vm; +} + +static void test_pad_must_be_zero(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm = setup_vm(&vcpu); + struct kvm_smccc_filter filter = { + .base = PSCI_0_2_FN_PSCI_VERSION, + .nr_functions = 1, + .action = KVM_SMCCC_FILTER_DENY, + .pad = { -1 }, + }; + int r; + + r = __kvm_device_attr_set(vm->fd, KVM_ARM_VM_SMCCC_CTRL, + KVM_ARM_VM_SMCCC_FILTER, &filter); + TEST_ASSERT(r < 0 && errno == EINVAL, + "Setting filter with nonzero padding should return EINVAL"); +} + +/* Ensure that userspace cannot filter the Arm Architecture SMCCC range */ +static void test_filter_reserved_range(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm = setup_vm(&vcpu); + uint32_t smc64_fn; + int r; + + r = __set_smccc_filter(vm, ARM_SMCCC_ARCH_WORKAROUND_1, + 1, KVM_SMCCC_FILTER_DENY); + TEST_ASSERT(r < 0 && errno == EEXIST, + "Attempt to filter reserved range should return EEXIST"); + + smc64_fn = ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, ARM_SMCCC_SMC_64, + 0, 0); + + r = __set_smccc_filter(vm, smc64_fn, 1, KVM_SMCCC_FILTER_DENY); + TEST_ASSERT(r < 0 && errno == EEXIST, + "Attempt to filter reserved range should return EEXIST"); + + kvm_vm_free(vm); +} + +static void test_invalid_nr_functions(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm = setup_vm(&vcpu); + int r; + + r = __set_smccc_filter(vm, PSCI_0_2_FN64_CPU_ON, 0, KVM_SMCCC_FILTER_DENY); + TEST_ASSERT(r < 0 && errno == EINVAL, + "Attempt to filter 0 functions should return EINVAL"); + + kvm_vm_free(vm); +} + +static void test_overflow_nr_functions(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm = setup_vm(&vcpu); + int r; + + r = __set_smccc_filter(vm, ~0, ~0, KVM_SMCCC_FILTER_DENY); + TEST_ASSERT(r < 0 && errno == EINVAL, + "Attempt to overflow filter range should return EINVAL"); + + kvm_vm_free(vm); +} + +static void test_reserved_action(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm = setup_vm(&vcpu); + int r; + + r = __set_smccc_filter(vm, PSCI_0_2_FN64_CPU_ON, 1, -1); + TEST_ASSERT(r < 0 && errno == EINVAL, + "Attempt to use reserved filter action should return EINVAL"); + + kvm_vm_free(vm); +} + + +/* Test that overlapping configurations of the SMCCC filter are rejected */ +static void test_filter_overlap(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm = setup_vm(&vcpu); + int r; + + set_smccc_filter(vm, PSCI_0_2_FN64_CPU_ON, 1, KVM_SMCCC_FILTER_DENY); + + r = __set_smccc_filter(vm, PSCI_0_2_FN64_CPU_ON, 1, KVM_SMCCC_FILTER_DENY); + TEST_ASSERT(r < 0 && errno == EEXIST, + "Attempt to filter already configured range should return EEXIST"); + + kvm_vm_free(vm); +} + +static void expect_call_denied(struct kvm_vcpu *vcpu) +{ + struct ucall uc; + + if (get_ucall(vcpu, &uc) != UCALL_SYNC) + TEST_FAIL("Unexpected ucall: %lu", uc.cmd); + + TEST_ASSERT(uc.args[1] == SMCCC_RET_NOT_SUPPORTED, + "Unexpected SMCCC return code: %lu", uc.args[1]); +} + +/* Denied SMCCC calls have a return code of SMCCC_RET_NOT_SUPPORTED */ +static void test_filter_denied(void) +{ + enum smccc_conduit conduit; + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + + for_each_conduit(conduit) { + vm = setup_vm(&vcpu); + + set_smccc_filter(vm, PSCI_0_2_FN_PSCI_VERSION, 1, KVM_SMCCC_FILTER_DENY); + vcpu_args_set(vcpu, 2, PSCI_0_2_FN_PSCI_VERSION, conduit); + + vcpu_run(vcpu); + expect_call_denied(vcpu); + + kvm_vm_free(vm); + } +} + +static void expect_call_fwd_to_user(struct kvm_vcpu *vcpu, uint32_t func_id, + enum 
smccc_conduit conduit) +{ + struct kvm_run *run = vcpu->run; + + TEST_ASSERT(run->exit_reason == KVM_EXIT_HYPERCALL, + "Unexpected exit reason: %u", run->exit_reason); + TEST_ASSERT(run->hypercall.nr == func_id, + "Unexpected SMCCC function: %llu", run->hypercall.nr); + + if (conduit == SMC_INSN) + TEST_ASSERT(run->hypercall.flags & KVM_HYPERCALL_EXIT_SMC, + "KVM_HYPERCALL_EXIT_SMC is not set"); + else + TEST_ASSERT(!(run->hypercall.flags & KVM_HYPERCALL_EXIT_SMC), + "KVM_HYPERCALL_EXIT_SMC is set"); +} + +/* SMCCC calls forwarded to userspace cause KVM_EXIT_HYPERCALL exits */ +static void test_filter_fwd_to_user(void) +{ + enum smccc_conduit conduit; + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + + for_each_conduit(conduit) { + vm = setup_vm(&vcpu); + + set_smccc_filter(vm, PSCI_0_2_FN_PSCI_VERSION, 1, KVM_SMCCC_FILTER_FWD_TO_USER); + vcpu_args_set(vcpu, 2, PSCI_0_2_FN_PSCI_VERSION, conduit); + + vcpu_run(vcpu); + expect_call_fwd_to_user(vcpu, PSCI_0_2_FN_PSCI_VERSION, conduit); + + kvm_vm_free(vm); + } +} + +static bool kvm_supports_smccc_filter(void) +{ + struct kvm_vm *vm = vm_create_barebones(); + int r; + + r = __kvm_has_device_attr(vm->fd, KVM_ARM_VM_SMCCC_CTRL, KVM_ARM_VM_SMCCC_FILTER); + + kvm_vm_free(vm); + return !r; +} + +int main(void) +{ + TEST_REQUIRE(kvm_supports_smccc_filter()); + + test_pad_must_be_zero(); + test_invalid_nr_functions(); + test_overflow_nr_functions(); + test_reserved_action(); + test_filter_reserved_range(); + test_filter_overlap(); + test_filter_denied(); + test_filter_fwd_to_user(); +} diff --git a/tools/testing/selftests/kvm/arm64/vcpu_width_config.c b/tools/testing/selftests/kvm/arm64/vcpu_width_config.c new file mode 100644 index 000000000000..80b74c6f152b --- /dev/null +++ b/tools/testing/selftests/kvm/arm64/vcpu_width_config.c @@ -0,0 +1,121 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * vcpu_width_config - Test KVM_ARM_VCPU_INIT() with KVM_ARM_VCPU_EL1_32BIT. + * + * Copyright (c) 2022 Google LLC. + * + * This is a test that ensures that non-mixed-width vCPUs (all 64bit vCPUs + * or all 32bit vcPUs) can be configured and mixed-width vCPUs cannot be + * configured. + */ + +#include "kvm_util.h" +#include "processor.h" +#include "test_util.h" + + +/* + * Add a vCPU, run KVM_ARM_VCPU_INIT with @init0, and then + * add another vCPU, and run KVM_ARM_VCPU_INIT with @init1. + */ +static int add_init_2vcpus(struct kvm_vcpu_init *init0, + struct kvm_vcpu_init *init1) +{ + struct kvm_vcpu *vcpu0, *vcpu1; + struct kvm_vm *vm; + int ret; + + vm = vm_create_barebones(); + + vcpu0 = __vm_vcpu_add(vm, 0); + ret = __vcpu_ioctl(vcpu0, KVM_ARM_VCPU_INIT, init0); + if (ret) + goto free_exit; + + vcpu1 = __vm_vcpu_add(vm, 1); + ret = __vcpu_ioctl(vcpu1, KVM_ARM_VCPU_INIT, init1); + +free_exit: + kvm_vm_free(vm); + return ret; +} + +/* + * Add two vCPUs, then run KVM_ARM_VCPU_INIT for one vCPU with @init0, + * and run KVM_ARM_VCPU_INIT for another vCPU with @init1. + */ +static int add_2vcpus_init_2vcpus(struct kvm_vcpu_init *init0, + struct kvm_vcpu_init *init1) +{ + struct kvm_vcpu *vcpu0, *vcpu1; + struct kvm_vm *vm; + int ret; + + vm = vm_create_barebones(); + + vcpu0 = __vm_vcpu_add(vm, 0); + vcpu1 = __vm_vcpu_add(vm, 1); + + ret = __vcpu_ioctl(vcpu0, KVM_ARM_VCPU_INIT, init0); + if (ret) + goto free_exit; + + ret = __vcpu_ioctl(vcpu1, KVM_ARM_VCPU_INIT, init1); + +free_exit: + kvm_vm_free(vm); + return ret; +} + +/* + * Tests that two 64bit vCPUs can be configured, two 32bit vCPUs can be + * configured, and two mixed-width vCPUs cannot be configured. 
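+ * (KVM_ARM_VCPU_INIT is expected to fail for the second, mismatched vCPU.)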
+ * Each of those three cases, configure vCPUs in two different orders. + * The one is running KVM_CREATE_VCPU for 2 vCPUs, and then running + * KVM_ARM_VCPU_INIT for them. + * The other is running KVM_CREATE_VCPU and KVM_ARM_VCPU_INIT for a vCPU, + * and then run those commands for another vCPU. + */ +int main(void) +{ + struct kvm_vcpu_init init0, init1; + struct kvm_vm *vm; + int ret; + + TEST_REQUIRE(kvm_has_cap(KVM_CAP_ARM_EL1_32BIT)); + + /* Get the preferred target type and copy that to init1 for later use */ + vm = vm_create_barebones(); + vm_ioctl(vm, KVM_ARM_PREFERRED_TARGET, &init0); + kvm_vm_free(vm); + init1 = init0; + + /* Test with 64bit vCPUs */ + ret = add_init_2vcpus(&init0, &init0); + TEST_ASSERT(ret == 0, + "Configuring 64bit EL1 vCPUs failed unexpectedly"); + ret = add_2vcpus_init_2vcpus(&init0, &init0); + TEST_ASSERT(ret == 0, + "Configuring 64bit EL1 vCPUs failed unexpectedly"); + + /* Test with 32bit vCPUs */ + init0.features[0] = (1 << KVM_ARM_VCPU_EL1_32BIT); + ret = add_init_2vcpus(&init0, &init0); + TEST_ASSERT(ret == 0, + "Configuring 32bit EL1 vCPUs failed unexpectedly"); + ret = add_2vcpus_init_2vcpus(&init0, &init0); + TEST_ASSERT(ret == 0, + "Configuring 32bit EL1 vCPUs failed unexpectedly"); + + /* Test with mixed-width vCPUs */ + init0.features[0] = 0; + init1.features[0] = (1 << KVM_ARM_VCPU_EL1_32BIT); + ret = add_init_2vcpus(&init0, &init1); + TEST_ASSERT(ret != 0, + "Configuring mixed-width vCPUs worked unexpectedly"); + ret = add_2vcpus_init_2vcpus(&init0, &init1); + TEST_ASSERT(ret != 0, + "Configuring mixed-width vCPUs worked unexpectedly"); + + return 0; +} diff --git a/tools/testing/selftests/kvm/arm64/vgic_init.c b/tools/testing/selftests/kvm/arm64/vgic_init.c new file mode 100644 index 000000000000..b3b5fb0ff0a9 --- /dev/null +++ b/tools/testing/selftests/kvm/arm64/vgic_init.c @@ -0,0 +1,764 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * vgic init sequence tests + * + * Copyright (C) 2020, Red Hat, Inc. + */ +#include +#include +#include +#include + +#include "test_util.h" +#include "kvm_util.h" +#include "processor.h" +#include "vgic.h" + +#define NR_VCPUS 4 + +#define REG_OFFSET(vcpu, offset) (((uint64_t)vcpu << 32) | offset) + +#define GICR_TYPER 0x8 + +#define VGIC_DEV_IS_V2(_d) ((_d) == KVM_DEV_TYPE_ARM_VGIC_V2) +#define VGIC_DEV_IS_V3(_d) ((_d) == KVM_DEV_TYPE_ARM_VGIC_V3) + +struct vm_gic { + struct kvm_vm *vm; + int gic_fd; + uint32_t gic_dev_type; +}; + +static uint64_t max_phys_size; + +/* + * Helpers to access a redistributor register and verify the ioctl() failed or + * succeeded as expected, and provided the correct value on success. + */ +static void v3_redist_reg_get_errno(int gicv3_fd, int vcpu, int offset, + int want, const char *msg) +{ + uint32_t ignored_val; + int ret = __kvm_device_attr_get(gicv3_fd, KVM_DEV_ARM_VGIC_GRP_REDIST_REGS, + REG_OFFSET(vcpu, offset), &ignored_val); + + TEST_ASSERT(ret && errno == want, "%s; want errno = %d", msg, want); +} + +static void v3_redist_reg_get(int gicv3_fd, int vcpu, int offset, uint32_t want, + const char *msg) +{ + uint32_t val; + + kvm_device_attr_get(gicv3_fd, KVM_DEV_ARM_VGIC_GRP_REDIST_REGS, + REG_OFFSET(vcpu, offset), &val); + TEST_ASSERT(val == want, "%s; want '0x%x', got '0x%x'", msg, want, val); +} + +/* dummy guest code */ +static void guest_code(void) +{ + GUEST_SYNC(0); + GUEST_SYNC(1); + GUEST_SYNC(2); + GUEST_DONE(); +} + +/* we don't want to assert on run execution, hence that helper */ +static int run_vcpu(struct kvm_vcpu *vcpu) +{ + return __vcpu_run(vcpu) ? 
-errno : 0; +} + +static struct vm_gic vm_gic_create_with_vcpus(uint32_t gic_dev_type, + uint32_t nr_vcpus, + struct kvm_vcpu *vcpus[]) +{ + struct vm_gic v; + + v.gic_dev_type = gic_dev_type; + v.vm = vm_create_with_vcpus(nr_vcpus, guest_code, vcpus); + v.gic_fd = kvm_create_device(v.vm, gic_dev_type); + + return v; +} + +static struct vm_gic vm_gic_create_barebones(uint32_t gic_dev_type) +{ + struct vm_gic v; + + v.gic_dev_type = gic_dev_type; + v.vm = vm_create_barebones(); + v.gic_fd = kvm_create_device(v.vm, gic_dev_type); + + return v; +} + + +static void vm_gic_destroy(struct vm_gic *v) +{ + close(v->gic_fd); + kvm_vm_free(v->vm); +} + +struct vgic_region_attr { + uint64_t attr; + uint64_t size; + uint64_t alignment; +}; + +struct vgic_region_attr gic_v3_dist_region = { + .attr = KVM_VGIC_V3_ADDR_TYPE_DIST, + .size = 0x10000, + .alignment = 0x10000, +}; + +struct vgic_region_attr gic_v3_redist_region = { + .attr = KVM_VGIC_V3_ADDR_TYPE_REDIST, + .size = NR_VCPUS * 0x20000, + .alignment = 0x10000, +}; + +struct vgic_region_attr gic_v2_dist_region = { + .attr = KVM_VGIC_V2_ADDR_TYPE_DIST, + .size = 0x1000, + .alignment = 0x1000, +}; + +struct vgic_region_attr gic_v2_cpu_region = { + .attr = KVM_VGIC_V2_ADDR_TYPE_CPU, + .size = 0x2000, + .alignment = 0x1000, +}; + +/** + * Helper routine that performs KVM device tests in general. Eventually the + * ARM_VGIC (GICv2 or GICv3) device gets created with an overlapping + * DIST/REDIST (or DIST/CPUIF for GICv2). Assumption is 4 vcpus are going to be + * used hence the overlap. In the case of GICv3, A RDIST region is set at @0x0 + * and a DIST region is set @0x70000. The GICv2 case sets a CPUIF @0x0 and a + * DIST region @0x1000. + */ +static void subtest_dist_rdist(struct vm_gic *v) +{ + int ret; + uint64_t addr; + struct vgic_region_attr rdist; /* CPU interface in GICv2*/ + struct vgic_region_attr dist; + + rdist = VGIC_DEV_IS_V3(v->gic_dev_type) ? gic_v3_redist_region + : gic_v2_cpu_region; + dist = VGIC_DEV_IS_V3(v->gic_dev_type) ? gic_v3_dist_region + : gic_v2_dist_region; + + /* Check existing group/attributes */ + kvm_has_device_attr(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, dist.attr); + + kvm_has_device_attr(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, rdist.attr); + + /* check non existing attribute */ + ret = __kvm_has_device_attr(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, -1); + TEST_ASSERT(ret && errno == ENXIO, "attribute not supported"); + + /* misaligned DIST and REDIST address settings */ + addr = dist.alignment / 0x10; + ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + dist.attr, &addr); + TEST_ASSERT(ret && errno == EINVAL, "GIC dist base not aligned"); + + addr = rdist.alignment / 0x10; + ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + rdist.attr, &addr); + TEST_ASSERT(ret && errno == EINVAL, "GIC redist/cpu base not aligned"); + + /* out of range address */ + addr = max_phys_size; + ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + dist.attr, &addr); + TEST_ASSERT(ret && errno == E2BIG, "dist address beyond IPA limit"); + + ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + rdist.attr, &addr); + TEST_ASSERT(ret && errno == E2BIG, "redist address beyond IPA limit"); + + /* Space for half a rdist (a rdist is: 2 * rdist.alignment). 
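+	 * Only one alignment unit of IPA space is left, so setting the base
+	 * this close to the limit must fail with E2BIG.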
*/ + addr = max_phys_size - dist.alignment; + ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + rdist.attr, &addr); + TEST_ASSERT(ret && errno == E2BIG, + "half of the redist is beyond IPA limit"); + + /* set REDIST base address @0x0*/ + addr = 0x00000; + kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + rdist.attr, &addr); + + /* Attempt to create a second legacy redistributor region */ + addr = 0xE0000; + ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + rdist.attr, &addr); + TEST_ASSERT(ret && errno == EEXIST, "GIC redist base set again"); + + ret = __kvm_has_device_attr(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_V3_ADDR_TYPE_REDIST); + if (!ret) { + /* Attempt to mix legacy and new redistributor regions */ + addr = REDIST_REGION_ATTR_ADDR(NR_VCPUS, 0x100000, 0, 0); + ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr); + TEST_ASSERT(ret && errno == EINVAL, + "attempt to mix GICv3 REDIST and REDIST_REGION"); + } + + /* + * Set overlapping DIST / REDIST, cannot be detected here. Will be detected + * on first vcpu run instead. + */ + addr = rdist.size - rdist.alignment; + kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + dist.attr, &addr); +} + +/* Test the new REDIST region API */ +static void subtest_v3_redist_regions(struct vm_gic *v) +{ + uint64_t addr, expected_addr; + int ret; + + ret = __kvm_has_device_attr(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_V3_ADDR_TYPE_REDIST); + TEST_ASSERT(!ret, "Multiple redist regions advertised"); + + addr = REDIST_REGION_ATTR_ADDR(NR_VCPUS, 0x100000, 2, 0); + ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr); + TEST_ASSERT(ret && errno == EINVAL, "redist region attr value with flags != 0"); + + addr = REDIST_REGION_ATTR_ADDR(0, 0x100000, 0, 0); + ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr); + TEST_ASSERT(ret && errno == EINVAL, "redist region attr value with count== 0"); + + addr = REDIST_REGION_ATTR_ADDR(2, 0x200000, 0, 1); + ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr); + TEST_ASSERT(ret && errno == EINVAL, + "attempt to register the first rdist region with index != 0"); + + addr = REDIST_REGION_ATTR_ADDR(2, 0x201000, 0, 1); + ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr); + TEST_ASSERT(ret && errno == EINVAL, "rdist region with misaligned address"); + + addr = REDIST_REGION_ATTR_ADDR(2, 0x200000, 0, 0); + kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr); + + addr = REDIST_REGION_ATTR_ADDR(2, 0x200000, 0, 1); + ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr); + TEST_ASSERT(ret && errno == EINVAL, "register an rdist region with already used index"); + + addr = REDIST_REGION_ATTR_ADDR(1, 0x210000, 0, 2); + ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr); + TEST_ASSERT(ret && errno == EINVAL, + "register an rdist region overlapping with another one"); + + addr = REDIST_REGION_ATTR_ADDR(1, 0x240000, 0, 2); + ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr); + TEST_ASSERT(ret && errno == EINVAL, "register redist region with 
index not +1"); + + addr = REDIST_REGION_ATTR_ADDR(1, 0x240000, 0, 1); + kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr); + + addr = REDIST_REGION_ATTR_ADDR(1, max_phys_size, 0, 2); + ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr); + TEST_ASSERT(ret && errno == E2BIG, + "register redist region with base address beyond IPA range"); + + /* The last redist is above the pa range. */ + addr = REDIST_REGION_ATTR_ADDR(2, max_phys_size - 0x30000, 0, 2); + ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr); + TEST_ASSERT(ret && errno == E2BIG, + "register redist region with top address beyond IPA range"); + + addr = 0x260000; + ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_V3_ADDR_TYPE_REDIST, &addr); + TEST_ASSERT(ret && errno == EINVAL, + "Mix KVM_VGIC_V3_ADDR_TYPE_REDIST and REDIST_REGION"); + + /* + * Now there are 2 redist regions: + * region 0 @ 0x200000 2 redists + * region 1 @ 0x240000 1 redist + * Attempt to read their characteristics + */ + + addr = REDIST_REGION_ATTR_ADDR(0, 0, 0, 0); + expected_addr = REDIST_REGION_ATTR_ADDR(2, 0x200000, 0, 0); + ret = __kvm_device_attr_get(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr); + TEST_ASSERT(!ret && addr == expected_addr, "read characteristics of region #0"); + + addr = REDIST_REGION_ATTR_ADDR(0, 0, 0, 1); + expected_addr = REDIST_REGION_ATTR_ADDR(1, 0x240000, 0, 1); + ret = __kvm_device_attr_get(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr); + TEST_ASSERT(!ret && addr == expected_addr, "read characteristics of region #1"); + + addr = REDIST_REGION_ATTR_ADDR(0, 0, 0, 2); + ret = __kvm_device_attr_get(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr); + TEST_ASSERT(ret && errno == ENOENT, "read characteristics of non existing region"); + + addr = 0x260000; + kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_V3_ADDR_TYPE_DIST, &addr); + + addr = REDIST_REGION_ATTR_ADDR(1, 0x260000, 0, 2); + ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr); + TEST_ASSERT(ret && errno == EINVAL, "register redist region colliding with dist"); +} + +/* + * VGIC KVM device is created and initialized before the secondary CPUs + * get created + */ +static void test_vgic_then_vcpus(uint32_t gic_dev_type) +{ + struct kvm_vcpu *vcpus[NR_VCPUS]; + struct vm_gic v; + int ret, i; + + v = vm_gic_create_with_vcpus(gic_dev_type, 1, vcpus); + + subtest_dist_rdist(&v); + + /* Add the rest of the VCPUs */ + for (i = 1; i < NR_VCPUS; ++i) + vcpus[i] = vm_vcpu_add(v.vm, i, guest_code); + + ret = run_vcpu(vcpus[3]); + TEST_ASSERT(ret == -EINVAL, "dist/rdist overlap detected on 1st vcpu run"); + + vm_gic_destroy(&v); +} + +/* All the VCPUs are created before the VGIC KVM device gets initialized */ +static void test_vcpus_then_vgic(uint32_t gic_dev_type) +{ + struct kvm_vcpu *vcpus[NR_VCPUS]; + struct vm_gic v; + int ret; + + v = vm_gic_create_with_vcpus(gic_dev_type, NR_VCPUS, vcpus); + + subtest_dist_rdist(&v); + + ret = run_vcpu(vcpus[3]); + TEST_ASSERT(ret == -EINVAL, "dist/rdist overlap detected on 1st vcpu run"); + + vm_gic_destroy(&v); +} + +#define KVM_VGIC_V2_ATTR(offset, cpu) \ + (FIELD_PREP(KVM_DEV_ARM_VGIC_OFFSET_MASK, offset) | \ + FIELD_PREP(KVM_DEV_ARM_VGIC_CPUID_MASK, cpu)) 
+ +#define GIC_CPU_CTRL 0x00 + +static void test_v2_uaccess_cpuif_no_vcpus(void) +{ + struct vm_gic v; + u64 val = 0; + int ret; + + v = vm_gic_create_barebones(KVM_DEV_TYPE_ARM_VGIC_V2); + subtest_dist_rdist(&v); + + ret = __kvm_has_device_attr(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_CPU_REGS, + KVM_VGIC_V2_ATTR(GIC_CPU_CTRL, 0)); + TEST_ASSERT(ret && errno == EINVAL, + "accessed non-existent CPU interface, want errno: %i", + EINVAL); + ret = __kvm_device_attr_get(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_CPU_REGS, + KVM_VGIC_V2_ATTR(GIC_CPU_CTRL, 0), &val); + TEST_ASSERT(ret && errno == EINVAL, + "accessed non-existent CPU interface, want errno: %i", + EINVAL); + ret = __kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_CPU_REGS, + KVM_VGIC_V2_ATTR(GIC_CPU_CTRL, 0), &val); + TEST_ASSERT(ret && errno == EINVAL, + "accessed non-existent CPU interface, want errno: %i", + EINVAL); + + vm_gic_destroy(&v); +} + +static void test_v3_new_redist_regions(void) +{ + struct kvm_vcpu *vcpus[NR_VCPUS]; + void *dummy = NULL; + struct vm_gic v; + uint64_t addr; + int ret; + + v = vm_gic_create_with_vcpus(KVM_DEV_TYPE_ARM_VGIC_V3, NR_VCPUS, vcpus); + subtest_v3_redist_regions(&v); + kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL, + KVM_DEV_ARM_VGIC_CTRL_INIT, NULL); + + ret = run_vcpu(vcpus[3]); + TEST_ASSERT(ret == -ENXIO, "running without sufficient number of rdists"); + vm_gic_destroy(&v); + + /* step2 */ + + v = vm_gic_create_with_vcpus(KVM_DEV_TYPE_ARM_VGIC_V3, NR_VCPUS, vcpus); + subtest_v3_redist_regions(&v); + + addr = REDIST_REGION_ATTR_ADDR(1, 0x280000, 0, 2); + kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr); + + ret = run_vcpu(vcpus[3]); + TEST_ASSERT(ret == -EBUSY, "running without vgic explicit init"); + + vm_gic_destroy(&v); + + /* step 3 */ + + v = vm_gic_create_with_vcpus(KVM_DEV_TYPE_ARM_VGIC_V3, NR_VCPUS, vcpus); + subtest_v3_redist_regions(&v); + + ret = __kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, dummy); + TEST_ASSERT(ret && errno == EFAULT, + "register a third region allowing to cover the 4 vcpus"); + + addr = REDIST_REGION_ATTR_ADDR(1, 0x280000, 0, 2); + kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr); + + kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL, + KVM_DEV_ARM_VGIC_CTRL_INIT, NULL); + + ret = run_vcpu(vcpus[3]); + TEST_ASSERT(!ret, "vcpu run"); + + vm_gic_destroy(&v); +} + +static void test_v3_typer_accesses(void) +{ + struct vm_gic v; + uint64_t addr; + int ret, i; + + v.vm = vm_create(NR_VCPUS); + (void)vm_vcpu_add(v.vm, 0, guest_code); + + v.gic_fd = kvm_create_device(v.vm, KVM_DEV_TYPE_ARM_VGIC_V3); + + (void)vm_vcpu_add(v.vm, 3, guest_code); + + v3_redist_reg_get_errno(v.gic_fd, 1, GICR_TYPER, EINVAL, + "attempting to read GICR_TYPER of non created vcpu"); + + (void)vm_vcpu_add(v.vm, 1, guest_code); + + v3_redist_reg_get_errno(v.gic_fd, 1, GICR_TYPER, EBUSY, + "read GICR_TYPER before GIC initialized"); + + (void)vm_vcpu_add(v.vm, 2, guest_code); + + kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL, + KVM_DEV_ARM_VGIC_CTRL_INIT, NULL); + + for (i = 0; i < NR_VCPUS ; i++) { + v3_redist_reg_get(v.gic_fd, i, GICR_TYPER, i * 0x100, + "read GICR_TYPER before rdist region setting"); + } + + addr = REDIST_REGION_ATTR_ADDR(2, 0x200000, 0, 0); + kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr); + + /* The 2 first rdists should be put there (vcpu 0 and 3) */ + 
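+	/*
+	 * In this 32-bit view of GICR_TYPER, Processor_Number sits in bits
+	 * [23:8] and bit 4 is "Last", hence 0x310: vcpu 3 is the last
+	 * redistributor of its region.
+	 */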
v3_redist_reg_get(v.gic_fd, 0, GICR_TYPER, 0x0, "read typer of rdist #0"); + v3_redist_reg_get(v.gic_fd, 3, GICR_TYPER, 0x310, "read typer of rdist #1"); + + addr = REDIST_REGION_ATTR_ADDR(10, 0x100000, 0, 1); + ret = __kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr); + TEST_ASSERT(ret && errno == EINVAL, "collision with previous rdist region"); + + v3_redist_reg_get(v.gic_fd, 1, GICR_TYPER, 0x100, + "no redist region attached to vcpu #1 yet, last cannot be returned"); + v3_redist_reg_get(v.gic_fd, 2, GICR_TYPER, 0x200, + "no redist region attached to vcpu #2, last cannot be returned"); + + addr = REDIST_REGION_ATTR_ADDR(10, 0x20000, 0, 1); + kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr); + + v3_redist_reg_get(v.gic_fd, 1, GICR_TYPER, 0x100, "read typer of rdist #1"); + v3_redist_reg_get(v.gic_fd, 2, GICR_TYPER, 0x210, + "read typer of rdist #1, last properly returned"); + + vm_gic_destroy(&v); +} + +static struct vm_gic vm_gic_v3_create_with_vcpuids(int nr_vcpus, + uint32_t vcpuids[]) +{ + struct vm_gic v; + int i; + + v.vm = vm_create(nr_vcpus); + for (i = 0; i < nr_vcpus; i++) + vm_vcpu_add(v.vm, vcpuids[i], guest_code); + + v.gic_fd = kvm_create_device(v.vm, KVM_DEV_TYPE_ARM_VGIC_V3); + + return v; +} + +/** + * Test GICR_TYPER last bit with new redist regions + * rdist regions #1 and #2 are contiguous + * rdist region #0 @0x100000 2 rdist capacity + * rdists: 0, 3 (Last) + * rdist region #1 @0x240000 2 rdist capacity + * rdists: 5, 4 (Last) + * rdist region #2 @0x200000 2 rdist capacity + * rdists: 1, 2 + */ +static void test_v3_last_bit_redist_regions(void) +{ + uint32_t vcpuids[] = { 0, 3, 5, 4, 1, 2 }; + struct vm_gic v; + uint64_t addr; + + v = vm_gic_v3_create_with_vcpuids(ARRAY_SIZE(vcpuids), vcpuids); + + kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL, + KVM_DEV_ARM_VGIC_CTRL_INIT, NULL); + + addr = REDIST_REGION_ATTR_ADDR(2, 0x100000, 0, 0); + kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr); + + addr = REDIST_REGION_ATTR_ADDR(2, 0x240000, 0, 1); + kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr); + + addr = REDIST_REGION_ATTR_ADDR(2, 0x200000, 0, 2); + kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr); + + v3_redist_reg_get(v.gic_fd, 0, GICR_TYPER, 0x000, "read typer of rdist #0"); + v3_redist_reg_get(v.gic_fd, 1, GICR_TYPER, 0x100, "read typer of rdist #1"); + v3_redist_reg_get(v.gic_fd, 2, GICR_TYPER, 0x200, "read typer of rdist #2"); + v3_redist_reg_get(v.gic_fd, 3, GICR_TYPER, 0x310, "read typer of rdist #3"); + v3_redist_reg_get(v.gic_fd, 5, GICR_TYPER, 0x500, "read typer of rdist #5"); + v3_redist_reg_get(v.gic_fd, 4, GICR_TYPER, 0x410, "read typer of rdist #4"); + + vm_gic_destroy(&v); +} + +/* Test last bit with legacy region */ +static void test_v3_last_bit_single_rdist(void) +{ + uint32_t vcpuids[] = { 0, 3, 5, 4, 1, 2 }; + struct vm_gic v; + uint64_t addr; + + v = vm_gic_v3_create_with_vcpuids(ARRAY_SIZE(vcpuids), vcpuids); + + kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL, + KVM_DEV_ARM_VGIC_CTRL_INIT, NULL); + + addr = 0x10000; + kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_V3_ADDR_TYPE_REDIST, &addr); + + v3_redist_reg_get(v.gic_fd, 0, GICR_TYPER, 0x000, "read typer of rdist #0"); + v3_redist_reg_get(v.gic_fd, 3, GICR_TYPER, 0x300, "read typer of rdist 
#1"); + v3_redist_reg_get(v.gic_fd, 5, GICR_TYPER, 0x500, "read typer of rdist #2"); + v3_redist_reg_get(v.gic_fd, 1, GICR_TYPER, 0x100, "read typer of rdist #3"); + v3_redist_reg_get(v.gic_fd, 2, GICR_TYPER, 0x210, "read typer of rdist #3"); + + vm_gic_destroy(&v); +} + +/* Uses the legacy REDIST region API. */ +static void test_v3_redist_ipa_range_check_at_vcpu_run(void) +{ + struct kvm_vcpu *vcpus[NR_VCPUS]; + struct vm_gic v; + int ret, i; + uint64_t addr; + + v = vm_gic_create_with_vcpus(KVM_DEV_TYPE_ARM_VGIC_V3, 1, vcpus); + + /* Set space for 3 redists, we have 1 vcpu, so this succeeds. */ + addr = max_phys_size - (3 * 2 * 0x10000); + kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_V3_ADDR_TYPE_REDIST, &addr); + + addr = 0x00000; + kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_V3_ADDR_TYPE_DIST, &addr); + + /* Add the rest of the VCPUs */ + for (i = 1; i < NR_VCPUS; ++i) + vcpus[i] = vm_vcpu_add(v.vm, i, guest_code); + + kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL, + KVM_DEV_ARM_VGIC_CTRL_INIT, NULL); + + /* Attempt to run a vcpu without enough redist space. */ + ret = run_vcpu(vcpus[2]); + TEST_ASSERT(ret && errno == EINVAL, + "redist base+size above PA range detected on 1st vcpu run"); + + vm_gic_destroy(&v); +} + +static void test_v3_its_region(void) +{ + struct kvm_vcpu *vcpus[NR_VCPUS]; + struct vm_gic v; + uint64_t addr; + int its_fd, ret; + + v = vm_gic_create_with_vcpus(KVM_DEV_TYPE_ARM_VGIC_V3, NR_VCPUS, vcpus); + its_fd = kvm_create_device(v.vm, KVM_DEV_TYPE_ARM_VGIC_ITS); + + addr = 0x401000; + ret = __kvm_device_attr_set(its_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_ITS_ADDR_TYPE, &addr); + TEST_ASSERT(ret && errno == EINVAL, + "ITS region with misaligned address"); + + addr = max_phys_size; + ret = __kvm_device_attr_set(its_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_ITS_ADDR_TYPE, &addr); + TEST_ASSERT(ret && errno == E2BIG, + "register ITS region with base address beyond IPA range"); + + addr = max_phys_size - 0x10000; + ret = __kvm_device_attr_set(its_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_ITS_ADDR_TYPE, &addr); + TEST_ASSERT(ret && errno == E2BIG, + "Half of ITS region is beyond IPA range"); + + /* This one succeeds setting the ITS base */ + addr = 0x400000; + kvm_device_attr_set(its_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_ITS_ADDR_TYPE, &addr); + + addr = 0x300000; + ret = __kvm_device_attr_set(its_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_ITS_ADDR_TYPE, &addr); + TEST_ASSERT(ret && errno == EEXIST, "ITS base set again"); + + close(its_fd); + vm_gic_destroy(&v); +} + +/* + * Returns 0 if it's possible to create GIC device of a given type (V2 or V3). + */ +int test_kvm_device(uint32_t gic_dev_type) +{ + struct kvm_vcpu *vcpus[NR_VCPUS]; + struct vm_gic v; + uint32_t other; + int ret; + + v.vm = vm_create_with_vcpus(NR_VCPUS, guest_code, vcpus); + + /* try to create a non existing KVM device */ + ret = __kvm_test_create_device(v.vm, 0); + TEST_ASSERT(ret && errno == ENODEV, "unsupported device"); + + /* trial mode */ + ret = __kvm_test_create_device(v.vm, gic_dev_type); + if (ret) + return ret; + v.gic_fd = kvm_create_device(v.vm, gic_dev_type); + + ret = __kvm_create_device(v.vm, gic_dev_type); + TEST_ASSERT(ret < 0 && errno == EEXIST, "create GIC device twice"); + + /* try to create the other gic_dev_type */ + other = VGIC_DEV_IS_V2(gic_dev_type) ? 
KVM_DEV_TYPE_ARM_VGIC_V3 + : KVM_DEV_TYPE_ARM_VGIC_V2; + + if (!__kvm_test_create_device(v.vm, other)) { + ret = __kvm_create_device(v.vm, other); + TEST_ASSERT(ret < 0 && (errno == EINVAL || errno == EEXIST), + "create GIC device while other version exists"); + } + + vm_gic_destroy(&v); + + return 0; +} + +void run_tests(uint32_t gic_dev_type) +{ + test_vcpus_then_vgic(gic_dev_type); + test_vgic_then_vcpus(gic_dev_type); + + if (VGIC_DEV_IS_V2(gic_dev_type)) + test_v2_uaccess_cpuif_no_vcpus(); + + if (VGIC_DEV_IS_V3(gic_dev_type)) { + test_v3_new_redist_regions(); + test_v3_typer_accesses(); + test_v3_last_bit_redist_regions(); + test_v3_last_bit_single_rdist(); + test_v3_redist_ipa_range_check_at_vcpu_run(); + test_v3_its_region(); + } +} + +int main(int ac, char **av) +{ + int ret; + int pa_bits; + int cnt_impl = 0; + + pa_bits = vm_guest_mode_params[VM_MODE_DEFAULT].pa_bits; + max_phys_size = 1ULL << pa_bits; + + ret = test_kvm_device(KVM_DEV_TYPE_ARM_VGIC_V3); + if (!ret) { + pr_info("Running GIC_v3 tests.\n"); + run_tests(KVM_DEV_TYPE_ARM_VGIC_V3); + cnt_impl++; + } + + ret = test_kvm_device(KVM_DEV_TYPE_ARM_VGIC_V2); + if (!ret) { + pr_info("Running GIC_v2 tests.\n"); + run_tests(KVM_DEV_TYPE_ARM_VGIC_V2); + cnt_impl++; + } + + if (!cnt_impl) { + print_skip("No GICv2 nor GICv3 support"); + exit(KSFT_SKIP); + } + return 0; +} diff --git a/tools/testing/selftests/kvm/arm64/vgic_irq.c b/tools/testing/selftests/kvm/arm64/vgic_irq.c new file mode 100644 index 000000000000..f4ac28d53747 --- /dev/null +++ b/tools/testing/selftests/kvm/arm64/vgic_irq.c @@ -0,0 +1,847 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * vgic_irq.c - Test userspace injection of IRQs + * + * This test validates the injection of IRQs from userspace using various + * methods (e.g., KVM_IRQ_LINE) and modes (e.g., EOI). The guest "asks" the + * host to inject a specific intid via a GUEST_SYNC call, and then checks that + * it received it. + */ +#include +#include +#include +#include + +#include "processor.h" +#include "test_util.h" +#include "kvm_util.h" +#include "gic.h" +#include "gic_v3.h" +#include "vgic.h" + +/* + * Stores the user specified args; it's passed to the guest and to every test + * function. + */ +struct test_args { + uint32_t nr_irqs; /* number of KVM supported IRQs. */ + bool eoi_split; /* 1 is eoir+dir, 0 is eoir only */ + bool level_sensitive; /* 1 is level, 0 is edge */ + int kvm_max_routes; /* output of KVM_CAP_IRQ_ROUTING */ + bool kvm_supports_irqfd; /* output of KVM_CAP_IRQFD */ +}; + +/* + * KVM implements 32 priority levels: + * 0x00 (highest priority) - 0xF8 (lowest priority), in steps of 8 + * + * Note that these macros will still be correct in the case that KVM implements + * more priority levels. Also note that 32 is the minimum for GICv3 and GICv2. + */ +#define KVM_NUM_PRIOS 32 +#define KVM_PRIO_SHIFT 3 /* steps of 8 = 1 << 3 */ +#define KVM_PRIO_STEPS (1 << KVM_PRIO_SHIFT) /* 8 */ +#define LOWEST_PRIO (KVM_NUM_PRIOS - 1) +#define CPU_PRIO_MASK (LOWEST_PRIO << KVM_PRIO_SHIFT) /* 0xf8 */ +#define IRQ_DEFAULT_PRIO (LOWEST_PRIO - 1) +#define IRQ_DEFAULT_PRIO_REG (IRQ_DEFAULT_PRIO << KVM_PRIO_SHIFT) /* 0xf0 */ + +/* + * The kvm_inject_* utilities are used by the guest to ask the host to inject + * interrupts (e.g., using the KVM_IRQ_LINE ioctl). 
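+ * Each request travels as a GUEST_SYNC ucall carrying a pointer to a
+ * struct kvm_inject_args that the host side unpacks.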
+ */ + +typedef enum { + KVM_INJECT_EDGE_IRQ_LINE = 1, + KVM_SET_IRQ_LINE, + KVM_SET_IRQ_LINE_HIGH, + KVM_SET_LEVEL_INFO_HIGH, + KVM_INJECT_IRQFD, + KVM_WRITE_ISPENDR, + KVM_WRITE_ISACTIVER, +} kvm_inject_cmd; + +struct kvm_inject_args { + kvm_inject_cmd cmd; + uint32_t first_intid; + uint32_t num; + int level; + bool expect_failure; +}; + +/* Used on the guest side to perform the hypercall. */ +static void kvm_inject_call(kvm_inject_cmd cmd, uint32_t first_intid, + uint32_t num, int level, bool expect_failure); + +/* Used on the host side to get the hypercall info. */ +static void kvm_inject_get_call(struct kvm_vm *vm, struct ucall *uc, + struct kvm_inject_args *args); + +#define _KVM_INJECT_MULTI(cmd, intid, num, expect_failure) \ + kvm_inject_call(cmd, intid, num, -1 /* not used */, expect_failure) + +#define KVM_INJECT_MULTI(cmd, intid, num) \ + _KVM_INJECT_MULTI(cmd, intid, num, false) + +#define _KVM_INJECT(cmd, intid, expect_failure) \ + _KVM_INJECT_MULTI(cmd, intid, 1, expect_failure) + +#define KVM_INJECT(cmd, intid) \ + _KVM_INJECT_MULTI(cmd, intid, 1, false) + +#define KVM_ACTIVATE(cmd, intid) \ + kvm_inject_call(cmd, intid, 1, 1, false); + +struct kvm_inject_desc { + kvm_inject_cmd cmd; + /* can inject PPIs, PPIs, and/or SPIs. */ + bool sgi, ppi, spi; +}; + +static struct kvm_inject_desc inject_edge_fns[] = { + /* sgi ppi spi */ + { KVM_INJECT_EDGE_IRQ_LINE, false, false, true }, + { KVM_INJECT_IRQFD, false, false, true }, + { KVM_WRITE_ISPENDR, true, false, true }, + { 0, }, +}; + +static struct kvm_inject_desc inject_level_fns[] = { + /* sgi ppi spi */ + { KVM_SET_IRQ_LINE_HIGH, false, true, true }, + { KVM_SET_LEVEL_INFO_HIGH, false, true, true }, + { KVM_INJECT_IRQFD, false, false, true }, + { KVM_WRITE_ISPENDR, false, true, true }, + { 0, }, +}; + +static struct kvm_inject_desc set_active_fns[] = { + /* sgi ppi spi */ + { KVM_WRITE_ISACTIVER, true, true, true }, + { 0, }, +}; + +#define for_each_inject_fn(t, f) \ + for ((f) = (t); (f)->cmd; (f)++) + +#define for_each_supported_inject_fn(args, t, f) \ + for_each_inject_fn(t, f) \ + if ((args)->kvm_supports_irqfd || (f)->cmd != KVM_INJECT_IRQFD) + +#define for_each_supported_activate_fn(args, t, f) \ + for_each_supported_inject_fn((args), (t), (f)) + +/* Shared between the guest main thread and the IRQ handlers. 
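+ * Only one vCPU runs the test, so no locking is needed; volatile keeps the
+ * compiler from caching the counters across the wfi/handler round trips.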
*/ +volatile uint64_t irq_handled; +volatile uint32_t irqnr_received[MAX_SPI + 1]; + +static void reset_stats(void) +{ + int i; + + irq_handled = 0; + for (i = 0; i <= MAX_SPI; i++) + irqnr_received[i] = 0; +} + +static uint64_t gic_read_ap1r0(void) +{ + uint64_t reg = read_sysreg_s(SYS_ICC_AP1R0_EL1); + + dsb(sy); + return reg; +} + +static void gic_write_ap1r0(uint64_t val) +{ + write_sysreg_s(val, SYS_ICC_AP1R0_EL1); + isb(); +} + +static void guest_set_irq_line(uint32_t intid, uint32_t level); + +static void guest_irq_generic_handler(bool eoi_split, bool level_sensitive) +{ + uint32_t intid = gic_get_and_ack_irq(); + + if (intid == IAR_SPURIOUS) + return; + + GUEST_ASSERT(gic_irq_get_active(intid)); + + if (!level_sensitive) + GUEST_ASSERT(!gic_irq_get_pending(intid)); + + if (level_sensitive) + guest_set_irq_line(intid, 0); + + GUEST_ASSERT(intid < MAX_SPI); + irqnr_received[intid] += 1; + irq_handled += 1; + + gic_set_eoi(intid); + GUEST_ASSERT_EQ(gic_read_ap1r0(), 0); + if (eoi_split) + gic_set_dir(intid); + + GUEST_ASSERT(!gic_irq_get_active(intid)); + GUEST_ASSERT(!gic_irq_get_pending(intid)); +} + +static void kvm_inject_call(kvm_inject_cmd cmd, uint32_t first_intid, + uint32_t num, int level, bool expect_failure) +{ + struct kvm_inject_args args = { + .cmd = cmd, + .first_intid = first_intid, + .num = num, + .level = level, + .expect_failure = expect_failure, + }; + GUEST_SYNC(&args); +} + +#define GUEST_ASSERT_IAR_EMPTY() \ +do { \ + uint32_t _intid; \ + _intid = gic_get_and_ack_irq(); \ + GUEST_ASSERT(_intid == 0 || _intid == IAR_SPURIOUS); \ +} while (0) + +#define CAT_HELPER(a, b) a ## b +#define CAT(a, b) CAT_HELPER(a, b) +#define PREFIX guest_irq_handler_ +#define GUEST_IRQ_HANDLER_NAME(split, lev) CAT(PREFIX, CAT(split, lev)) +#define GENERATE_GUEST_IRQ_HANDLER(split, lev) \ +static void CAT(PREFIX, CAT(split, lev))(struct ex_regs *regs) \ +{ \ + guest_irq_generic_handler(split, lev); \ +} + +GENERATE_GUEST_IRQ_HANDLER(0, 0); +GENERATE_GUEST_IRQ_HANDLER(0, 1); +GENERATE_GUEST_IRQ_HANDLER(1, 0); +GENERATE_GUEST_IRQ_HANDLER(1, 1); + +static void (*guest_irq_handlers[2][2])(struct ex_regs *) = { + {GUEST_IRQ_HANDLER_NAME(0, 0), GUEST_IRQ_HANDLER_NAME(0, 1),}, + {GUEST_IRQ_HANDLER_NAME(1, 0), GUEST_IRQ_HANDLER_NAME(1, 1),}, +}; + +static void reset_priorities(struct test_args *args) +{ + int i; + + for (i = 0; i < args->nr_irqs; i++) + gic_set_priority(i, IRQ_DEFAULT_PRIO_REG); +} + +static void guest_set_irq_line(uint32_t intid, uint32_t level) +{ + kvm_inject_call(KVM_SET_IRQ_LINE, intid, 1, level, false); +} + +static void test_inject_fail(struct test_args *args, + uint32_t intid, kvm_inject_cmd cmd) +{ + reset_stats(); + + _KVM_INJECT(cmd, intid, true); + /* no IRQ to handle on entry */ + + GUEST_ASSERT_EQ(irq_handled, 0); + GUEST_ASSERT_IAR_EMPTY(); +} + +static void guest_inject(struct test_args *args, + uint32_t first_intid, uint32_t num, + kvm_inject_cmd cmd) +{ + uint32_t i; + + reset_stats(); + + /* Cycle over all priorities to make things more interesting. 
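+	 * Every assigned priority stays numerically below the 0xf8 mask
+	 * programmed in guest_code(), so all of the injected IRQs can still be
+	 * delivered.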
*/ + for (i = first_intid; i < num + first_intid; i++) + gic_set_priority(i, (i % (KVM_NUM_PRIOS - 1)) << 3); + + asm volatile("msr daifset, #2" : : : "memory"); + KVM_INJECT_MULTI(cmd, first_intid, num); + + while (irq_handled < num) { + wfi(); + local_irq_enable(); + isb(); /* handle IRQ */ + local_irq_disable(); + } + local_irq_enable(); + + GUEST_ASSERT_EQ(irq_handled, num); + for (i = first_intid; i < num + first_intid; i++) + GUEST_ASSERT_EQ(irqnr_received[i], 1); + GUEST_ASSERT_IAR_EMPTY(); + + reset_priorities(args); +} + +/* + * Restore the active state of multiple concurrent IRQs (given by + * concurrent_irqs). This does what a live-migration would do on the + * destination side assuming there are some active IRQs that were not + * deactivated yet. + */ +static void guest_restore_active(struct test_args *args, + uint32_t first_intid, uint32_t num, + kvm_inject_cmd cmd) +{ + uint32_t prio, intid, ap1r; + int i; + + /* + * Set the priorities of the first (KVM_NUM_PRIOS - 1) IRQs + * in descending order, so intid+1 can preempt intid. + */ + for (i = 0, prio = (num - 1) * 8; i < num; i++, prio -= 8) { + GUEST_ASSERT(prio >= 0); + intid = i + first_intid; + gic_set_priority(intid, prio); + } + + /* + * In a real migration, KVM would restore all GIC state before running + * guest code. + */ + for (i = 0; i < num; i++) { + intid = i + first_intid; + KVM_ACTIVATE(cmd, intid); + ap1r = gic_read_ap1r0(); + ap1r |= 1U << i; + gic_write_ap1r0(ap1r); + } + + /* This is where the "migration" would occur. */ + + /* finish handling the IRQs starting with the highest priority one. */ + for (i = 0; i < num; i++) { + intid = num - i - 1 + first_intid; + gic_set_eoi(intid); + if (args->eoi_split) + gic_set_dir(intid); + } + + for (i = 0; i < num; i++) + GUEST_ASSERT(!gic_irq_get_active(i + first_intid)); + GUEST_ASSERT_EQ(gic_read_ap1r0(), 0); + GUEST_ASSERT_IAR_EMPTY(); +} + +/* + * Polls the IAR until it's not a spurious interrupt. + * + * This function should only be used in test_inject_preemption (with IRQs + * masked). + */ +static uint32_t wait_for_and_activate_irq(void) +{ + uint32_t intid; + + do { + asm volatile("wfi" : : : "memory"); + intid = gic_get_and_ack_irq(); + } while (intid == IAR_SPURIOUS); + + return intid; +} + +/* + * Inject multiple concurrent IRQs (num IRQs starting at first_intid) and + * handle them without handling the actual exceptions. This is done by masking + * interrupts for the whole test. + */ +static void test_inject_preemption(struct test_args *args, + uint32_t first_intid, int num, + kvm_inject_cmd cmd) +{ + uint32_t intid, prio, step = KVM_PRIO_STEPS; + int i; + + /* Set the priorities of the first (KVM_NUM_PRIOS - 1) IRQs + * in descending order, so intid+1 can preempt intid. + */ + for (i = 0, prio = (num - 1) * step; i < num; i++, prio -= step) { + GUEST_ASSERT(prio >= 0); + intid = i + first_intid; + gic_set_priority(intid, prio); + } + + local_irq_disable(); + + for (i = 0; i < num; i++) { + uint32_t tmp; + intid = i + first_intid; + KVM_INJECT(cmd, intid); + /* Each successive IRQ will preempt the previous one. */ + tmp = wait_for_and_activate_irq(); + GUEST_ASSERT_EQ(tmp, intid); + if (args->level_sensitive) + guest_set_irq_line(intid, 0); + } + + /* finish handling the IRQs starting with the highest priority one. 
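+	 * EOI drops the running priority one level at a time, so the active
+	 * IRQs are unwound from the innermost (highest-priority) one outwards.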
*/ + for (i = 0; i < num; i++) { + intid = num - i - 1 + first_intid; + gic_set_eoi(intid); + if (args->eoi_split) + gic_set_dir(intid); + } + + local_irq_enable(); + + for (i = 0; i < num; i++) + GUEST_ASSERT(!gic_irq_get_active(i + first_intid)); + GUEST_ASSERT_EQ(gic_read_ap1r0(), 0); + GUEST_ASSERT_IAR_EMPTY(); + + reset_priorities(args); +} + +static void test_injection(struct test_args *args, struct kvm_inject_desc *f) +{ + uint32_t nr_irqs = args->nr_irqs; + + if (f->sgi) { + guest_inject(args, MIN_SGI, 1, f->cmd); + guest_inject(args, 0, 16, f->cmd); + } + + if (f->ppi) + guest_inject(args, MIN_PPI, 1, f->cmd); + + if (f->spi) { + guest_inject(args, MIN_SPI, 1, f->cmd); + guest_inject(args, nr_irqs - 1, 1, f->cmd); + guest_inject(args, MIN_SPI, nr_irqs - MIN_SPI, f->cmd); + } +} + +static void test_injection_failure(struct test_args *args, + struct kvm_inject_desc *f) +{ + uint32_t bad_intid[] = { args->nr_irqs, 1020, 1024, 1120, 5120, ~0U, }; + int i; + + for (i = 0; i < ARRAY_SIZE(bad_intid); i++) + test_inject_fail(args, bad_intid[i], f->cmd); +} + +static void test_preemption(struct test_args *args, struct kvm_inject_desc *f) +{ + /* + * Test up to 4 levels of preemption. The reason is that KVM doesn't + * currently implement the ability to have more than the number-of-LRs + * number of concurrently active IRQs. The number of LRs implemented is + * IMPLEMENTATION DEFINED, however, it seems that most implement 4. + */ + if (f->sgi) + test_inject_preemption(args, MIN_SGI, 4, f->cmd); + + if (f->ppi) + test_inject_preemption(args, MIN_PPI, 4, f->cmd); + + if (f->spi) + test_inject_preemption(args, MIN_SPI, 4, f->cmd); +} + +static void test_restore_active(struct test_args *args, struct kvm_inject_desc *f) +{ + /* Test up to 4 active IRQs. Same reason as in test_preemption. */ + if (f->sgi) + guest_restore_active(args, MIN_SGI, 4, f->cmd); + + if (f->ppi) + guest_restore_active(args, MIN_PPI, 4, f->cmd); + + if (f->spi) + guest_restore_active(args, MIN_SPI, 4, f->cmd); +} + +static void guest_code(struct test_args *args) +{ + uint32_t i, nr_irqs = args->nr_irqs; + bool level_sensitive = args->level_sensitive; + struct kvm_inject_desc *f, *inject_fns; + + gic_init(GIC_V3, 1); + + for (i = 0; i < nr_irqs; i++) + gic_irq_enable(i); + + for (i = MIN_SPI; i < nr_irqs; i++) + gic_irq_set_config(i, !level_sensitive); + + gic_set_eoi_split(args->eoi_split); + + reset_priorities(args); + gic_set_priority_mask(CPU_PRIO_MASK); + + inject_fns = level_sensitive ? inject_level_fns + : inject_edge_fns; + + local_irq_enable(); + + /* Start the tests. */ + for_each_supported_inject_fn(args, inject_fns, f) { + test_injection(args, f); + test_preemption(args, f); + test_injection_failure(args, f); + } + + /* + * Restore the active state of IRQs. This would happen when live + * migrating IRQs in the middle of being handled. + */ + for_each_supported_activate_fn(args, set_active_fns, f) + test_restore_active(args, f); + + GUEST_DONE(); +} + +static void kvm_irq_line_check(struct kvm_vm *vm, uint32_t intid, int level, + struct test_args *test_args, bool expect_failure) +{ + int ret; + + if (!expect_failure) { + kvm_arm_irq_line(vm, intid, level); + } else { + /* The interface doesn't allow larger intid's. 
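+		 * KVM_IRQ_LINE only has KVM_ARM_IRQ_NUM_MASK bits for the
+		 * interrupt number, so larger values cannot even be encoded in
+		 * the request.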
*/ + if (intid > KVM_ARM_IRQ_NUM_MASK) + return; + + ret = _kvm_arm_irq_line(vm, intid, level); + TEST_ASSERT(ret != 0 && errno == EINVAL, + "Bad intid %i did not cause KVM_IRQ_LINE " + "error: rc: %i errno: %i", intid, ret, errno); + } +} + +void kvm_irq_set_level_info_check(int gic_fd, uint32_t intid, int level, + bool expect_failure) +{ + if (!expect_failure) { + kvm_irq_set_level_info(gic_fd, intid, level); + } else { + int ret = _kvm_irq_set_level_info(gic_fd, intid, level); + /* + * The kernel silently fails for invalid SPIs and SGIs (which + * are not level-sensitive). It only checks for intid to not + * spill over 1U << 10 (the max reserved SPI). Also, callers + * are supposed to mask the intid with 0x3ff (1023). + */ + if (intid > VGIC_MAX_RESERVED) + TEST_ASSERT(ret != 0 && errno == EINVAL, + "Bad intid %i did not cause VGIC_GRP_LEVEL_INFO " + "error: rc: %i errno: %i", intid, ret, errno); + else + TEST_ASSERT(!ret, "KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO " + "for intid %i failed, rc: %i errno: %i", + intid, ret, errno); + } +} + +static void kvm_set_gsi_routing_irqchip_check(struct kvm_vm *vm, + uint32_t intid, uint32_t num, uint32_t kvm_max_routes, + bool expect_failure) +{ + struct kvm_irq_routing *routing; + int ret; + uint64_t i; + + assert(num <= kvm_max_routes && kvm_max_routes <= KVM_MAX_IRQ_ROUTES); + + routing = kvm_gsi_routing_create(); + for (i = intid; i < (uint64_t)intid + num; i++) + kvm_gsi_routing_irqchip_add(routing, i - MIN_SPI, i - MIN_SPI); + + if (!expect_failure) { + kvm_gsi_routing_write(vm, routing); + } else { + ret = _kvm_gsi_routing_write(vm, routing); + /* The kernel only checks e->irqchip.pin >= KVM_IRQCHIP_NUM_PINS */ + if (((uint64_t)intid + num - 1 - MIN_SPI) >= KVM_IRQCHIP_NUM_PINS) + TEST_ASSERT(ret != 0 && errno == EINVAL, + "Bad intid %u did not cause KVM_SET_GSI_ROUTING " + "error: rc: %i errno: %i", intid, ret, errno); + else + TEST_ASSERT(ret == 0, "KVM_SET_GSI_ROUTING " + "for intid %i failed, rc: %i errno: %i", + intid, ret, errno); + } +} + +static void kvm_irq_write_ispendr_check(int gic_fd, uint32_t intid, + struct kvm_vcpu *vcpu, + bool expect_failure) +{ + /* + * Ignore this when expecting failure as invalid intids will lead to + * either trying to inject SGIs when we configured the test to be + * level_sensitive (or the reverse), or inject large intids which + * will lead to writing above the ISPENDR register space (and we + * don't want to do that either). + */ + if (!expect_failure) + kvm_irq_write_ispendr(gic_fd, intid, vcpu); +} + +static void kvm_routing_and_irqfd_check(struct kvm_vm *vm, + uint32_t intid, uint32_t num, uint32_t kvm_max_routes, + bool expect_failure) +{ + int fd[MAX_SPI]; + uint64_t val; + int ret, f; + uint64_t i; + + /* + * There is no way to try injecting an SGI or PPI as the interface + * starts counting from the first SPI (above the private ones), so just + * exit. + */ + if (INTID_IS_SGI(intid) || INTID_IS_PPI(intid)) + return; + + kvm_set_gsi_routing_irqchip_check(vm, intid, num, + kvm_max_routes, expect_failure); + + /* + * If expect_failure, then just to inject anyway. These + * will silently fail. And in any case, the guest will check + * that no actual interrupt was injected for those cases. 
+ */ + + for (f = 0, i = intid; i < (uint64_t)intid + num; i++, f++) { + fd[f] = eventfd(0, 0); + TEST_ASSERT(fd[f] != -1, __KVM_SYSCALL_ERROR("eventfd()", fd[f])); + } + + for (f = 0, i = intid; i < (uint64_t)intid + num; i++, f++) { + struct kvm_irqfd irqfd = { + .fd = fd[f], + .gsi = i - MIN_SPI, + }; + assert(i <= (uint64_t)UINT_MAX); + vm_ioctl(vm, KVM_IRQFD, &irqfd); + } + + for (f = 0, i = intid; i < (uint64_t)intid + num; i++, f++) { + val = 1; + ret = write(fd[f], &val, sizeof(uint64_t)); + TEST_ASSERT(ret == sizeof(uint64_t), + __KVM_SYSCALL_ERROR("write()", ret)); + } + + for (f = 0, i = intid; i < (uint64_t)intid + num; i++, f++) + close(fd[f]); +} + +/* handles the valid case: intid=0xffffffff num=1 */ +#define for_each_intid(first, num, tmp, i) \ + for ((tmp) = (i) = (first); \ + (tmp) < (uint64_t)(first) + (uint64_t)(num); \ + (tmp)++, (i)++) + +static void run_guest_cmd(struct kvm_vcpu *vcpu, int gic_fd, + struct kvm_inject_args *inject_args, + struct test_args *test_args) +{ + kvm_inject_cmd cmd = inject_args->cmd; + uint32_t intid = inject_args->first_intid; + uint32_t num = inject_args->num; + int level = inject_args->level; + bool expect_failure = inject_args->expect_failure; + struct kvm_vm *vm = vcpu->vm; + uint64_t tmp; + uint32_t i; + + /* handles the valid case: intid=0xffffffff num=1 */ + assert(intid < UINT_MAX - num || num == 1); + + switch (cmd) { + case KVM_INJECT_EDGE_IRQ_LINE: + for_each_intid(intid, num, tmp, i) + kvm_irq_line_check(vm, i, 1, test_args, + expect_failure); + for_each_intid(intid, num, tmp, i) + kvm_irq_line_check(vm, i, 0, test_args, + expect_failure); + break; + case KVM_SET_IRQ_LINE: + for_each_intid(intid, num, tmp, i) + kvm_irq_line_check(vm, i, level, test_args, + expect_failure); + break; + case KVM_SET_IRQ_LINE_HIGH: + for_each_intid(intid, num, tmp, i) + kvm_irq_line_check(vm, i, 1, test_args, + expect_failure); + break; + case KVM_SET_LEVEL_INFO_HIGH: + for_each_intid(intid, num, tmp, i) + kvm_irq_set_level_info_check(gic_fd, i, 1, + expect_failure); + break; + case KVM_INJECT_IRQFD: + kvm_routing_and_irqfd_check(vm, intid, num, + test_args->kvm_max_routes, + expect_failure); + break; + case KVM_WRITE_ISPENDR: + for (i = intid; i < intid + num; i++) + kvm_irq_write_ispendr_check(gic_fd, i, vcpu, + expect_failure); + break; + case KVM_WRITE_ISACTIVER: + for (i = intid; i < intid + num; i++) + kvm_irq_write_isactiver(gic_fd, i, vcpu); + break; + default: + break; + } +} + +static void kvm_inject_get_call(struct kvm_vm *vm, struct ucall *uc, + struct kvm_inject_args *args) +{ + struct kvm_inject_args *kvm_args_hva; + vm_vaddr_t kvm_args_gva; + + kvm_args_gva = uc->args[1]; + kvm_args_hva = (struct kvm_inject_args *)addr_gva2hva(vm, kvm_args_gva); + memcpy(args, kvm_args_hva, sizeof(struct kvm_inject_args)); +} + +static void print_args(struct test_args *args) +{ + printf("nr-irqs=%d level-sensitive=%d eoi-split=%d\n", + args->nr_irqs, args->level_sensitive, + args->eoi_split); +} + +static void test_vgic(uint32_t nr_irqs, bool level_sensitive, bool eoi_split) +{ + struct ucall uc; + int gic_fd; + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + struct kvm_inject_args inject_args; + vm_vaddr_t args_gva; + + struct test_args args = { + .nr_irqs = nr_irqs, + .level_sensitive = level_sensitive, + .eoi_split = eoi_split, + .kvm_max_routes = kvm_check_cap(KVM_CAP_IRQ_ROUTING), + .kvm_supports_irqfd = kvm_check_cap(KVM_CAP_IRQFD), + }; + + print_args(&args); + + vm = vm_create_with_one_vcpu(&vcpu, guest_code); + + vm_init_descriptor_tables(vm); 
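+	/* Per-vCPU vector table setup, needed for the IRQ handler installed below. */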
+ vcpu_init_descriptor_tables(vcpu); + + /* Setup the guest args page (so it gets the args). */ + args_gva = vm_vaddr_alloc_page(vm); + memcpy(addr_gva2hva(vm, args_gva), &args, sizeof(args)); + vcpu_args_set(vcpu, 1, args_gva); + + gic_fd = vgic_v3_setup(vm, 1, nr_irqs); + __TEST_REQUIRE(gic_fd >= 0, "Failed to create vgic-v3, skipping"); + + vm_install_exception_handler(vm, VECTOR_IRQ_CURRENT, + guest_irq_handlers[args.eoi_split][args.level_sensitive]); + + while (1) { + vcpu_run(vcpu); + + switch (get_ucall(vcpu, &uc)) { + case UCALL_SYNC: + kvm_inject_get_call(vm, &uc, &inject_args); + run_guest_cmd(vcpu, gic_fd, &inject_args, &args); + break; + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + break; + case UCALL_DONE: + goto done; + default: + TEST_FAIL("Unknown ucall %lu", uc.cmd); + } + } + +done: + close(gic_fd); + kvm_vm_free(vm); +} + +static void help(const char *name) +{ + printf( + "\n" + "usage: %s [-n num_irqs] [-e eoi_split] [-l level_sensitive]\n", name); + printf(" -n: specify number of IRQs to setup the vgic with. " + "It has to be a multiple of 32 and between 64 and 1024.\n"); + printf(" -e: if 1 then EOI is split into a write to DIR on top " + "of writing EOI.\n"); + printf(" -l: specify whether the IRQs are level-sensitive (1) or not (0)."); + puts(""); + exit(1); +} + +int main(int argc, char **argv) +{ + uint32_t nr_irqs = 64; + bool default_args = true; + bool level_sensitive = false; + int opt; + bool eoi_split = false; + + while ((opt = getopt(argc, argv, "hn:e:l:")) != -1) { + switch (opt) { + case 'n': + nr_irqs = atoi_non_negative("Number of IRQs", optarg); + if (nr_irqs > 1024 || nr_irqs % 32) + help(argv[0]); + break; + case 'e': + eoi_split = (bool)atoi_paranoid(optarg); + default_args = false; + break; + case 'l': + level_sensitive = (bool)atoi_paranoid(optarg); + default_args = false; + break; + case 'h': + default: + help(argv[0]); + break; + } + } + + /* + * If the user just specified nr_irqs and/or gic_version, then run all + * combinations. 
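+	 * All four eoi_split/level_sensitive pairs are exercised in that case.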
+ */ + if (default_args) { + test_vgic(nr_irqs, false /* level */, false /* eoi_split */); + test_vgic(nr_irqs, false /* level */, true /* eoi_split */); + test_vgic(nr_irqs, true /* level */, false /* eoi_split */); + test_vgic(nr_irqs, true /* level */, true /* eoi_split */); + } else { + test_vgic(nr_irqs, level_sensitive, eoi_split); + } + + return 0; +} diff --git a/tools/testing/selftests/kvm/arm64/vgic_lpi_stress.c b/tools/testing/selftests/kvm/arm64/vgic_lpi_stress.c new file mode 100644 index 000000000000..fc4fe52fb6f8 --- /dev/null +++ b/tools/testing/selftests/kvm/arm64/vgic_lpi_stress.c @@ -0,0 +1,410 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * vgic_lpi_stress - Stress test for KVM's ITS emulation + * + * Copyright (c) 2024 Google LLC + */ + +#include +#include +#include +#include + +#include "kvm_util.h" +#include "gic.h" +#include "gic_v3.h" +#include "gic_v3_its.h" +#include "processor.h" +#include "ucall.h" +#include "vgic.h" + +#define TEST_MEMSLOT_INDEX 1 + +#define GIC_LPI_OFFSET 8192 + +static size_t nr_iterations = 1000; +static vm_paddr_t gpa_base; + +static struct kvm_vm *vm; +static struct kvm_vcpu **vcpus; +static int gic_fd, its_fd; + +static struct test_data { + bool request_vcpus_stop; + u32 nr_cpus; + u32 nr_devices; + u32 nr_event_ids; + + vm_paddr_t device_table; + vm_paddr_t collection_table; + vm_paddr_t cmdq_base; + void *cmdq_base_va; + vm_paddr_t itt_tables; + + vm_paddr_t lpi_prop_table; + vm_paddr_t lpi_pend_tables; +} test_data = { + .nr_cpus = 1, + .nr_devices = 1, + .nr_event_ids = 16, +}; + +static void guest_irq_handler(struct ex_regs *regs) +{ + u32 intid = gic_get_and_ack_irq(); + + if (intid == IAR_SPURIOUS) + return; + + GUEST_ASSERT(intid >= GIC_LPI_OFFSET); + gic_set_eoi(intid); +} + +static void guest_setup_its_mappings(void) +{ + u32 coll_id, device_id, event_id, intid = GIC_LPI_OFFSET; + u32 nr_events = test_data.nr_event_ids; + u32 nr_devices = test_data.nr_devices; + u32 nr_cpus = test_data.nr_cpus; + + for (coll_id = 0; coll_id < nr_cpus; coll_id++) + its_send_mapc_cmd(test_data.cmdq_base_va, coll_id, coll_id, true); + + /* Round-robin the LPIs to all of the vCPUs in the VM */ + coll_id = 0; + for (device_id = 0; device_id < nr_devices; device_id++) { + vm_paddr_t itt_base = test_data.itt_tables + (device_id * SZ_64K); + + its_send_mapd_cmd(test_data.cmdq_base_va, device_id, + itt_base, SZ_64K, true); + + for (event_id = 0; event_id < nr_events; event_id++) { + its_send_mapti_cmd(test_data.cmdq_base_va, device_id, + event_id, coll_id, intid++); + + coll_id = (coll_id + 1) % test_data.nr_cpus; + } + } +} + +static void guest_invalidate_all_rdists(void) +{ + int i; + + for (i = 0; i < test_data.nr_cpus; i++) + its_send_invall_cmd(test_data.cmdq_base_va, i); +} + +static void guest_setup_gic(void) +{ + static atomic_int nr_cpus_ready = 0; + u32 cpuid = guest_get_vcpuid(); + + gic_init(GIC_V3, test_data.nr_cpus); + gic_rdist_enable_lpis(test_data.lpi_prop_table, SZ_64K, + test_data.lpi_pend_tables + (cpuid * SZ_64K)); + + atomic_fetch_add(&nr_cpus_ready, 1); + + if (cpuid > 0) + return; + + while (atomic_load(&nr_cpus_ready) < test_data.nr_cpus) + cpu_relax(); + + its_init(test_data.collection_table, SZ_64K, + test_data.device_table, SZ_64K, + test_data.cmdq_base, SZ_64K); + + guest_setup_its_mappings(); + guest_invalidate_all_rdists(); +} + +static void guest_code(size_t nr_lpis) +{ + guest_setup_gic(); + + GUEST_SYNC(0); + + /* + * Don't use WFI here to avoid blocking the vCPU thread indefinitely and + * never getting the stop 
signal. + */ + while (!READ_ONCE(test_data.request_vcpus_stop)) + cpu_relax(); + + GUEST_DONE(); +} + +static void setup_memslot(void) +{ + size_t pages; + size_t sz; + + /* + * For the ITS: + * - A single level device table + * - A single level collection table + * - The command queue + * - An ITT for each device + */ + sz = (3 + test_data.nr_devices) * SZ_64K; + + /* + * For the redistributors: + * - A shared LPI configuration table + * - An LPI pending table for each vCPU + */ + sz += (1 + test_data.nr_cpus) * SZ_64K; + + pages = sz / vm->page_size; + gpa_base = ((vm_compute_max_gfn(vm) + 1) * vm->page_size) - sz; + vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, gpa_base, + TEST_MEMSLOT_INDEX, pages, 0); +} + +#define LPI_PROP_DEFAULT_PRIO 0xa0 + +static void configure_lpis(void) +{ + size_t nr_lpis = test_data.nr_devices * test_data.nr_event_ids; + u8 *tbl = addr_gpa2hva(vm, test_data.lpi_prop_table); + size_t i; + + for (i = 0; i < nr_lpis; i++) { + tbl[i] = LPI_PROP_DEFAULT_PRIO | + LPI_PROP_GROUP1 | + LPI_PROP_ENABLED; + } +} + +static void setup_test_data(void) +{ + size_t pages_per_64k = vm_calc_num_guest_pages(vm->mode, SZ_64K); + u32 nr_devices = test_data.nr_devices; + u32 nr_cpus = test_data.nr_cpus; + vm_paddr_t cmdq_base; + + test_data.device_table = vm_phy_pages_alloc(vm, pages_per_64k, + gpa_base, + TEST_MEMSLOT_INDEX); + + test_data.collection_table = vm_phy_pages_alloc(vm, pages_per_64k, + gpa_base, + TEST_MEMSLOT_INDEX); + + cmdq_base = vm_phy_pages_alloc(vm, pages_per_64k, gpa_base, + TEST_MEMSLOT_INDEX); + virt_map(vm, cmdq_base, cmdq_base, pages_per_64k); + test_data.cmdq_base = cmdq_base; + test_data.cmdq_base_va = (void *)cmdq_base; + + test_data.itt_tables = vm_phy_pages_alloc(vm, pages_per_64k * nr_devices, + gpa_base, TEST_MEMSLOT_INDEX); + + test_data.lpi_prop_table = vm_phy_pages_alloc(vm, pages_per_64k, + gpa_base, TEST_MEMSLOT_INDEX); + configure_lpis(); + + test_data.lpi_pend_tables = vm_phy_pages_alloc(vm, pages_per_64k * nr_cpus, + gpa_base, TEST_MEMSLOT_INDEX); + + sync_global_to_guest(vm, test_data); +} + +static void setup_gic(void) +{ + gic_fd = vgic_v3_setup(vm, test_data.nr_cpus, 64); + __TEST_REQUIRE(gic_fd >= 0, "Failed to create GICv3"); + + its_fd = vgic_its_setup(vm); +} + +static void signal_lpi(u32 device_id, u32 event_id) +{ + vm_paddr_t db_addr = GITS_BASE_GPA + GITS_TRANSLATER; + + struct kvm_msi msi = { + .address_lo = db_addr, + .address_hi = db_addr >> 32, + .data = event_id, + .devid = device_id, + .flags = KVM_MSI_VALID_DEVID, + }; + + /* + * KVM_SIGNAL_MSI returns 1 if the MSI wasn't 'blocked' by the VM, + * which for arm64 implies having a valid translation in the ITS. 
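+ * A return of 0 would mean the guest blocked the MSI (e.g. no valid
+ * ITS mapping for this device/event pair), and a negative value an
+ * ioctl error.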
+ */ + TEST_ASSERT(__vm_ioctl(vm, KVM_SIGNAL_MSI, &msi) == 1, + "KVM_SIGNAL_MSI ioctl failed"); +} + +static pthread_barrier_t test_setup_barrier; + +static void *lpi_worker_thread(void *data) +{ + u32 device_id = (size_t)data; + u32 event_id; + size_t i; + + pthread_barrier_wait(&test_setup_barrier); + + for (i = 0; i < nr_iterations; i++) + for (event_id = 0; event_id < test_data.nr_event_ids; event_id++) + signal_lpi(device_id, event_id); + + return NULL; +} + +static void *vcpu_worker_thread(void *data) +{ + struct kvm_vcpu *vcpu = data; + struct ucall uc; + + while (true) { + vcpu_run(vcpu); + + switch (get_ucall(vcpu, &uc)) { + case UCALL_SYNC: + pthread_barrier_wait(&test_setup_barrier); + continue; + case UCALL_DONE: + return NULL; + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + break; + default: + TEST_FAIL("Unknown ucall: %lu", uc.cmd); + } + } + + return NULL; +} + +static void report_stats(struct timespec delta) +{ + double nr_lpis; + double time; + + nr_lpis = test_data.nr_devices * test_data.nr_event_ids * nr_iterations; + + time = delta.tv_sec; + time += ((double)delta.tv_nsec) / NSEC_PER_SEC; + + pr_info("Rate: %.2f LPIs/sec\n", nr_lpis / time); +} + +static void run_test(void) +{ + u32 nr_devices = test_data.nr_devices; + u32 nr_vcpus = test_data.nr_cpus; + pthread_t *lpi_threads = malloc(nr_devices * sizeof(pthread_t)); + pthread_t *vcpu_threads = malloc(nr_vcpus * sizeof(pthread_t)); + struct timespec start, delta; + size_t i; + + TEST_ASSERT(lpi_threads && vcpu_threads, "Failed to allocate pthread arrays"); + + pthread_barrier_init(&test_setup_barrier, NULL, nr_vcpus + nr_devices + 1); + + for (i = 0; i < nr_vcpus; i++) + pthread_create(&vcpu_threads[i], NULL, vcpu_worker_thread, vcpus[i]); + + for (i = 0; i < nr_devices; i++) + pthread_create(&lpi_threads[i], NULL, lpi_worker_thread, (void *)i); + + pthread_barrier_wait(&test_setup_barrier); + + clock_gettime(CLOCK_MONOTONIC, &start); + + for (i = 0; i < nr_devices; i++) + pthread_join(lpi_threads[i], NULL); + + delta = timespec_elapsed(start); + write_guest_global(vm, test_data.request_vcpus_stop, true); + + for (i = 0; i < nr_vcpus; i++) + pthread_join(vcpu_threads[i], NULL); + + report_stats(delta); +} + +static void setup_vm(void) +{ + int i; + + vcpus = malloc(test_data.nr_cpus * sizeof(struct kvm_vcpu)); + TEST_ASSERT(vcpus, "Failed to allocate vCPU array"); + + vm = vm_create_with_vcpus(test_data.nr_cpus, guest_code, vcpus); + + vm_init_descriptor_tables(vm); + for (i = 0; i < test_data.nr_cpus; i++) + vcpu_init_descriptor_tables(vcpus[i]); + + vm_install_exception_handler(vm, VECTOR_IRQ_CURRENT, guest_irq_handler); + + setup_memslot(); + + setup_gic(); + + setup_test_data(); +} + +static void destroy_vm(void) +{ + close(its_fd); + close(gic_fd); + kvm_vm_free(vm); + free(vcpus); +} + +static void pr_usage(const char *name) +{ + pr_info("%s [-v NR_VCPUS] [-d NR_DEVICES] [-e NR_EVENTS] [-i ITERS] -h\n", name); + pr_info(" -v:\tnumber of vCPUs (default: %u)\n", test_data.nr_cpus); + pr_info(" -d:\tnumber of devices (default: %u)\n", test_data.nr_devices); + pr_info(" -e:\tnumber of event IDs per device (default: %u)\n", test_data.nr_event_ids); + pr_info(" -i:\tnumber of iterations (default: %lu)\n", nr_iterations); +} + +int main(int argc, char **argv) +{ + u32 nr_threads; + int c; + + while ((c = getopt(argc, argv, "hv:d:e:i:")) != -1) { + switch (c) { + case 'v': + test_data.nr_cpus = atoi(optarg); + break; + case 'd': + test_data.nr_devices = atoi(optarg); + break; + case 'e': + test_data.nr_event_ids = 
atoi(optarg); + break; + case 'i': + nr_iterations = strtoul(optarg, NULL, 0); + break; + case 'h': + default: + pr_usage(argv[0]); + return 1; + } + } + + nr_threads = test_data.nr_cpus + test_data.nr_devices; + if (nr_threads > get_nprocs()) + pr_info("WARNING: running %u threads on %d CPUs; performance is degraded.\n", + nr_threads, get_nprocs()); + + setup_vm(); + + run_test(); + + destroy_vm(); + + return 0; +} diff --git a/tools/testing/selftests/kvm/arm64/vpmu_counter_access.c b/tools/testing/selftests/kvm/arm64/vpmu_counter_access.c new file mode 100644 index 000000000000..f16b3b27e32e --- /dev/null +++ b/tools/testing/selftests/kvm/arm64/vpmu_counter_access.c @@ -0,0 +1,648 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * vpmu_counter_access - Test vPMU event counter access + * + * Copyright (c) 2023 Google LLC. + * + * This test checks if the guest can see the same number of the PMU event + * counters (PMCR_EL0.N) that userspace sets, if the guest can access + * those counters, and if the guest is prevented from accessing any + * other counters. + * It also checks if the userspace accesses to the PMU regsisters honor the + * PMCR.N value that's set for the guest. + * This test runs only when KVM_CAP_ARM_PMU_V3 is supported on the host. + */ +#include +#include +#include +#include +#include +#include + +/* The max number of the PMU event counters (excluding the cycle counter) */ +#define ARMV8_PMU_MAX_GENERAL_COUNTERS (ARMV8_PMU_MAX_COUNTERS - 1) + +/* The cycle counter bit position that's common among the PMU registers */ +#define ARMV8_PMU_CYCLE_IDX 31 + +struct vpmu_vm { + struct kvm_vm *vm; + struct kvm_vcpu *vcpu; + int gic_fd; +}; + +static struct vpmu_vm vpmu_vm; + +struct pmreg_sets { + uint64_t set_reg_id; + uint64_t clr_reg_id; +}; + +#define PMREG_SET(set, clr) {.set_reg_id = set, .clr_reg_id = clr} + +static uint64_t get_pmcr_n(uint64_t pmcr) +{ + return FIELD_GET(ARMV8_PMU_PMCR_N, pmcr); +} + +static void set_pmcr_n(uint64_t *pmcr, uint64_t pmcr_n) +{ + u64p_replace_bits((__u64 *) pmcr, pmcr_n, ARMV8_PMU_PMCR_N); +} + +static uint64_t get_counters_mask(uint64_t n) +{ + uint64_t mask = BIT(ARMV8_PMU_CYCLE_IDX); + + if (n) + mask |= GENMASK(n - 1, 0); + return mask; +} + +/* Read PMEVTCNTR_EL0 through PMXEVCNTR_EL0 */ +static inline unsigned long read_sel_evcntr(int sel) +{ + write_sysreg(sel, pmselr_el0); + isb(); + return read_sysreg(pmxevcntr_el0); +} + +/* Write PMEVTCNTR_EL0 through PMXEVCNTR_EL0 */ +static inline void write_sel_evcntr(int sel, unsigned long val) +{ + write_sysreg(sel, pmselr_el0); + isb(); + write_sysreg(val, pmxevcntr_el0); + isb(); +} + +/* Read PMEVTYPER_EL0 through PMXEVTYPER_EL0 */ +static inline unsigned long read_sel_evtyper(int sel) +{ + write_sysreg(sel, pmselr_el0); + isb(); + return read_sysreg(pmxevtyper_el0); +} + +/* Write PMEVTYPER_EL0 through PMXEVTYPER_EL0 */ +static inline void write_sel_evtyper(int sel, unsigned long val) +{ + write_sysreg(sel, pmselr_el0); + isb(); + write_sysreg(val, pmxevtyper_el0); + isb(); +} + +static void pmu_disable_reset(void) +{ + uint64_t pmcr = read_sysreg(pmcr_el0); + + /* Reset all counters, disabling them */ + pmcr &= ~ARMV8_PMU_PMCR_E; + write_sysreg(pmcr | ARMV8_PMU_PMCR_P, pmcr_el0); + isb(); +} + +#define RETURN_READ_PMEVCNTRN(n) \ + return read_sysreg(pmevcntr##n##_el0) +static unsigned long read_pmevcntrn(int n) +{ + PMEVN_SWITCH(n, RETURN_READ_PMEVCNTRN); + return 0; +} + +#define WRITE_PMEVCNTRN(n) \ + write_sysreg(val, pmevcntr##n##_el0) +static void write_pmevcntrn(int n, unsigned 
long val) +{ + PMEVN_SWITCH(n, WRITE_PMEVCNTRN); + isb(); +} + +#define READ_PMEVTYPERN(n) \ + return read_sysreg(pmevtyper##n##_el0) +static unsigned long read_pmevtypern(int n) +{ + PMEVN_SWITCH(n, READ_PMEVTYPERN); + return 0; +} + +#define WRITE_PMEVTYPERN(n) \ + write_sysreg(val, pmevtyper##n##_el0) +static void write_pmevtypern(int n, unsigned long val) +{ + PMEVN_SWITCH(n, WRITE_PMEVTYPERN); + isb(); +} + +/* + * The pmc_accessor structure has pointers to PMEV{CNTR,TYPER}_EL0 + * accessors that test cases will use. Each of the accessors will + * either directly reads/writes PMEV{CNTR,TYPER}_EL0 + * (i.e. {read,write}_pmev{cnt,type}rn()), or reads/writes them through + * PMXEV{CNTR,TYPER}_EL0 (i.e. {read,write}_sel_ev{cnt,type}r()). + * + * This is used to test that combinations of those accessors provide + * the consistent behavior. + */ +struct pmc_accessor { + /* A function to be used to read PMEVTCNTR_EL0 */ + unsigned long (*read_cntr)(int idx); + /* A function to be used to write PMEVTCNTR_EL0 */ + void (*write_cntr)(int idx, unsigned long val); + /* A function to be used to read PMEVTYPER_EL0 */ + unsigned long (*read_typer)(int idx); + /* A function to be used to write PMEVTYPER_EL0 */ + void (*write_typer)(int idx, unsigned long val); +}; + +struct pmc_accessor pmc_accessors[] = { + /* test with all direct accesses */ + { read_pmevcntrn, write_pmevcntrn, read_pmevtypern, write_pmevtypern }, + /* test with all indirect accesses */ + { read_sel_evcntr, write_sel_evcntr, read_sel_evtyper, write_sel_evtyper }, + /* read with direct accesses, and write with indirect accesses */ + { read_pmevcntrn, write_sel_evcntr, read_pmevtypern, write_sel_evtyper }, + /* read with indirect accesses, and write with direct accesses */ + { read_sel_evcntr, write_pmevcntrn, read_sel_evtyper, write_pmevtypern }, +}; + +/* + * Convert a pointer of pmc_accessor to an index in pmc_accessors[], + * assuming that the pointer is one of the entries in pmc_accessors[]. + */ +#define PMC_ACC_TO_IDX(acc) (acc - &pmc_accessors[0]) + +#define GUEST_ASSERT_BITMAP_REG(regname, mask, set_expected) \ +{ \ + uint64_t _tval = read_sysreg(regname); \ + \ + if (set_expected) \ + __GUEST_ASSERT((_tval & mask), \ + "tval: 0x%lx; mask: 0x%lx; set_expected: %u", \ + _tval, mask, set_expected); \ + else \ + __GUEST_ASSERT(!(_tval & mask), \ + "tval: 0x%lx; mask: 0x%lx; set_expected: %u", \ + _tval, mask, set_expected); \ +} + +/* + * Check if @mask bits in {PMCNTEN,PMINTEN,PMOVS}{SET,CLR} registers + * are set or cleared as specified in @set_expected. + */ +static void check_bitmap_pmu_regs(uint64_t mask, bool set_expected) +{ + GUEST_ASSERT_BITMAP_REG(pmcntenset_el0, mask, set_expected); + GUEST_ASSERT_BITMAP_REG(pmcntenclr_el0, mask, set_expected); + GUEST_ASSERT_BITMAP_REG(pmintenset_el1, mask, set_expected); + GUEST_ASSERT_BITMAP_REG(pmintenclr_el1, mask, set_expected); + GUEST_ASSERT_BITMAP_REG(pmovsset_el0, mask, set_expected); + GUEST_ASSERT_BITMAP_REG(pmovsclr_el0, mask, set_expected); +} + +/* + * Check if the bit in {PMCNTEN,PMINTEN,PMOVS}{SET,CLR} registers corresponding + * to the specified counter (@pmc_idx) can be read/written as expected. + * When @set_op is true, it tries to set the bit for the counter in + * those registers by writing the SET registers (the bit won't be set + * if the counter is not implemented though). + * Otherwise, it tries to clear the bits in the registers by writing + * the CLR registers. + * Then, it checks if the values indicated in the registers are as expected. 
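+ * For an unimplemented counter the SET write is expected to have no
+ * effect, i.e. the bit stays zero.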
+ */ +static void test_bitmap_pmu_regs(int pmc_idx, bool set_op) +{ + uint64_t pmcr_n, test_bit = BIT(pmc_idx); + bool set_expected = false; + + if (set_op) { + write_sysreg(test_bit, pmcntenset_el0); + write_sysreg(test_bit, pmintenset_el1); + write_sysreg(test_bit, pmovsset_el0); + + /* The bit will be set only if the counter is implemented */ + pmcr_n = get_pmcr_n(read_sysreg(pmcr_el0)); + set_expected = (pmc_idx < pmcr_n) ? true : false; + } else { + write_sysreg(test_bit, pmcntenclr_el0); + write_sysreg(test_bit, pmintenclr_el1); + write_sysreg(test_bit, pmovsclr_el0); + } + check_bitmap_pmu_regs(test_bit, set_expected); +} + +/* + * Tests for reading/writing registers for the (implemented) event counter + * specified by @pmc_idx. + */ +static void test_access_pmc_regs(struct pmc_accessor *acc, int pmc_idx) +{ + uint64_t write_data, read_data; + + /* Disable all PMCs and reset all PMCs to zero. */ + pmu_disable_reset(); + + /* + * Tests for reading/writing {PMCNTEN,PMINTEN,PMOVS}{SET,CLR}_EL1. + */ + + /* Make sure that the bit in those registers are set to 0 */ + test_bitmap_pmu_regs(pmc_idx, false); + /* Test if setting the bit in those registers works */ + test_bitmap_pmu_regs(pmc_idx, true); + /* Test if clearing the bit in those registers works */ + test_bitmap_pmu_regs(pmc_idx, false); + + /* + * Tests for reading/writing the event type register. + */ + + /* + * Set the event type register to an arbitrary value just for testing + * of reading/writing the register. + * Arm ARM says that for the event from 0x0000 to 0x003F, + * the value indicated in the PMEVTYPER_EL0.evtCount field is + * the value written to the field even when the specified event + * is not supported. + */ + write_data = (ARMV8_PMU_EXCLUDE_EL1 | ARMV8_PMUV3_PERFCTR_INST_RETIRED); + acc->write_typer(pmc_idx, write_data); + read_data = acc->read_typer(pmc_idx); + __GUEST_ASSERT(read_data == write_data, + "pmc_idx: 0x%x; acc_idx: 0x%lx; read_data: 0x%lx; write_data: 0x%lx", + pmc_idx, PMC_ACC_TO_IDX(acc), read_data, write_data); + + /* + * Tests for reading/writing the event count register. + */ + + read_data = acc->read_cntr(pmc_idx); + + /* The count value must be 0, as it is disabled and reset */ + __GUEST_ASSERT(read_data == 0, + "pmc_idx: 0x%x; acc_idx: 0x%lx; read_data: 0x%lx", + pmc_idx, PMC_ACC_TO_IDX(acc), read_data); + + write_data = read_data + pmc_idx + 0x12345; + acc->write_cntr(pmc_idx, write_data); + read_data = acc->read_cntr(pmc_idx); + __GUEST_ASSERT(read_data == write_data, + "pmc_idx: 0x%x; acc_idx: 0x%lx; read_data: 0x%lx; write_data: 0x%lx", + pmc_idx, PMC_ACC_TO_IDX(acc), read_data, write_data); +} + +#define INVALID_EC (-1ul) +uint64_t expected_ec = INVALID_EC; + +static void guest_sync_handler(struct ex_regs *regs) +{ + uint64_t esr, ec; + + esr = read_sysreg(esr_el1); + ec = ESR_ELx_EC(esr); + + __GUEST_ASSERT(expected_ec == ec, + "PC: 0x%lx; ESR: 0x%lx; EC: 0x%lx; EC expected: 0x%lx", + regs->pc, esr, ec, expected_ec); + + /* skip the trapping instruction */ + regs->pc += 4; + + /* Use INVALID_EC to indicate an exception occurred */ + expected_ec = INVALID_EC; +} + +/* + * Run the given operation that should trigger an exception with the + * given exception class. The exception handler (guest_sync_handler) + * will reset op_end_addr to 0, expected_ec to INVALID_EC, and skip + * the instruction that trapped. 
+ */ +#define TEST_EXCEPTION(ec, ops) \ +({ \ + GUEST_ASSERT(ec != INVALID_EC); \ + WRITE_ONCE(expected_ec, ec); \ + dsb(ish); \ + ops; \ + GUEST_ASSERT(expected_ec == INVALID_EC); \ +}) + +/* + * Tests for reading/writing registers for the unimplemented event counter + * specified by @pmc_idx (>= PMCR_EL0.N). + */ +static void test_access_invalid_pmc_regs(struct pmc_accessor *acc, int pmc_idx) +{ + /* + * Reading/writing the event count/type registers should cause + * an UNDEFINED exception. + */ + TEST_EXCEPTION(ESR_ELx_EC_UNKNOWN, acc->read_cntr(pmc_idx)); + TEST_EXCEPTION(ESR_ELx_EC_UNKNOWN, acc->write_cntr(pmc_idx, 0)); + TEST_EXCEPTION(ESR_ELx_EC_UNKNOWN, acc->read_typer(pmc_idx)); + TEST_EXCEPTION(ESR_ELx_EC_UNKNOWN, acc->write_typer(pmc_idx, 0)); + /* + * The bit corresponding to the (unimplemented) counter in + * {PMCNTEN,PMINTEN,PMOVS}{SET,CLR} registers should be RAZ. + */ + test_bitmap_pmu_regs(pmc_idx, 1); + test_bitmap_pmu_regs(pmc_idx, 0); +} + +/* + * The guest is configured with PMUv3 with @expected_pmcr_n number of + * event counters. + * Check if @expected_pmcr_n is consistent with PMCR_EL0.N, and + * if reading/writing PMU registers for implemented or unimplemented + * counters works as expected. + */ +static void guest_code(uint64_t expected_pmcr_n) +{ + uint64_t pmcr, pmcr_n, unimp_mask; + int i, pmc; + + __GUEST_ASSERT(expected_pmcr_n <= ARMV8_PMU_MAX_GENERAL_COUNTERS, + "Expected PMCR.N: 0x%lx; ARMv8 general counters: 0x%x", + expected_pmcr_n, ARMV8_PMU_MAX_GENERAL_COUNTERS); + + pmcr = read_sysreg(pmcr_el0); + pmcr_n = get_pmcr_n(pmcr); + + /* Make sure that PMCR_EL0.N indicates the value userspace set */ + __GUEST_ASSERT(pmcr_n == expected_pmcr_n, + "Expected PMCR.N: 0x%lx, PMCR.N: 0x%lx", + expected_pmcr_n, pmcr_n); + + /* + * Make sure that (RAZ) bits corresponding to unimplemented event + * counters in {PMCNTEN,PMINTEN,PMOVS}{SET,CLR} registers are reset + * to zero. + * (NOTE: bits for implemented event counters are reset to UNKNOWN) + */ + unimp_mask = GENMASK_ULL(ARMV8_PMU_MAX_GENERAL_COUNTERS - 1, pmcr_n); + check_bitmap_pmu_regs(unimp_mask, false); + + /* + * Tests for reading/writing PMU registers for implemented counters. + * Use each combination of PMEV{CNTR,TYPER}_EL0 accessor functions. + */ + for (i = 0; i < ARRAY_SIZE(pmc_accessors); i++) { + for (pmc = 0; pmc < pmcr_n; pmc++) + test_access_pmc_regs(&pmc_accessors[i], pmc); + } + + /* + * Tests for reading/writing PMU registers for unimplemented counters. + * Use each combination of PMEV{CNTR,TYPER}_EL0 accessor functions. + */ + for (i = 0; i < ARRAY_SIZE(pmc_accessors); i++) { + for (pmc = pmcr_n; pmc < ARMV8_PMU_MAX_GENERAL_COUNTERS; pmc++) + test_access_invalid_pmc_regs(&pmc_accessors[i], pmc); + } + + GUEST_DONE(); +} + +/* Create a VM that has one vCPU with PMUv3 configured. */ +static void create_vpmu_vm(void *guest_code) +{ + struct kvm_vcpu_init init; + uint8_t pmuver, ec; + uint64_t dfr0, irq = 23; + struct kvm_device_attr irq_attr = { + .group = KVM_ARM_VCPU_PMU_V3_CTRL, + .attr = KVM_ARM_VCPU_PMU_V3_IRQ, + .addr = (uint64_t)&irq, + }; + struct kvm_device_attr init_attr = { + .group = KVM_ARM_VCPU_PMU_V3_CTRL, + .attr = KVM_ARM_VCPU_PMU_V3_INIT, + }; + + /* The test creates the vpmu_vm multiple times. 
Ensure a clean state */ + memset(&vpmu_vm, 0, sizeof(vpmu_vm)); + + vpmu_vm.vm = vm_create(1); + vm_init_descriptor_tables(vpmu_vm.vm); + for (ec = 0; ec < ESR_ELx_EC_MAX + 1; ec++) { + vm_install_sync_handler(vpmu_vm.vm, VECTOR_SYNC_CURRENT, ec, + guest_sync_handler); + } + + /* Create vCPU with PMUv3 */ + vm_ioctl(vpmu_vm.vm, KVM_ARM_PREFERRED_TARGET, &init); + init.features[0] |= (1 << KVM_ARM_VCPU_PMU_V3); + vpmu_vm.vcpu = aarch64_vcpu_add(vpmu_vm.vm, 0, &init, guest_code); + vcpu_init_descriptor_tables(vpmu_vm.vcpu); + vpmu_vm.gic_fd = vgic_v3_setup(vpmu_vm.vm, 1, 64); + __TEST_REQUIRE(vpmu_vm.gic_fd >= 0, + "Failed to create vgic-v3, skipping"); + + /* Make sure that PMUv3 support is indicated in the ID register */ + dfr0 = vcpu_get_reg(vpmu_vm.vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64DFR0_EL1)); + pmuver = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMUVer), dfr0); + TEST_ASSERT(pmuver != ID_AA64DFR0_EL1_PMUVer_IMP_DEF && + pmuver >= ID_AA64DFR0_EL1_PMUVer_IMP, + "Unexpected PMUVER (0x%x) on the vCPU with PMUv3", pmuver); + + /* Initialize vPMU */ + vcpu_ioctl(vpmu_vm.vcpu, KVM_SET_DEVICE_ATTR, &irq_attr); + vcpu_ioctl(vpmu_vm.vcpu, KVM_SET_DEVICE_ATTR, &init_attr); +} + +static void destroy_vpmu_vm(void) +{ + close(vpmu_vm.gic_fd); + kvm_vm_free(vpmu_vm.vm); +} + +static void run_vcpu(struct kvm_vcpu *vcpu, uint64_t pmcr_n) +{ + struct ucall uc; + + vcpu_args_set(vcpu, 1, pmcr_n); + vcpu_run(vcpu); + switch (get_ucall(vcpu, &uc)) { + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + break; + case UCALL_DONE: + break; + default: + TEST_FAIL("Unknown ucall %lu", uc.cmd); + break; + } +} + +static void test_create_vpmu_vm_with_pmcr_n(uint64_t pmcr_n, bool expect_fail) +{ + struct kvm_vcpu *vcpu; + uint64_t pmcr, pmcr_orig; + + create_vpmu_vm(guest_code); + vcpu = vpmu_vm.vcpu; + + pmcr_orig = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_PMCR_EL0)); + pmcr = pmcr_orig; + + /* + * Setting a larger value of PMCR.N should not modify the field, and + * return a success. + */ + set_pmcr_n(&pmcr, pmcr_n); + vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_PMCR_EL0), pmcr); + pmcr = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_PMCR_EL0)); + + if (expect_fail) + TEST_ASSERT(pmcr_orig == pmcr, + "PMCR.N modified by KVM to a larger value (PMCR: 0x%lx) for pmcr_n: 0x%lx", + pmcr, pmcr_n); + else + TEST_ASSERT(pmcr_n == get_pmcr_n(pmcr), + "Failed to update PMCR.N to %lu (received: %lu)", + pmcr_n, get_pmcr_n(pmcr)); +} + +/* + * Create a guest with one vCPU, set the PMCR_EL0.N for the vCPU to @pmcr_n, + * and run the test. + */ +static void run_access_test(uint64_t pmcr_n) +{ + uint64_t sp; + struct kvm_vcpu *vcpu; + struct kvm_vcpu_init init; + + pr_debug("Test with pmcr_n %lu\n", pmcr_n); + + test_create_vpmu_vm_with_pmcr_n(pmcr_n, false); + vcpu = vpmu_vm.vcpu; + + /* Save the initial sp to restore them later to run the guest again */ + sp = vcpu_get_reg(vcpu, ARM64_CORE_REG(sp_el1)); + + run_vcpu(vcpu, pmcr_n); + + /* + * Reset and re-initialize the vCPU, and run the guest code again to + * check if PMCR_EL0.N is preserved. 
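+ * Re-initializing the vCPU resets its core registers, which is why SP
+ * and PC are restored by hand before the second run.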
+ */ + vm_ioctl(vpmu_vm.vm, KVM_ARM_PREFERRED_TARGET, &init); + init.features[0] |= (1 << KVM_ARM_VCPU_PMU_V3); + aarch64_vcpu_setup(vcpu, &init); + vcpu_init_descriptor_tables(vcpu); + vcpu_set_reg(vcpu, ARM64_CORE_REG(sp_el1), sp); + vcpu_set_reg(vcpu, ARM64_CORE_REG(regs.pc), (uint64_t)guest_code); + + run_vcpu(vcpu, pmcr_n); + + destroy_vpmu_vm(); +} + +static struct pmreg_sets validity_check_reg_sets[] = { + PMREG_SET(SYS_PMCNTENSET_EL0, SYS_PMCNTENCLR_EL0), + PMREG_SET(SYS_PMINTENSET_EL1, SYS_PMINTENCLR_EL1), + PMREG_SET(SYS_PMOVSSET_EL0, SYS_PMOVSCLR_EL0), +}; + +/* + * Create a VM, and check if KVM handles the userspace accesses of + * the PMU register sets in @validity_check_reg_sets[] correctly. + */ +static void run_pmregs_validity_test(uint64_t pmcr_n) +{ + int i; + struct kvm_vcpu *vcpu; + uint64_t set_reg_id, clr_reg_id, reg_val; + uint64_t valid_counters_mask, max_counters_mask; + + test_create_vpmu_vm_with_pmcr_n(pmcr_n, false); + vcpu = vpmu_vm.vcpu; + + valid_counters_mask = get_counters_mask(pmcr_n); + max_counters_mask = get_counters_mask(ARMV8_PMU_MAX_COUNTERS); + + for (i = 0; i < ARRAY_SIZE(validity_check_reg_sets); i++) { + set_reg_id = validity_check_reg_sets[i].set_reg_id; + clr_reg_id = validity_check_reg_sets[i].clr_reg_id; + + /* + * Test if the 'set' and 'clr' variants of the registers + * are initialized based on the number of valid counters. + */ + reg_val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(set_reg_id)); + TEST_ASSERT((reg_val & (~valid_counters_mask)) == 0, + "Initial read of set_reg: 0x%llx has unimplemented counters enabled: 0x%lx", + KVM_ARM64_SYS_REG(set_reg_id), reg_val); + + reg_val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(clr_reg_id)); + TEST_ASSERT((reg_val & (~valid_counters_mask)) == 0, + "Initial read of clr_reg: 0x%llx has unimplemented counters enabled: 0x%lx", + KVM_ARM64_SYS_REG(clr_reg_id), reg_val); + + /* + * Using the 'set' variant, force-set the register to the + * max number of possible counters and test if KVM discards + * the bits for unimplemented counters as it should. + */ + vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(set_reg_id), max_counters_mask); + + reg_val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(set_reg_id)); + TEST_ASSERT((reg_val & (~valid_counters_mask)) == 0, + "Read of set_reg: 0x%llx has unimplemented counters enabled: 0x%lx", + KVM_ARM64_SYS_REG(set_reg_id), reg_val); + + reg_val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(clr_reg_id)); + TEST_ASSERT((reg_val & (~valid_counters_mask)) == 0, + "Read of clr_reg: 0x%llx has unimplemented counters enabled: 0x%lx", + KVM_ARM64_SYS_REG(clr_reg_id), reg_val); + } + + destroy_vpmu_vm(); +} + +/* + * Create a guest with one vCPU, and attempt to set the PMCR_EL0.N for + * the vCPU to @pmcr_n, which is larger than the host value. + * The attempt should fail as @pmcr_n is too big to set for the vCPU. + */ +static void run_error_test(uint64_t pmcr_n) +{ + pr_debug("Error test with pmcr_n %lu (larger than the host)\n", pmcr_n); + + test_create_vpmu_vm_with_pmcr_n(pmcr_n, true); + destroy_vpmu_vm(); +} + +/* + * Return the default number of implemented PMU event counters excluding + * the cycle counter (i.e. PMCR_EL0.N value) for the guest. 
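+ * The limit is taken from the reset value of PMCR_EL0 on a scratch VM
+ * that is created and destroyed just for this read.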
+ */ +static uint64_t get_pmcr_n_limit(void) +{ + uint64_t pmcr; + + create_vpmu_vm(guest_code); + pmcr = vcpu_get_reg(vpmu_vm.vcpu, KVM_ARM64_SYS_REG(SYS_PMCR_EL0)); + destroy_vpmu_vm(); + return get_pmcr_n(pmcr); +} + +int main(void) +{ + uint64_t i, pmcr_n; + + TEST_REQUIRE(kvm_has_cap(KVM_CAP_ARM_PMU_V3)); + + pmcr_n = get_pmcr_n_limit(); + for (i = 0; i <= pmcr_n; i++) { + run_access_test(i); + run_pmregs_validity_test(i); + } + + for (i = pmcr_n + 1; i < ARMV8_PMU_MAX_COUNTERS; i++) + run_error_test(i); + + return 0; +} diff --git a/tools/testing/selftests/kvm/dirty_log_perf_test.c b/tools/testing/selftests/kvm/dirty_log_perf_test.c index 9f24303acb8c..e79817bd0e29 100644 --- a/tools/testing/selftests/kvm/dirty_log_perf_test.c +++ b/tools/testing/selftests/kvm/dirty_log_perf_test.c @@ -21,7 +21,7 @@ #include "ucall_common.h" #ifdef __aarch64__ -#include "aarch64/vgic.h" +#include "arm64/vgic.h" static int gic_fd; diff --git a/tools/testing/selftests/kvm/include/aarch64/arch_timer.h b/tools/testing/selftests/kvm/include/aarch64/arch_timer.h deleted file mode 100644 index bf461de34785..000000000000 --- a/tools/testing/selftests/kvm/include/aarch64/arch_timer.h +++ /dev/null @@ -1,158 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * ARM Generic Timer specific interface - */ - -#ifndef SELFTEST_KVM_ARCH_TIMER_H -#define SELFTEST_KVM_ARCH_TIMER_H - -#include "processor.h" - -enum arch_timer { - VIRTUAL, - PHYSICAL, -}; - -#define CTL_ENABLE (1 << 0) -#define CTL_IMASK (1 << 1) -#define CTL_ISTATUS (1 << 2) - -#define msec_to_cycles(msec) \ - (timer_get_cntfrq() * (uint64_t)(msec) / 1000) - -#define usec_to_cycles(usec) \ - (timer_get_cntfrq() * (uint64_t)(usec) / 1000000) - -#define cycles_to_usec(cycles) \ - ((uint64_t)(cycles) * 1000000 / timer_get_cntfrq()) - -static inline uint32_t timer_get_cntfrq(void) -{ - return read_sysreg(cntfrq_el0); -} - -static inline uint64_t timer_get_cntct(enum arch_timer timer) -{ - isb(); - - switch (timer) { - case VIRTUAL: - return read_sysreg(cntvct_el0); - case PHYSICAL: - return read_sysreg(cntpct_el0); - default: - GUEST_FAIL("Unexpected timer type = %u", timer); - } - - /* We should not reach here */ - return 0; -} - -static inline void timer_set_cval(enum arch_timer timer, uint64_t cval) -{ - switch (timer) { - case VIRTUAL: - write_sysreg(cval, cntv_cval_el0); - break; - case PHYSICAL: - write_sysreg(cval, cntp_cval_el0); - break; - default: - GUEST_FAIL("Unexpected timer type = %u", timer); - } - - isb(); -} - -static inline uint64_t timer_get_cval(enum arch_timer timer) -{ - switch (timer) { - case VIRTUAL: - return read_sysreg(cntv_cval_el0); - case PHYSICAL: - return read_sysreg(cntp_cval_el0); - default: - GUEST_FAIL("Unexpected timer type = %u", timer); - } - - /* We should not reach here */ - return 0; -} - -static inline void timer_set_tval(enum arch_timer timer, int32_t tval) -{ - switch (timer) { - case VIRTUAL: - write_sysreg(tval, cntv_tval_el0); - break; - case PHYSICAL: - write_sysreg(tval, cntp_tval_el0); - break; - default: - GUEST_FAIL("Unexpected timer type = %u", timer); - } - - isb(); -} - -static inline int32_t timer_get_tval(enum arch_timer timer) -{ - isb(); - switch (timer) { - case VIRTUAL: - return read_sysreg(cntv_tval_el0); - case PHYSICAL: - return read_sysreg(cntp_tval_el0); - default: - GUEST_FAIL("Could not get timer %d\n", timer); - } - - /* We should not reach here */ - return 0; -} - -static inline void timer_set_ctl(enum arch_timer timer, uint32_t ctl) -{ - switch (timer) { - case VIRTUAL: - 
write_sysreg(ctl, cntv_ctl_el0); - break; - case PHYSICAL: - write_sysreg(ctl, cntp_ctl_el0); - break; - default: - GUEST_FAIL("Unexpected timer type = %u", timer); - } - - isb(); -} - -static inline uint32_t timer_get_ctl(enum arch_timer timer) -{ - switch (timer) { - case VIRTUAL: - return read_sysreg(cntv_ctl_el0); - case PHYSICAL: - return read_sysreg(cntp_ctl_el0); - default: - GUEST_FAIL("Unexpected timer type = %u", timer); - } - - /* We should not reach here */ - return 0; -} - -static inline void timer_set_next_cval_ms(enum arch_timer timer, uint32_t msec) -{ - uint64_t now_ct = timer_get_cntct(timer); - uint64_t next_ct = now_ct + msec_to_cycles(msec); - - timer_set_cval(timer, next_ct); -} - -static inline void timer_set_next_tval_ms(enum arch_timer timer, uint32_t msec) -{ - timer_set_tval(timer, msec_to_cycles(msec)); -} - -#endif /* SELFTEST_KVM_ARCH_TIMER_H */ diff --git a/tools/testing/selftests/kvm/include/aarch64/delay.h b/tools/testing/selftests/kvm/include/aarch64/delay.h deleted file mode 100644 index 329e4f5079ea..000000000000 --- a/tools/testing/selftests/kvm/include/aarch64/delay.h +++ /dev/null @@ -1,25 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * ARM simple delay routines - */ - -#ifndef SELFTEST_KVM_ARM_DELAY_H -#define SELFTEST_KVM_ARM_DELAY_H - -#include "arch_timer.h" - -static inline void __delay(uint64_t cycles) -{ - enum arch_timer timer = VIRTUAL; - uint64_t start = timer_get_cntct(timer); - - while ((timer_get_cntct(timer) - start) < cycles) - cpu_relax(); -} - -static inline void udelay(unsigned long usec) -{ - __delay(usec_to_cycles(usec)); -} - -#endif /* SELFTEST_KVM_ARM_DELAY_H */ diff --git a/tools/testing/selftests/kvm/include/aarch64/gic.h b/tools/testing/selftests/kvm/include/aarch64/gic.h deleted file mode 100644 index baeb3c859389..000000000000 --- a/tools/testing/selftests/kvm/include/aarch64/gic.h +++ /dev/null @@ -1,64 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * ARM Generic Interrupt Controller (GIC) specific defines - */ - -#ifndef SELFTEST_KVM_GIC_H -#define SELFTEST_KVM_GIC_H - -#include - -enum gic_type { - GIC_V3, - GIC_TYPE_MAX, -}; - -/* - * Note that the redistributor frames are at the end, as the range scales - * with the number of vCPUs in the VM. - */ -#define GITS_BASE_GPA 0x8000000ULL -#define GICD_BASE_GPA (GITS_BASE_GPA + KVM_VGIC_V3_ITS_SIZE) -#define GICR_BASE_GPA (GICD_BASE_GPA + KVM_VGIC_V3_DIST_SIZE) - -/* The GIC is identity-mapped into the guest at the time of setup. */ -#define GITS_BASE_GVA ((volatile void *)GITS_BASE_GPA) -#define GICD_BASE_GVA ((volatile void *)GICD_BASE_GPA) -#define GICR_BASE_GVA ((volatile void *)GICR_BASE_GPA) - -#define MIN_SGI 0 -#define MIN_PPI 16 -#define MIN_SPI 32 -#define MAX_SPI 1019 -#define IAR_SPURIOUS 1023 - -#define INTID_IS_SGI(intid) (0 <= (intid) && (intid) < MIN_PPI) -#define INTID_IS_PPI(intid) (MIN_PPI <= (intid) && (intid) < MIN_SPI) -#define INTID_IS_SPI(intid) (MIN_SPI <= (intid) && (intid) <= MAX_SPI) - -void gic_init(enum gic_type type, unsigned int nr_cpus); -void gic_irq_enable(unsigned int intid); -void gic_irq_disable(unsigned int intid); -unsigned int gic_get_and_ack_irq(void); -void gic_set_eoi(unsigned int intid); -void gic_set_dir(unsigned int intid); - -/* - * Sets the EOI mode. When split is false, EOI just drops the priority. When - * split is true, EOI drops the priority and deactivates the interrupt. 
- */ -void gic_set_eoi_split(bool split); -void gic_set_priority_mask(uint64_t mask); -void gic_set_priority(uint32_t intid, uint32_t prio); -void gic_irq_set_active(unsigned int intid); -void gic_irq_clear_active(unsigned int intid); -bool gic_irq_get_active(unsigned int intid); -void gic_irq_set_pending(unsigned int intid); -void gic_irq_clear_pending(unsigned int intid); -bool gic_irq_get_pending(unsigned int intid); -void gic_irq_set_config(unsigned int intid, bool is_edge); - -void gic_rdist_enable_lpis(vm_paddr_t cfg_table, size_t cfg_table_size, - vm_paddr_t pend_table); - -#endif /* SELFTEST_KVM_GIC_H */ diff --git a/tools/testing/selftests/kvm/include/aarch64/gic_v3.h b/tools/testing/selftests/kvm/include/aarch64/gic_v3.h deleted file mode 100644 index a76615fa39a1..000000000000 --- a/tools/testing/selftests/kvm/include/aarch64/gic_v3.h +++ /dev/null @@ -1,604 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2013, 2014 ARM Limited, All Rights Reserved. - * Author: Marc Zyngier - */ -#ifndef __SELFTESTS_GIC_V3_H -#define __SELFTESTS_GIC_V3_H - -/* - * Distributor registers. We assume we're running non-secure, with ARE - * being set. Secure-only and non-ARE registers are not described. - */ -#define GICD_CTLR 0x0000 -#define GICD_TYPER 0x0004 -#define GICD_IIDR 0x0008 -#define GICD_TYPER2 0x000C -#define GICD_STATUSR 0x0010 -#define GICD_SETSPI_NSR 0x0040 -#define GICD_CLRSPI_NSR 0x0048 -#define GICD_SETSPI_SR 0x0050 -#define GICD_CLRSPI_SR 0x0058 -#define GICD_IGROUPR 0x0080 -#define GICD_ISENABLER 0x0100 -#define GICD_ICENABLER 0x0180 -#define GICD_ISPENDR 0x0200 -#define GICD_ICPENDR 0x0280 -#define GICD_ISACTIVER 0x0300 -#define GICD_ICACTIVER 0x0380 -#define GICD_IPRIORITYR 0x0400 -#define GICD_ICFGR 0x0C00 -#define GICD_IGRPMODR 0x0D00 -#define GICD_NSACR 0x0E00 -#define GICD_IGROUPRnE 0x1000 -#define GICD_ISENABLERnE 0x1200 -#define GICD_ICENABLERnE 0x1400 -#define GICD_ISPENDRnE 0x1600 -#define GICD_ICPENDRnE 0x1800 -#define GICD_ISACTIVERnE 0x1A00 -#define GICD_ICACTIVERnE 0x1C00 -#define GICD_IPRIORITYRnE 0x2000 -#define GICD_ICFGRnE 0x3000 -#define GICD_IROUTER 0x6000 -#define GICD_IROUTERnE 0x8000 -#define GICD_IDREGS 0xFFD0 -#define GICD_PIDR2 0xFFE8 - -#define ESPI_BASE_INTID 4096 - -/* - * Those registers are actually from GICv2, but the spec demands that they - * are implemented as RES0 if ARE is 1 (which we do in KVM's emulated GICv3). 
- */ -#define GICD_ITARGETSR 0x0800 -#define GICD_SGIR 0x0F00 -#define GICD_CPENDSGIR 0x0F10 -#define GICD_SPENDSGIR 0x0F20 - -#define GICD_CTLR_RWP (1U << 31) -#define GICD_CTLR_nASSGIreq (1U << 8) -#define GICD_CTLR_DS (1U << 6) -#define GICD_CTLR_ARE_NS (1U << 4) -#define GICD_CTLR_ENABLE_G1A (1U << 1) -#define GICD_CTLR_ENABLE_G1 (1U << 0) - -#define GICD_IIDR_IMPLEMENTER_SHIFT 0 -#define GICD_IIDR_IMPLEMENTER_MASK (0xfff << GICD_IIDR_IMPLEMENTER_SHIFT) -#define GICD_IIDR_REVISION_SHIFT 12 -#define GICD_IIDR_REVISION_MASK (0xf << GICD_IIDR_REVISION_SHIFT) -#define GICD_IIDR_VARIANT_SHIFT 16 -#define GICD_IIDR_VARIANT_MASK (0xf << GICD_IIDR_VARIANT_SHIFT) -#define GICD_IIDR_PRODUCT_ID_SHIFT 24 -#define GICD_IIDR_PRODUCT_ID_MASK (0xff << GICD_IIDR_PRODUCT_ID_SHIFT) - - -/* - * In systems with a single security state (what we emulate in KVM) - * the meaning of the interrupt group enable bits is slightly different - */ -#define GICD_CTLR_ENABLE_SS_G1 (1U << 1) -#define GICD_CTLR_ENABLE_SS_G0 (1U << 0) - -#define GICD_TYPER_RSS (1U << 26) -#define GICD_TYPER_LPIS (1U << 17) -#define GICD_TYPER_MBIS (1U << 16) -#define GICD_TYPER_ESPI (1U << 8) - -#define GICD_TYPER_ID_BITS(typer) ((((typer) >> 19) & 0x1f) + 1) -#define GICD_TYPER_NUM_LPIS(typer) ((((typer) >> 11) & 0x1f) + 1) -#define GICD_TYPER_SPIS(typer) ((((typer) & 0x1f) + 1) * 32) -#define GICD_TYPER_ESPIS(typer) \ - (((typer) & GICD_TYPER_ESPI) ? GICD_TYPER_SPIS((typer) >> 27) : 0) - -#define GICD_TYPER2_nASSGIcap (1U << 8) -#define GICD_TYPER2_VIL (1U << 7) -#define GICD_TYPER2_VID GENMASK(4, 0) - -#define GICD_IROUTER_SPI_MODE_ONE (0U << 31) -#define GICD_IROUTER_SPI_MODE_ANY (1U << 31) - -#define GIC_PIDR2_ARCH_MASK 0xf0 -#define GIC_PIDR2_ARCH_GICv3 0x30 -#define GIC_PIDR2_ARCH_GICv4 0x40 - -#define GIC_V3_DIST_SIZE 0x10000 - -#define GIC_PAGE_SIZE_4K 0ULL -#define GIC_PAGE_SIZE_16K 1ULL -#define GIC_PAGE_SIZE_64K 2ULL -#define GIC_PAGE_SIZE_MASK 3ULL - -/* - * Re-Distributor registers, offsets from RD_base - */ -#define GICR_CTLR GICD_CTLR -#define GICR_IIDR 0x0004 -#define GICR_TYPER 0x0008 -#define GICR_STATUSR GICD_STATUSR -#define GICR_WAKER 0x0014 -#define GICR_SETLPIR 0x0040 -#define GICR_CLRLPIR 0x0048 -#define GICR_PROPBASER 0x0070 -#define GICR_PENDBASER 0x0078 -#define GICR_INVLPIR 0x00A0 -#define GICR_INVALLR 0x00B0 -#define GICR_SYNCR 0x00C0 -#define GICR_IDREGS GICD_IDREGS -#define GICR_PIDR2 GICD_PIDR2 - -#define GICR_CTLR_ENABLE_LPIS (1UL << 0) -#define GICR_CTLR_CES (1UL << 1) -#define GICR_CTLR_IR (1UL << 2) -#define GICR_CTLR_RWP (1UL << 3) - -#define GICR_TYPER_CPU_NUMBER(r) (((r) >> 8) & 0xffff) - -#define EPPI_BASE_INTID 1056 - -#define GICR_TYPER_NR_PPIS(r) \ - ({ \ - unsigned int __ppinum = ((r) >> 27) & 0x1f; \ - unsigned int __nr_ppis = 16; \ - if (__ppinum == 1 || __ppinum == 2) \ - __nr_ppis += __ppinum * 32; \ - \ - __nr_ppis; \ - }) - -#define GICR_WAKER_ProcessorSleep (1U << 1) -#define GICR_WAKER_ChildrenAsleep (1U << 2) - -#define GIC_BASER_CACHE_nCnB 0ULL -#define GIC_BASER_CACHE_SameAsInner 0ULL -#define GIC_BASER_CACHE_nC 1ULL -#define GIC_BASER_CACHE_RaWt 2ULL -#define GIC_BASER_CACHE_RaWb 3ULL -#define GIC_BASER_CACHE_WaWt 4ULL -#define GIC_BASER_CACHE_WaWb 5ULL -#define GIC_BASER_CACHE_RaWaWt 6ULL -#define GIC_BASER_CACHE_RaWaWb 7ULL -#define GIC_BASER_CACHE_MASK 7ULL -#define GIC_BASER_NonShareable 0ULL -#define GIC_BASER_InnerShareable 1ULL -#define GIC_BASER_OuterShareable 2ULL -#define GIC_BASER_SHAREABILITY_MASK 3ULL - -#define GIC_BASER_CACHEABILITY(reg, inner_outer, type) \ - 
(GIC_BASER_CACHE_##type << reg##_##inner_outer##_CACHEABILITY_SHIFT) - -#define GIC_BASER_SHAREABILITY(reg, type) \ - (GIC_BASER_##type << reg##_SHAREABILITY_SHIFT) - -/* encode a size field of width @w containing @n - 1 units */ -#define GIC_ENCODE_SZ(n, w) (((unsigned long)(n) - 1) & GENMASK_ULL(((w) - 1), 0)) - -#define GICR_PROPBASER_SHAREABILITY_SHIFT (10) -#define GICR_PROPBASER_INNER_CACHEABILITY_SHIFT (7) -#define GICR_PROPBASER_OUTER_CACHEABILITY_SHIFT (56) -#define GICR_PROPBASER_SHAREABILITY_MASK \ - GIC_BASER_SHAREABILITY(GICR_PROPBASER, SHAREABILITY_MASK) -#define GICR_PROPBASER_INNER_CACHEABILITY_MASK \ - GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, MASK) -#define GICR_PROPBASER_OUTER_CACHEABILITY_MASK \ - GIC_BASER_CACHEABILITY(GICR_PROPBASER, OUTER, MASK) -#define GICR_PROPBASER_CACHEABILITY_MASK GICR_PROPBASER_INNER_CACHEABILITY_MASK - -#define GICR_PROPBASER_InnerShareable \ - GIC_BASER_SHAREABILITY(GICR_PROPBASER, InnerShareable) - -#define GICR_PROPBASER_nCnB GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, nCnB) -#define GICR_PROPBASER_nC GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, nC) -#define GICR_PROPBASER_RaWt GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, RaWt) -#define GICR_PROPBASER_RaWb GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, RaWb) -#define GICR_PROPBASER_WaWt GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, WaWt) -#define GICR_PROPBASER_WaWb GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, WaWb) -#define GICR_PROPBASER_RaWaWt GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, RaWaWt) -#define GICR_PROPBASER_RaWaWb GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, RaWaWb) - -#define GICR_PROPBASER_IDBITS_MASK (0x1f) -#define GICR_PROPBASER_ADDRESS(x) ((x) & GENMASK_ULL(51, 12)) -#define GICR_PENDBASER_ADDRESS(x) ((x) & GENMASK_ULL(51, 16)) - -#define GICR_PENDBASER_SHAREABILITY_SHIFT (10) -#define GICR_PENDBASER_INNER_CACHEABILITY_SHIFT (7) -#define GICR_PENDBASER_OUTER_CACHEABILITY_SHIFT (56) -#define GICR_PENDBASER_SHAREABILITY_MASK \ - GIC_BASER_SHAREABILITY(GICR_PENDBASER, SHAREABILITY_MASK) -#define GICR_PENDBASER_INNER_CACHEABILITY_MASK \ - GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, MASK) -#define GICR_PENDBASER_OUTER_CACHEABILITY_MASK \ - GIC_BASER_CACHEABILITY(GICR_PENDBASER, OUTER, MASK) -#define GICR_PENDBASER_CACHEABILITY_MASK GICR_PENDBASER_INNER_CACHEABILITY_MASK - -#define GICR_PENDBASER_InnerShareable \ - GIC_BASER_SHAREABILITY(GICR_PENDBASER, InnerShareable) - -#define GICR_PENDBASER_nCnB GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, nCnB) -#define GICR_PENDBASER_nC GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, nC) -#define GICR_PENDBASER_RaWt GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, RaWt) -#define GICR_PENDBASER_RaWb GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, RaWb) -#define GICR_PENDBASER_WaWt GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, WaWt) -#define GICR_PENDBASER_WaWb GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, WaWb) -#define GICR_PENDBASER_RaWaWt GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, RaWaWt) -#define GICR_PENDBASER_RaWaWb GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, RaWaWb) - -#define GICR_PENDBASER_PTZ BIT_ULL(62) - -/* - * Re-Distributor registers, offsets from SGI_base - */ -#define GICR_IGROUPR0 GICD_IGROUPR -#define GICR_ISENABLER0 GICD_ISENABLER -#define GICR_ICENABLER0 GICD_ICENABLER -#define GICR_ISPENDR0 GICD_ISPENDR -#define GICR_ICPENDR0 GICD_ICPENDR -#define GICR_ISACTIVER0 GICD_ISACTIVER -#define GICR_ICACTIVER0 GICD_ICACTIVER -#define GICR_IPRIORITYR0 GICD_IPRIORITYR -#define GICR_ICFGR0 GICD_ICFGR -#define 
GICR_IGRPMODR0 GICD_IGRPMODR -#define GICR_NSACR GICD_NSACR - -#define GICR_TYPER_PLPIS (1U << 0) -#define GICR_TYPER_VLPIS (1U << 1) -#define GICR_TYPER_DIRTY (1U << 2) -#define GICR_TYPER_DirectLPIS (1U << 3) -#define GICR_TYPER_LAST (1U << 4) -#define GICR_TYPER_RVPEID (1U << 7) -#define GICR_TYPER_COMMON_LPI_AFF GENMASK_ULL(25, 24) -#define GICR_TYPER_AFFINITY GENMASK_ULL(63, 32) - -#define GICR_INVLPIR_INTID GENMASK_ULL(31, 0) -#define GICR_INVLPIR_VPEID GENMASK_ULL(47, 32) -#define GICR_INVLPIR_V GENMASK_ULL(63, 63) - -#define GICR_INVALLR_VPEID GICR_INVLPIR_VPEID -#define GICR_INVALLR_V GICR_INVLPIR_V - -#define GIC_V3_REDIST_SIZE 0x20000 - -#define LPI_PROP_GROUP1 (1 << 1) -#define LPI_PROP_ENABLED (1 << 0) - -/* - * Re-Distributor registers, offsets from VLPI_base - */ -#define GICR_VPROPBASER 0x0070 - -#define GICR_VPROPBASER_IDBITS_MASK 0x1f - -#define GICR_VPROPBASER_SHAREABILITY_SHIFT (10) -#define GICR_VPROPBASER_INNER_CACHEABILITY_SHIFT (7) -#define GICR_VPROPBASER_OUTER_CACHEABILITY_SHIFT (56) - -#define GICR_VPROPBASER_SHAREABILITY_MASK \ - GIC_BASER_SHAREABILITY(GICR_VPROPBASER, SHAREABILITY_MASK) -#define GICR_VPROPBASER_INNER_CACHEABILITY_MASK \ - GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, MASK) -#define GICR_VPROPBASER_OUTER_CACHEABILITY_MASK \ - GIC_BASER_CACHEABILITY(GICR_VPROPBASER, OUTER, MASK) -#define GICR_VPROPBASER_CACHEABILITY_MASK \ - GICR_VPROPBASER_INNER_CACHEABILITY_MASK - -#define GICR_VPROPBASER_InnerShareable \ - GIC_BASER_SHAREABILITY(GICR_VPROPBASER, InnerShareable) - -#define GICR_VPROPBASER_nCnB GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, nCnB) -#define GICR_VPROPBASER_nC GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, nC) -#define GICR_VPROPBASER_RaWt GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, RaWt) -#define GICR_VPROPBASER_RaWb GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, RaWb) -#define GICR_VPROPBASER_WaWt GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, WaWt) -#define GICR_VPROPBASER_WaWb GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, WaWb) -#define GICR_VPROPBASER_RaWaWt GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, RaWaWt) -#define GICR_VPROPBASER_RaWaWb GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, RaWaWb) - -/* - * GICv4.1 VPROPBASER reinvention. A subtle mix between the old - * VPROPBASER and ITS_BASER. Just not quite any of the two. 
- */ -#define GICR_VPROPBASER_4_1_VALID (1ULL << 63) -#define GICR_VPROPBASER_4_1_ENTRY_SIZE GENMASK_ULL(61, 59) -#define GICR_VPROPBASER_4_1_INDIRECT (1ULL << 55) -#define GICR_VPROPBASER_4_1_PAGE_SIZE GENMASK_ULL(54, 53) -#define GICR_VPROPBASER_4_1_Z (1ULL << 52) -#define GICR_VPROPBASER_4_1_ADDR GENMASK_ULL(51, 12) -#define GICR_VPROPBASER_4_1_SIZE GENMASK_ULL(6, 0) - -#define GICR_VPENDBASER 0x0078 - -#define GICR_VPENDBASER_SHAREABILITY_SHIFT (10) -#define GICR_VPENDBASER_INNER_CACHEABILITY_SHIFT (7) -#define GICR_VPENDBASER_OUTER_CACHEABILITY_SHIFT (56) -#define GICR_VPENDBASER_SHAREABILITY_MASK \ - GIC_BASER_SHAREABILITY(GICR_VPENDBASER, SHAREABILITY_MASK) -#define GICR_VPENDBASER_INNER_CACHEABILITY_MASK \ - GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, MASK) -#define GICR_VPENDBASER_OUTER_CACHEABILITY_MASK \ - GIC_BASER_CACHEABILITY(GICR_VPENDBASER, OUTER, MASK) -#define GICR_VPENDBASER_CACHEABILITY_MASK \ - GICR_VPENDBASER_INNER_CACHEABILITY_MASK - -#define GICR_VPENDBASER_NonShareable \ - GIC_BASER_SHAREABILITY(GICR_VPENDBASER, NonShareable) - -#define GICR_VPENDBASER_InnerShareable \ - GIC_BASER_SHAREABILITY(GICR_VPENDBASER, InnerShareable) - -#define GICR_VPENDBASER_nCnB GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, nCnB) -#define GICR_VPENDBASER_nC GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, nC) -#define GICR_VPENDBASER_RaWt GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, RaWt) -#define GICR_VPENDBASER_RaWb GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, RaWb) -#define GICR_VPENDBASER_WaWt GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, WaWt) -#define GICR_VPENDBASER_WaWb GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, WaWb) -#define GICR_VPENDBASER_RaWaWt GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, RaWaWt) -#define GICR_VPENDBASER_RaWaWb GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, RaWaWb) - -#define GICR_VPENDBASER_Dirty (1ULL << 60) -#define GICR_VPENDBASER_PendingLast (1ULL << 61) -#define GICR_VPENDBASER_IDAI (1ULL << 62) -#define GICR_VPENDBASER_Valid (1ULL << 63) - -/* - * GICv4.1 VPENDBASER, used for VPE residency. On top of these fields, - * also use the above Valid, PendingLast and Dirty. 
- */ -#define GICR_VPENDBASER_4_1_DB (1ULL << 62) -#define GICR_VPENDBASER_4_1_VGRP0EN (1ULL << 59) -#define GICR_VPENDBASER_4_1_VGRP1EN (1ULL << 58) -#define GICR_VPENDBASER_4_1_VPEID GENMASK_ULL(15, 0) - -#define GICR_VSGIR 0x0080 - -#define GICR_VSGIR_VPEID GENMASK(15, 0) - -#define GICR_VSGIPENDR 0x0088 - -#define GICR_VSGIPENDR_BUSY (1U << 31) -#define GICR_VSGIPENDR_PENDING GENMASK(15, 0) - -/* - * ITS registers, offsets from ITS_base - */ -#define GITS_CTLR 0x0000 -#define GITS_IIDR 0x0004 -#define GITS_TYPER 0x0008 -#define GITS_MPIDR 0x0018 -#define GITS_CBASER 0x0080 -#define GITS_CWRITER 0x0088 -#define GITS_CREADR 0x0090 -#define GITS_BASER 0x0100 -#define GITS_IDREGS_BASE 0xffd0 -#define GITS_PIDR0 0xffe0 -#define GITS_PIDR1 0xffe4 -#define GITS_PIDR2 GICR_PIDR2 -#define GITS_PIDR4 0xffd0 -#define GITS_CIDR0 0xfff0 -#define GITS_CIDR1 0xfff4 -#define GITS_CIDR2 0xfff8 -#define GITS_CIDR3 0xfffc - -#define GITS_TRANSLATER 0x10040 - -#define GITS_SGIR 0x20020 - -#define GITS_SGIR_VPEID GENMASK_ULL(47, 32) -#define GITS_SGIR_VINTID GENMASK_ULL(3, 0) - -#define GITS_CTLR_ENABLE (1U << 0) -#define GITS_CTLR_ImDe (1U << 1) -#define GITS_CTLR_ITS_NUMBER_SHIFT 4 -#define GITS_CTLR_ITS_NUMBER (0xFU << GITS_CTLR_ITS_NUMBER_SHIFT) -#define GITS_CTLR_QUIESCENT (1U << 31) - -#define GITS_TYPER_PLPIS (1UL << 0) -#define GITS_TYPER_VLPIS (1UL << 1) -#define GITS_TYPER_ITT_ENTRY_SIZE_SHIFT 4 -#define GITS_TYPER_ITT_ENTRY_SIZE GENMASK_ULL(7, 4) -#define GITS_TYPER_IDBITS_SHIFT 8 -#define GITS_TYPER_DEVBITS_SHIFT 13 -#define GITS_TYPER_DEVBITS GENMASK_ULL(17, 13) -#define GITS_TYPER_PTA (1UL << 19) -#define GITS_TYPER_HCC_SHIFT 24 -#define GITS_TYPER_HCC(r) (((r) >> GITS_TYPER_HCC_SHIFT) & 0xff) -#define GITS_TYPER_VMOVP (1ULL << 37) -#define GITS_TYPER_VMAPP (1ULL << 40) -#define GITS_TYPER_SVPET GENMASK_ULL(42, 41) - -#define GITS_IIDR_REV_SHIFT 12 -#define GITS_IIDR_REV_MASK (0xf << GITS_IIDR_REV_SHIFT) -#define GITS_IIDR_REV(r) (((r) >> GITS_IIDR_REV_SHIFT) & 0xf) -#define GITS_IIDR_PRODUCTID_SHIFT 24 - -#define GITS_CBASER_VALID (1ULL << 63) -#define GITS_CBASER_SHAREABILITY_SHIFT (10) -#define GITS_CBASER_INNER_CACHEABILITY_SHIFT (59) -#define GITS_CBASER_OUTER_CACHEABILITY_SHIFT (53) -#define GITS_CBASER_SHAREABILITY_MASK \ - GIC_BASER_SHAREABILITY(GITS_CBASER, SHAREABILITY_MASK) -#define GITS_CBASER_INNER_CACHEABILITY_MASK \ - GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, MASK) -#define GITS_CBASER_OUTER_CACHEABILITY_MASK \ - GIC_BASER_CACHEABILITY(GITS_CBASER, OUTER, MASK) -#define GITS_CBASER_CACHEABILITY_MASK GITS_CBASER_INNER_CACHEABILITY_MASK - -#define GITS_CBASER_InnerShareable \ - GIC_BASER_SHAREABILITY(GITS_CBASER, InnerShareable) - -#define GITS_CBASER_nCnB GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, nCnB) -#define GITS_CBASER_nC GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, nC) -#define GITS_CBASER_RaWt GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, RaWt) -#define GITS_CBASER_RaWb GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, RaWb) -#define GITS_CBASER_WaWt GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, WaWt) -#define GITS_CBASER_WaWb GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, WaWb) -#define GITS_CBASER_RaWaWt GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, RaWaWt) -#define GITS_CBASER_RaWaWb GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, RaWaWb) - -#define GITS_CBASER_ADDRESS(cbaser) ((cbaser) & GENMASK_ULL(51, 12)) - -#define GITS_BASER_NR_REGS 8 - -#define GITS_BASER_VALID (1ULL << 63) -#define GITS_BASER_INDIRECT (1ULL << 62) - -#define GITS_BASER_INNER_CACHEABILITY_SHIFT (59) -#define 
GITS_BASER_OUTER_CACHEABILITY_SHIFT (53) -#define GITS_BASER_INNER_CACHEABILITY_MASK \ - GIC_BASER_CACHEABILITY(GITS_BASER, INNER, MASK) -#define GITS_BASER_CACHEABILITY_MASK GITS_BASER_INNER_CACHEABILITY_MASK -#define GITS_BASER_OUTER_CACHEABILITY_MASK \ - GIC_BASER_CACHEABILITY(GITS_BASER, OUTER, MASK) -#define GITS_BASER_SHAREABILITY_MASK \ - GIC_BASER_SHAREABILITY(GITS_BASER, SHAREABILITY_MASK) - -#define GITS_BASER_nCnB GIC_BASER_CACHEABILITY(GITS_BASER, INNER, nCnB) -#define GITS_BASER_nC GIC_BASER_CACHEABILITY(GITS_BASER, INNER, nC) -#define GITS_BASER_RaWt GIC_BASER_CACHEABILITY(GITS_BASER, INNER, RaWt) -#define GITS_BASER_RaWb GIC_BASER_CACHEABILITY(GITS_BASER, INNER, RaWb) -#define GITS_BASER_WaWt GIC_BASER_CACHEABILITY(GITS_BASER, INNER, WaWt) -#define GITS_BASER_WaWb GIC_BASER_CACHEABILITY(GITS_BASER, INNER, WaWb) -#define GITS_BASER_RaWaWt GIC_BASER_CACHEABILITY(GITS_BASER, INNER, RaWaWt) -#define GITS_BASER_RaWaWb GIC_BASER_CACHEABILITY(GITS_BASER, INNER, RaWaWb) - -#define GITS_BASER_TYPE_SHIFT (56) -#define GITS_BASER_TYPE(r) (((r) >> GITS_BASER_TYPE_SHIFT) & 7) -#define GITS_BASER_ENTRY_SIZE_SHIFT (48) -#define GITS_BASER_ENTRY_SIZE(r) ((((r) >> GITS_BASER_ENTRY_SIZE_SHIFT) & 0x1f) + 1) -#define GITS_BASER_ENTRY_SIZE_MASK GENMASK_ULL(52, 48) -#define GITS_BASER_PHYS_52_to_48(phys) \ - (((phys) & GENMASK_ULL(47, 16)) | (((phys) >> 48) & 0xf) << 12) -#define GITS_BASER_ADDR_48_to_52(baser) \ - (((baser) & GENMASK_ULL(47, 16)) | (((baser) >> 12) & 0xf) << 48) - -#define GITS_BASER_SHAREABILITY_SHIFT (10) -#define GITS_BASER_InnerShareable \ - GIC_BASER_SHAREABILITY(GITS_BASER, InnerShareable) -#define GITS_BASER_PAGE_SIZE_SHIFT (8) -#define __GITS_BASER_PSZ(sz) (GIC_PAGE_SIZE_ ## sz << GITS_BASER_PAGE_SIZE_SHIFT) -#define GITS_BASER_PAGE_SIZE_4K __GITS_BASER_PSZ(4K) -#define GITS_BASER_PAGE_SIZE_16K __GITS_BASER_PSZ(16K) -#define GITS_BASER_PAGE_SIZE_64K __GITS_BASER_PSZ(64K) -#define GITS_BASER_PAGE_SIZE_MASK __GITS_BASER_PSZ(MASK) -#define GITS_BASER_PAGES_MAX 256 -#define GITS_BASER_PAGES_SHIFT (0) -#define GITS_BASER_NR_PAGES(r) (((r) & 0xff) + 1) - -#define GITS_BASER_TYPE_NONE 0 -#define GITS_BASER_TYPE_DEVICE 1 -#define GITS_BASER_TYPE_VCPU 2 -#define GITS_BASER_TYPE_RESERVED3 3 -#define GITS_BASER_TYPE_COLLECTION 4 -#define GITS_BASER_TYPE_RESERVED5 5 -#define GITS_BASER_TYPE_RESERVED6 6 -#define GITS_BASER_TYPE_RESERVED7 7 - -#define GITS_LVL1_ENTRY_SIZE (8UL) - -/* - * ITS commands - */ -#define GITS_CMD_MAPD 0x08 -#define GITS_CMD_MAPC 0x09 -#define GITS_CMD_MAPTI 0x0a -#define GITS_CMD_MAPI 0x0b -#define GITS_CMD_MOVI 0x01 -#define GITS_CMD_DISCARD 0x0f -#define GITS_CMD_INV 0x0c -#define GITS_CMD_MOVALL 0x0e -#define GITS_CMD_INVALL 0x0d -#define GITS_CMD_INT 0x03 -#define GITS_CMD_CLEAR 0x04 -#define GITS_CMD_SYNC 0x05 - -/* - * GICv4 ITS specific commands - */ -#define GITS_CMD_GICv4(x) ((x) | 0x20) -#define GITS_CMD_VINVALL GITS_CMD_GICv4(GITS_CMD_INVALL) -#define GITS_CMD_VMAPP GITS_CMD_GICv4(GITS_CMD_MAPC) -#define GITS_CMD_VMAPTI GITS_CMD_GICv4(GITS_CMD_MAPTI) -#define GITS_CMD_VMOVI GITS_CMD_GICv4(GITS_CMD_MOVI) -#define GITS_CMD_VSYNC GITS_CMD_GICv4(GITS_CMD_SYNC) -/* VMOVP, VSGI and INVDB are the odd ones, as they dont have a physical counterpart */ -#define GITS_CMD_VMOVP GITS_CMD_GICv4(2) -#define GITS_CMD_VSGI GITS_CMD_GICv4(3) -#define GITS_CMD_INVDB GITS_CMD_GICv4(0xe) - -/* - * ITS error numbers - */ -#define E_ITS_MOVI_UNMAPPED_INTERRUPT 0x010107 -#define E_ITS_MOVI_UNMAPPED_COLLECTION 0x010109 -#define E_ITS_INT_UNMAPPED_INTERRUPT 0x010307 
-#define E_ITS_CLEAR_UNMAPPED_INTERRUPT 0x010507 -#define E_ITS_MAPD_DEVICE_OOR 0x010801 -#define E_ITS_MAPD_ITTSIZE_OOR 0x010802 -#define E_ITS_MAPC_PROCNUM_OOR 0x010902 -#define E_ITS_MAPC_COLLECTION_OOR 0x010903 -#define E_ITS_MAPTI_UNMAPPED_DEVICE 0x010a04 -#define E_ITS_MAPTI_ID_OOR 0x010a05 -#define E_ITS_MAPTI_PHYSICALID_OOR 0x010a06 -#define E_ITS_INV_UNMAPPED_INTERRUPT 0x010c07 -#define E_ITS_INVALL_UNMAPPED_COLLECTION 0x010d09 -#define E_ITS_MOVALL_PROCNUM_OOR 0x010e01 -#define E_ITS_DISCARD_UNMAPPED_INTERRUPT 0x010f07 - -/* - * CPU interface registers - */ -#define ICC_CTLR_EL1_EOImode_SHIFT (1) -#define ICC_CTLR_EL1_EOImode_drop_dir (0U << ICC_CTLR_EL1_EOImode_SHIFT) -#define ICC_CTLR_EL1_EOImode_drop (1U << ICC_CTLR_EL1_EOImode_SHIFT) -#define ICC_CTLR_EL1_EOImode_MASK (1 << ICC_CTLR_EL1_EOImode_SHIFT) -#define ICC_CTLR_EL1_CBPR_SHIFT 0 -#define ICC_CTLR_EL1_CBPR_MASK (1 << ICC_CTLR_EL1_CBPR_SHIFT) -#define ICC_CTLR_EL1_PMHE_SHIFT 6 -#define ICC_CTLR_EL1_PMHE_MASK (1 << ICC_CTLR_EL1_PMHE_SHIFT) -#define ICC_CTLR_EL1_PRI_BITS_SHIFT 8 -#define ICC_CTLR_EL1_PRI_BITS_MASK (0x7 << ICC_CTLR_EL1_PRI_BITS_SHIFT) -#define ICC_CTLR_EL1_ID_BITS_SHIFT 11 -#define ICC_CTLR_EL1_ID_BITS_MASK (0x7 << ICC_CTLR_EL1_ID_BITS_SHIFT) -#define ICC_CTLR_EL1_SEIS_SHIFT 14 -#define ICC_CTLR_EL1_SEIS_MASK (0x1 << ICC_CTLR_EL1_SEIS_SHIFT) -#define ICC_CTLR_EL1_A3V_SHIFT 15 -#define ICC_CTLR_EL1_A3V_MASK (0x1 << ICC_CTLR_EL1_A3V_SHIFT) -#define ICC_CTLR_EL1_RSS (0x1 << 18) -#define ICC_CTLR_EL1_ExtRange (0x1 << 19) -#define ICC_PMR_EL1_SHIFT 0 -#define ICC_PMR_EL1_MASK (0xff << ICC_PMR_EL1_SHIFT) -#define ICC_BPR0_EL1_SHIFT 0 -#define ICC_BPR0_EL1_MASK (0x7 << ICC_BPR0_EL1_SHIFT) -#define ICC_BPR1_EL1_SHIFT 0 -#define ICC_BPR1_EL1_MASK (0x7 << ICC_BPR1_EL1_SHIFT) -#define ICC_IGRPEN0_EL1_SHIFT 0 -#define ICC_IGRPEN0_EL1_MASK (1 << ICC_IGRPEN0_EL1_SHIFT) -#define ICC_IGRPEN1_EL1_SHIFT 0 -#define ICC_IGRPEN1_EL1_MASK (1 << ICC_IGRPEN1_EL1_SHIFT) -#define ICC_SRE_EL1_DIB (1U << 2) -#define ICC_SRE_EL1_DFB (1U << 1) -#define ICC_SRE_EL1_SRE (1U << 0) - -/* These are for GICv2 emulation only */ -#define GICH_LR_VIRTUALID (0x3ffUL << 0) -#define GICH_LR_PHYSID_CPUID_SHIFT (10) -#define GICH_LR_PHYSID_CPUID (7UL << GICH_LR_PHYSID_CPUID_SHIFT) - -#define ICC_IAR1_EL1_SPURIOUS 0x3ff - -#define ICC_SRE_EL2_SRE (1 << 0) -#define ICC_SRE_EL2_ENABLE (1 << 3) - -#define ICC_SGI1R_TARGET_LIST_SHIFT 0 -#define ICC_SGI1R_TARGET_LIST_MASK (0xffff << ICC_SGI1R_TARGET_LIST_SHIFT) -#define ICC_SGI1R_AFFINITY_1_SHIFT 16 -#define ICC_SGI1R_AFFINITY_1_MASK (0xff << ICC_SGI1R_AFFINITY_1_SHIFT) -#define ICC_SGI1R_SGI_ID_SHIFT 24 -#define ICC_SGI1R_SGI_ID_MASK (0xfULL << ICC_SGI1R_SGI_ID_SHIFT) -#define ICC_SGI1R_AFFINITY_2_SHIFT 32 -#define ICC_SGI1R_AFFINITY_2_MASK (0xffULL << ICC_SGI1R_AFFINITY_2_SHIFT) -#define ICC_SGI1R_IRQ_ROUTING_MODE_BIT 40 -#define ICC_SGI1R_RS_SHIFT 44 -#define ICC_SGI1R_RS_MASK (0xfULL << ICC_SGI1R_RS_SHIFT) -#define ICC_SGI1R_AFFINITY_3_SHIFT 48 -#define ICC_SGI1R_AFFINITY_3_MASK (0xffULL << ICC_SGI1R_AFFINITY_3_SHIFT) - -#endif diff --git a/tools/testing/selftests/kvm/include/aarch64/gic_v3_its.h b/tools/testing/selftests/kvm/include/aarch64/gic_v3_its.h deleted file mode 100644 index 3722ed9c8f96..000000000000 --- a/tools/testing/selftests/kvm/include/aarch64/gic_v3_its.h +++ /dev/null @@ -1,19 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ - -#ifndef __SELFTESTS_GIC_V3_ITS_H__ -#define __SELFTESTS_GIC_V3_ITS_H__ - -#include - -void its_init(vm_paddr_t coll_tbl, size_t coll_tbl_sz, - vm_paddr_t 
device_tbl, size_t device_tbl_sz, - vm_paddr_t cmdq, size_t cmdq_size); - -void its_send_mapd_cmd(void *cmdq_base, u32 device_id, vm_paddr_t itt_base, - size_t itt_size, bool valid); -void its_send_mapc_cmd(void *cmdq_base, u32 vcpu_id, u32 collection_id, bool valid); -void its_send_mapti_cmd(void *cmdq_base, u32 device_id, u32 event_id, - u32 collection_id, u32 intid); -void its_send_invall_cmd(void *cmdq_base, u32 collection_id); - -#endif // __SELFTESTS_GIC_V3_ITS_H__ diff --git a/tools/testing/selftests/kvm/include/aarch64/kvm_util_arch.h b/tools/testing/selftests/kvm/include/aarch64/kvm_util_arch.h deleted file mode 100644 index e43a57d99b56..000000000000 --- a/tools/testing/selftests/kvm/include/aarch64/kvm_util_arch.h +++ /dev/null @@ -1,7 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -#ifndef SELFTEST_KVM_UTIL_ARCH_H -#define SELFTEST_KVM_UTIL_ARCH_H - -struct kvm_vm_arch {}; - -#endif // SELFTEST_KVM_UTIL_ARCH_H diff --git a/tools/testing/selftests/kvm/include/aarch64/processor.h b/tools/testing/selftests/kvm/include/aarch64/processor.h deleted file mode 100644 index 1e8d0d531fbd..000000000000 --- a/tools/testing/selftests/kvm/include/aarch64/processor.h +++ /dev/null @@ -1,238 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * AArch64 processor specific defines - * - * Copyright (C) 2018, Red Hat, Inc. - */ -#ifndef SELFTEST_KVM_PROCESSOR_H -#define SELFTEST_KVM_PROCESSOR_H - -#include "kvm_util.h" -#include "ucall_common.h" - -#include -#include -#include -#include -#include - - -#define ARM64_CORE_REG(x) (KVM_REG_ARM64 | KVM_REG_SIZE_U64 | \ - KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(x)) - -/* - * KVM_ARM64_SYS_REG(sys_reg_id): Helper macro to convert - * SYS_* register definitions in asm/sysreg.h to use in KVM - * calls such as vcpu_get_reg() and vcpu_set_reg(). - */ -#define KVM_ARM64_SYS_REG(sys_reg_id) \ - ARM64_SYS_REG(sys_reg_Op0(sys_reg_id), \ - sys_reg_Op1(sys_reg_id), \ - sys_reg_CRn(sys_reg_id), \ - sys_reg_CRm(sys_reg_id), \ - sys_reg_Op2(sys_reg_id)) - -/* - * Default MAIR - * index attribute - * DEVICE_nGnRnE 0 0000:0000 - * DEVICE_nGnRE 1 0000:0100 - * DEVICE_GRE 2 0000:1100 - * NORMAL_NC 3 0100:0100 - * NORMAL 4 1111:1111 - * NORMAL_WT 5 1011:1011 - */ - -/* Linux doesn't use these memory types, so let's define them. 
*/ -#define MAIR_ATTR_DEVICE_GRE UL(0x0c) -#define MAIR_ATTR_NORMAL_WT UL(0xbb) - -#define MT_DEVICE_nGnRnE 0 -#define MT_DEVICE_nGnRE 1 -#define MT_DEVICE_GRE 2 -#define MT_NORMAL_NC 3 -#define MT_NORMAL 4 -#define MT_NORMAL_WT 5 - -#define DEFAULT_MAIR_EL1 \ - (MAIR_ATTRIDX(MAIR_ATTR_DEVICE_nGnRnE, MT_DEVICE_nGnRnE) | \ - MAIR_ATTRIDX(MAIR_ATTR_DEVICE_nGnRE, MT_DEVICE_nGnRE) | \ - MAIR_ATTRIDX(MAIR_ATTR_DEVICE_GRE, MT_DEVICE_GRE) | \ - MAIR_ATTRIDX(MAIR_ATTR_NORMAL_NC, MT_NORMAL_NC) | \ - MAIR_ATTRIDX(MAIR_ATTR_NORMAL, MT_NORMAL) | \ - MAIR_ATTRIDX(MAIR_ATTR_NORMAL_WT, MT_NORMAL_WT)) - -void aarch64_vcpu_setup(struct kvm_vcpu *vcpu, struct kvm_vcpu_init *init); -struct kvm_vcpu *aarch64_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id, - struct kvm_vcpu_init *init, void *guest_code); - -struct ex_regs { - u64 regs[31]; - u64 sp; - u64 pc; - u64 pstate; -}; - -#define VECTOR_NUM 16 - -enum { - VECTOR_SYNC_CURRENT_SP0, - VECTOR_IRQ_CURRENT_SP0, - VECTOR_FIQ_CURRENT_SP0, - VECTOR_ERROR_CURRENT_SP0, - - VECTOR_SYNC_CURRENT, - VECTOR_IRQ_CURRENT, - VECTOR_FIQ_CURRENT, - VECTOR_ERROR_CURRENT, - - VECTOR_SYNC_LOWER_64, - VECTOR_IRQ_LOWER_64, - VECTOR_FIQ_LOWER_64, - VECTOR_ERROR_LOWER_64, - - VECTOR_SYNC_LOWER_32, - VECTOR_IRQ_LOWER_32, - VECTOR_FIQ_LOWER_32, - VECTOR_ERROR_LOWER_32, -}; - -#define VECTOR_IS_SYNC(v) ((v) == VECTOR_SYNC_CURRENT_SP0 || \ - (v) == VECTOR_SYNC_CURRENT || \ - (v) == VECTOR_SYNC_LOWER_64 || \ - (v) == VECTOR_SYNC_LOWER_32) - -/* Access flag */ -#define PTE_AF (1ULL << 10) - -/* Access flag update enable/disable */ -#define TCR_EL1_HA (1ULL << 39) - -void aarch64_get_supported_page_sizes(uint32_t ipa, uint32_t *ipa4k, - uint32_t *ipa16k, uint32_t *ipa64k); - -void vm_init_descriptor_tables(struct kvm_vm *vm); -void vcpu_init_descriptor_tables(struct kvm_vcpu *vcpu); - -typedef void(*handler_fn)(struct ex_regs *); -void vm_install_exception_handler(struct kvm_vm *vm, - int vector, handler_fn handler); -void vm_install_sync_handler(struct kvm_vm *vm, - int vector, int ec, handler_fn handler); - -uint64_t *virt_get_pte_hva(struct kvm_vm *vm, vm_vaddr_t gva); - -static inline void cpu_relax(void) -{ - asm volatile("yield" ::: "memory"); -} - -#define isb() asm volatile("isb" : : : "memory") -#define dsb(opt) asm volatile("dsb " #opt : : : "memory") -#define dmb(opt) asm volatile("dmb " #opt : : : "memory") - -#define dma_wmb() dmb(oshst) -#define __iowmb() dma_wmb() - -#define dma_rmb() dmb(oshld) - -#define __iormb(v) \ -({ \ - unsigned long tmp; \ - \ - dma_rmb(); \ - \ - /* \ - * Courtesy of arch/arm64/include/asm/io.h: \ - * Create a dummy control dependency from the IO read to any \ - * later instructions. This ensures that a subsequent call \ - * to udelay() will be ordered due to the ISB in __delay(). \ - */ \ - asm volatile("eor %0, %1, %1\n" \ - "cbnz %0, ." 
\ - : "=r" (tmp) : "r" ((unsigned long)(v)) \ - : "memory"); \ -}) - -static __always_inline void __raw_writel(u32 val, volatile void *addr) -{ - asm volatile("str %w0, [%1]" : : "rZ" (val), "r" (addr)); -} - -static __always_inline u32 __raw_readl(const volatile void *addr) -{ - u32 val; - asm volatile("ldr %w0, [%1]" : "=r" (val) : "r" (addr)); - return val; -} - -static __always_inline void __raw_writeq(u64 val, volatile void *addr) -{ - asm volatile("str %0, [%1]" : : "rZ" (val), "r" (addr)); -} - -static __always_inline u64 __raw_readq(const volatile void *addr) -{ - u64 val; - asm volatile("ldr %0, [%1]" : "=r" (val) : "r" (addr)); - return val; -} - -#define writel_relaxed(v,c) ((void)__raw_writel((__force u32)cpu_to_le32(v),(c))) -#define readl_relaxed(c) ({ u32 __r = le32_to_cpu((__force __le32)__raw_readl(c)); __r; }) -#define writeq_relaxed(v,c) ((void)__raw_writeq((__force u64)cpu_to_le64(v),(c))) -#define readq_relaxed(c) ({ u64 __r = le64_to_cpu((__force __le64)__raw_readq(c)); __r; }) - -#define writel(v,c) ({ __iowmb(); writel_relaxed((v),(c));}) -#define readl(c) ({ u32 __v = readl_relaxed(c); __iormb(__v); __v; }) -#define writeq(v,c) ({ __iowmb(); writeq_relaxed((v),(c));}) -#define readq(c) ({ u64 __v = readq_relaxed(c); __iormb(__v); __v; }) - - -static inline void local_irq_enable(void) -{ - asm volatile("msr daifclr, #3" : : : "memory"); -} - -static inline void local_irq_disable(void) -{ - asm volatile("msr daifset, #3" : : : "memory"); -} - -/** - * struct arm_smccc_res - Result from SMC/HVC call - * @a0-a3 result values from registers 0 to 3 - */ -struct arm_smccc_res { - unsigned long a0; - unsigned long a1; - unsigned long a2; - unsigned long a3; -}; - -/** - * smccc_hvc - Invoke a SMCCC function using the hvc conduit - * @function_id: the SMCCC function to be called - * @arg0-arg6: SMCCC function arguments, corresponding to registers x1-x7 - * @res: pointer to write the return values from registers x0-x3 - * - */ -void smccc_hvc(uint32_t function_id, uint64_t arg0, uint64_t arg1, - uint64_t arg2, uint64_t arg3, uint64_t arg4, uint64_t arg5, - uint64_t arg6, struct arm_smccc_res *res); - -/** - * smccc_smc - Invoke a SMCCC function using the smc conduit - * @function_id: the SMCCC function to be called - * @arg0-arg6: SMCCC function arguments, corresponding to registers x1-x7 - * @res: pointer to write the return values from registers x0-x3 - * - */ -void smccc_smc(uint32_t function_id, uint64_t arg0, uint64_t arg1, - uint64_t arg2, uint64_t arg3, uint64_t arg4, uint64_t arg5, - uint64_t arg6, struct arm_smccc_res *res); - -/* Execute a Wait For Interrupt instruction. 
*/ -void wfi(void); - -#endif /* SELFTEST_KVM_PROCESSOR_H */ diff --git a/tools/testing/selftests/kvm/include/aarch64/spinlock.h b/tools/testing/selftests/kvm/include/aarch64/spinlock.h deleted file mode 100644 index cf0984106d14..000000000000 --- a/tools/testing/selftests/kvm/include/aarch64/spinlock.h +++ /dev/null @@ -1,13 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ - -#ifndef SELFTEST_KVM_ARM64_SPINLOCK_H -#define SELFTEST_KVM_ARM64_SPINLOCK_H - -struct spinlock { - int v; -}; - -extern void spin_lock(struct spinlock *lock); -extern void spin_unlock(struct spinlock *lock); - -#endif /* SELFTEST_KVM_ARM64_SPINLOCK_H */ diff --git a/tools/testing/selftests/kvm/include/aarch64/ucall.h b/tools/testing/selftests/kvm/include/aarch64/ucall.h deleted file mode 100644 index 4ec801f37f00..000000000000 --- a/tools/testing/selftests/kvm/include/aarch64/ucall.h +++ /dev/null @@ -1,20 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -#ifndef SELFTEST_KVM_UCALL_H -#define SELFTEST_KVM_UCALL_H - -#include "kvm_util.h" - -#define UCALL_EXIT_REASON KVM_EXIT_MMIO - -/* - * ucall_exit_mmio_addr holds per-VM values (global data is duplicated by each - * VM), it must not be accessed from host code. - */ -extern vm_vaddr_t *ucall_exit_mmio_addr; - -static inline void ucall_arch_do_ucall(vm_vaddr_t uc) -{ - WRITE_ONCE(*ucall_exit_mmio_addr, uc); -} - -#endif diff --git a/tools/testing/selftests/kvm/include/aarch64/vgic.h b/tools/testing/selftests/kvm/include/aarch64/vgic.h deleted file mode 100644 index c481d0c00a5d..000000000000 --- a/tools/testing/selftests/kvm/include/aarch64/vgic.h +++ /dev/null @@ -1,37 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * ARM Generic Interrupt Controller (GIC) host specific defines - */ - -#ifndef SELFTEST_KVM_VGIC_H -#define SELFTEST_KVM_VGIC_H - -#include - -#include "kvm_util.h" - -#define REDIST_REGION_ATTR_ADDR(count, base, flags, index) \ - (((uint64_t)(count) << 52) | \ - ((uint64_t)((base) >> 16) << 16) | \ - ((uint64_t)(flags) << 12) | \ - index) - -int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, uint32_t nr_irqs); - -#define VGIC_MAX_RESERVED 1023 - -void kvm_irq_set_level_info(int gic_fd, uint32_t intid, int level); -int _kvm_irq_set_level_info(int gic_fd, uint32_t intid, int level); - -void kvm_arm_irq_line(struct kvm_vm *vm, uint32_t intid, int level); -int _kvm_arm_irq_line(struct kvm_vm *vm, uint32_t intid, int level); - -/* The vcpu arg only applies to private interrupts. 
*/ -void kvm_irq_write_ispendr(int gic_fd, uint32_t intid, struct kvm_vcpu *vcpu); -void kvm_irq_write_isactiver(int gic_fd, uint32_t intid, struct kvm_vcpu *vcpu); - -#define KVM_IRQCHIP_NUM_PINS (1020 - 32) - -int vgic_its_setup(struct kvm_vm *vm); - -#endif // SELFTEST_KVM_VGIC_H diff --git a/tools/testing/selftests/kvm/include/arm64/arch_timer.h b/tools/testing/selftests/kvm/include/arm64/arch_timer.h new file mode 100644 index 000000000000..bf461de34785 --- /dev/null +++ b/tools/testing/selftests/kvm/include/arm64/arch_timer.h @@ -0,0 +1,158 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * ARM Generic Timer specific interface + */ + +#ifndef SELFTEST_KVM_ARCH_TIMER_H +#define SELFTEST_KVM_ARCH_TIMER_H + +#include "processor.h" + +enum arch_timer { + VIRTUAL, + PHYSICAL, +}; + +#define CTL_ENABLE (1 << 0) +#define CTL_IMASK (1 << 1) +#define CTL_ISTATUS (1 << 2) + +#define msec_to_cycles(msec) \ + (timer_get_cntfrq() * (uint64_t)(msec) / 1000) + +#define usec_to_cycles(usec) \ + (timer_get_cntfrq() * (uint64_t)(usec) / 1000000) + +#define cycles_to_usec(cycles) \ + ((uint64_t)(cycles) * 1000000 / timer_get_cntfrq()) + +static inline uint32_t timer_get_cntfrq(void) +{ + return read_sysreg(cntfrq_el0); +} + +static inline uint64_t timer_get_cntct(enum arch_timer timer) +{ + isb(); + + switch (timer) { + case VIRTUAL: + return read_sysreg(cntvct_el0); + case PHYSICAL: + return read_sysreg(cntpct_el0); + default: + GUEST_FAIL("Unexpected timer type = %u", timer); + } + + /* We should not reach here */ + return 0; +} + +static inline void timer_set_cval(enum arch_timer timer, uint64_t cval) +{ + switch (timer) { + case VIRTUAL: + write_sysreg(cval, cntv_cval_el0); + break; + case PHYSICAL: + write_sysreg(cval, cntp_cval_el0); + break; + default: + GUEST_FAIL("Unexpected timer type = %u", timer); + } + + isb(); +} + +static inline uint64_t timer_get_cval(enum arch_timer timer) +{ + switch (timer) { + case VIRTUAL: + return read_sysreg(cntv_cval_el0); + case PHYSICAL: + return read_sysreg(cntp_cval_el0); + default: + GUEST_FAIL("Unexpected timer type = %u", timer); + } + + /* We should not reach here */ + return 0; +} + +static inline void timer_set_tval(enum arch_timer timer, int32_t tval) +{ + switch (timer) { + case VIRTUAL: + write_sysreg(tval, cntv_tval_el0); + break; + case PHYSICAL: + write_sysreg(tval, cntp_tval_el0); + break; + default: + GUEST_FAIL("Unexpected timer type = %u", timer); + } + + isb(); +} + +static inline int32_t timer_get_tval(enum arch_timer timer) +{ + isb(); + switch (timer) { + case VIRTUAL: + return read_sysreg(cntv_tval_el0); + case PHYSICAL: + return read_sysreg(cntp_tval_el0); + default: + GUEST_FAIL("Could not get timer %d\n", timer); + } + + /* We should not reach here */ + return 0; +} + +static inline void timer_set_ctl(enum arch_timer timer, uint32_t ctl) +{ + switch (timer) { + case VIRTUAL: + write_sysreg(ctl, cntv_ctl_el0); + break; + case PHYSICAL: + write_sysreg(ctl, cntp_ctl_el0); + break; + default: + GUEST_FAIL("Unexpected timer type = %u", timer); + } + + isb(); +} + +static inline uint32_t timer_get_ctl(enum arch_timer timer) +{ + switch (timer) { + case VIRTUAL: + return read_sysreg(cntv_ctl_el0); + case PHYSICAL: + return read_sysreg(cntp_ctl_el0); + default: + GUEST_FAIL("Unexpected timer type = %u", timer); + } + + /* We should not reach here */ + return 0; +} + +static inline void timer_set_next_cval_ms(enum arch_timer timer, uint32_t msec) +{ + uint64_t now_ct = timer_get_cntct(timer); + uint64_t next_ct = now_ct + msec_to_cycles(msec); 
+ + timer_set_cval(timer, next_ct); +} + +static inline void timer_set_next_tval_ms(enum arch_timer timer, uint32_t msec) +{ + timer_set_tval(timer, msec_to_cycles(msec)); +} + +#endif /* SELFTEST_KVM_ARCH_TIMER_H */ diff --git a/tools/testing/selftests/kvm/include/arm64/delay.h b/tools/testing/selftests/kvm/include/arm64/delay.h new file mode 100644 index 000000000000..329e4f5079ea --- /dev/null +++ b/tools/testing/selftests/kvm/include/arm64/delay.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * ARM simple delay routines + */ + +#ifndef SELFTEST_KVM_ARM_DELAY_H +#define SELFTEST_KVM_ARM_DELAY_H + +#include "arch_timer.h" + +static inline void __delay(uint64_t cycles) +{ + enum arch_timer timer = VIRTUAL; + uint64_t start = timer_get_cntct(timer); + + while ((timer_get_cntct(timer) - start) < cycles) + cpu_relax(); +} + +static inline void udelay(unsigned long usec) +{ + __delay(usec_to_cycles(usec)); +} + +#endif /* SELFTEST_KVM_ARM_DELAY_H */ diff --git a/tools/testing/selftests/kvm/include/arm64/gic.h b/tools/testing/selftests/kvm/include/arm64/gic.h new file mode 100644 index 000000000000..baeb3c859389 --- /dev/null +++ b/tools/testing/selftests/kvm/include/arm64/gic.h @@ -0,0 +1,64 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * ARM Generic Interrupt Controller (GIC) specific defines + */ + +#ifndef SELFTEST_KVM_GIC_H +#define SELFTEST_KVM_GIC_H + +#include + +enum gic_type { + GIC_V3, + GIC_TYPE_MAX, +}; + +/* + * Note that the redistributor frames are at the end, as the range scales + * with the number of vCPUs in the VM. + */ +#define GITS_BASE_GPA 0x8000000ULL +#define GICD_BASE_GPA (GITS_BASE_GPA + KVM_VGIC_V3_ITS_SIZE) +#define GICR_BASE_GPA (GICD_BASE_GPA + KVM_VGIC_V3_DIST_SIZE) + +/* The GIC is identity-mapped into the guest at the time of setup. */ +#define GITS_BASE_GVA ((volatile void *)GITS_BASE_GPA) +#define GICD_BASE_GVA ((volatile void *)GICD_BASE_GPA) +#define GICR_BASE_GVA ((volatile void *)GICR_BASE_GPA) + +#define MIN_SGI 0 +#define MIN_PPI 16 +#define MIN_SPI 32 +#define MAX_SPI 1019 +#define IAR_SPURIOUS 1023 + +#define INTID_IS_SGI(intid) (0 <= (intid) && (intid) < MIN_PPI) +#define INTID_IS_PPI(intid) (MIN_PPI <= (intid) && (intid) < MIN_SPI) +#define INTID_IS_SPI(intid) (MIN_SPI <= (intid) && (intid) <= MAX_SPI) + +void gic_init(enum gic_type type, unsigned int nr_cpus); +void gic_irq_enable(unsigned int intid); +void gic_irq_disable(unsigned int intid); +unsigned int gic_get_and_ack_irq(void); +void gic_set_eoi(unsigned int intid); +void gic_set_dir(unsigned int intid); + +/* + * Sets the EOI mode. When split is false, EOI just drops the priority. When + * split is true, EOI drops the priority and deactivates the interrupt. 
+ */ +void gic_set_eoi_split(bool split); +void gic_set_priority_mask(uint64_t mask); +void gic_set_priority(uint32_t intid, uint32_t prio); +void gic_irq_set_active(unsigned int intid); +void gic_irq_clear_active(unsigned int intid); +bool gic_irq_get_active(unsigned int intid); +void gic_irq_set_pending(unsigned int intid); +void gic_irq_clear_pending(unsigned int intid); +bool gic_irq_get_pending(unsigned int intid); +void gic_irq_set_config(unsigned int intid, bool is_edge); + +void gic_rdist_enable_lpis(vm_paddr_t cfg_table, size_t cfg_table_size, + vm_paddr_t pend_table); + +#endif /* SELFTEST_KVM_GIC_H */ diff --git a/tools/testing/selftests/kvm/include/arm64/gic_v3.h b/tools/testing/selftests/kvm/include/arm64/gic_v3.h new file mode 100644 index 000000000000..a76615fa39a1 --- /dev/null +++ b/tools/testing/selftests/kvm/include/arm64/gic_v3.h @@ -0,0 +1,604 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2013, 2014 ARM Limited, All Rights Reserved. + * Author: Marc Zyngier + */ +#ifndef __SELFTESTS_GIC_V3_H +#define __SELFTESTS_GIC_V3_H + +/* + * Distributor registers. We assume we're running non-secure, with ARE + * being set. Secure-only and non-ARE registers are not described. + */ +#define GICD_CTLR 0x0000 +#define GICD_TYPER 0x0004 +#define GICD_IIDR 0x0008 +#define GICD_TYPER2 0x000C +#define GICD_STATUSR 0x0010 +#define GICD_SETSPI_NSR 0x0040 +#define GICD_CLRSPI_NSR 0x0048 +#define GICD_SETSPI_SR 0x0050 +#define GICD_CLRSPI_SR 0x0058 +#define GICD_IGROUPR 0x0080 +#define GICD_ISENABLER 0x0100 +#define GICD_ICENABLER 0x0180 +#define GICD_ISPENDR 0x0200 +#define GICD_ICPENDR 0x0280 +#define GICD_ISACTIVER 0x0300 +#define GICD_ICACTIVER 0x0380 +#define GICD_IPRIORITYR 0x0400 +#define GICD_ICFGR 0x0C00 +#define GICD_IGRPMODR 0x0D00 +#define GICD_NSACR 0x0E00 +#define GICD_IGROUPRnE 0x1000 +#define GICD_ISENABLERnE 0x1200 +#define GICD_ICENABLERnE 0x1400 +#define GICD_ISPENDRnE 0x1600 +#define GICD_ICPENDRnE 0x1800 +#define GICD_ISACTIVERnE 0x1A00 +#define GICD_ICACTIVERnE 0x1C00 +#define GICD_IPRIORITYRnE 0x2000 +#define GICD_ICFGRnE 0x3000 +#define GICD_IROUTER 0x6000 +#define GICD_IROUTERnE 0x8000 +#define GICD_IDREGS 0xFFD0 +#define GICD_PIDR2 0xFFE8 + +#define ESPI_BASE_INTID 4096 + +/* + * Those registers are actually from GICv2, but the spec demands that they + * are implemented as RES0 if ARE is 1 (which we do in KVM's emulated GICv3). 
+ */ +#define GICD_ITARGETSR 0x0800 +#define GICD_SGIR 0x0F00 +#define GICD_CPENDSGIR 0x0F10 +#define GICD_SPENDSGIR 0x0F20 + +#define GICD_CTLR_RWP (1U << 31) +#define GICD_CTLR_nASSGIreq (1U << 8) +#define GICD_CTLR_DS (1U << 6) +#define GICD_CTLR_ARE_NS (1U << 4) +#define GICD_CTLR_ENABLE_G1A (1U << 1) +#define GICD_CTLR_ENABLE_G1 (1U << 0) + +#define GICD_IIDR_IMPLEMENTER_SHIFT 0 +#define GICD_IIDR_IMPLEMENTER_MASK (0xfff << GICD_IIDR_IMPLEMENTER_SHIFT) +#define GICD_IIDR_REVISION_SHIFT 12 +#define GICD_IIDR_REVISION_MASK (0xf << GICD_IIDR_REVISION_SHIFT) +#define GICD_IIDR_VARIANT_SHIFT 16 +#define GICD_IIDR_VARIANT_MASK (0xf << GICD_IIDR_VARIANT_SHIFT) +#define GICD_IIDR_PRODUCT_ID_SHIFT 24 +#define GICD_IIDR_PRODUCT_ID_MASK (0xff << GICD_IIDR_PRODUCT_ID_SHIFT) + + +/* + * In systems with a single security state (what we emulate in KVM) + * the meaning of the interrupt group enable bits is slightly different + */ +#define GICD_CTLR_ENABLE_SS_G1 (1U << 1) +#define GICD_CTLR_ENABLE_SS_G0 (1U << 0) + +#define GICD_TYPER_RSS (1U << 26) +#define GICD_TYPER_LPIS (1U << 17) +#define GICD_TYPER_MBIS (1U << 16) +#define GICD_TYPER_ESPI (1U << 8) + +#define GICD_TYPER_ID_BITS(typer) ((((typer) >> 19) & 0x1f) + 1) +#define GICD_TYPER_NUM_LPIS(typer) ((((typer) >> 11) & 0x1f) + 1) +#define GICD_TYPER_SPIS(typer) ((((typer) & 0x1f) + 1) * 32) +#define GICD_TYPER_ESPIS(typer) \ + (((typer) & GICD_TYPER_ESPI) ? GICD_TYPER_SPIS((typer) >> 27) : 0) + +#define GICD_TYPER2_nASSGIcap (1U << 8) +#define GICD_TYPER2_VIL (1U << 7) +#define GICD_TYPER2_VID GENMASK(4, 0) + +#define GICD_IROUTER_SPI_MODE_ONE (0U << 31) +#define GICD_IROUTER_SPI_MODE_ANY (1U << 31) + +#define GIC_PIDR2_ARCH_MASK 0xf0 +#define GIC_PIDR2_ARCH_GICv3 0x30 +#define GIC_PIDR2_ARCH_GICv4 0x40 + +#define GIC_V3_DIST_SIZE 0x10000 + +#define GIC_PAGE_SIZE_4K 0ULL +#define GIC_PAGE_SIZE_16K 1ULL +#define GIC_PAGE_SIZE_64K 2ULL +#define GIC_PAGE_SIZE_MASK 3ULL + +/* + * Re-Distributor registers, offsets from RD_base + */ +#define GICR_CTLR GICD_CTLR +#define GICR_IIDR 0x0004 +#define GICR_TYPER 0x0008 +#define GICR_STATUSR GICD_STATUSR +#define GICR_WAKER 0x0014 +#define GICR_SETLPIR 0x0040 +#define GICR_CLRLPIR 0x0048 +#define GICR_PROPBASER 0x0070 +#define GICR_PENDBASER 0x0078 +#define GICR_INVLPIR 0x00A0 +#define GICR_INVALLR 0x00B0 +#define GICR_SYNCR 0x00C0 +#define GICR_IDREGS GICD_IDREGS +#define GICR_PIDR2 GICD_PIDR2 + +#define GICR_CTLR_ENABLE_LPIS (1UL << 0) +#define GICR_CTLR_CES (1UL << 1) +#define GICR_CTLR_IR (1UL << 2) +#define GICR_CTLR_RWP (1UL << 3) + +#define GICR_TYPER_CPU_NUMBER(r) (((r) >> 8) & 0xffff) + +#define EPPI_BASE_INTID 1056 + +#define GICR_TYPER_NR_PPIS(r) \ + ({ \ + unsigned int __ppinum = ((r) >> 27) & 0x1f; \ + unsigned int __nr_ppis = 16; \ + if (__ppinum == 1 || __ppinum == 2) \ + __nr_ppis += __ppinum * 32; \ + \ + __nr_ppis; \ + }) + +#define GICR_WAKER_ProcessorSleep (1U << 1) +#define GICR_WAKER_ChildrenAsleep (1U << 2) + +#define GIC_BASER_CACHE_nCnB 0ULL +#define GIC_BASER_CACHE_SameAsInner 0ULL +#define GIC_BASER_CACHE_nC 1ULL +#define GIC_BASER_CACHE_RaWt 2ULL +#define GIC_BASER_CACHE_RaWb 3ULL +#define GIC_BASER_CACHE_WaWt 4ULL +#define GIC_BASER_CACHE_WaWb 5ULL +#define GIC_BASER_CACHE_RaWaWt 6ULL +#define GIC_BASER_CACHE_RaWaWb 7ULL +#define GIC_BASER_CACHE_MASK 7ULL +#define GIC_BASER_NonShareable 0ULL +#define GIC_BASER_InnerShareable 1ULL +#define GIC_BASER_OuterShareable 2ULL +#define GIC_BASER_SHAREABILITY_MASK 3ULL + +#define GIC_BASER_CACHEABILITY(reg, inner_outer, type) \ + 
(GIC_BASER_CACHE_##type << reg##_##inner_outer##_CACHEABILITY_SHIFT) + +#define GIC_BASER_SHAREABILITY(reg, type) \ + (GIC_BASER_##type << reg##_SHAREABILITY_SHIFT) + +/* encode a size field of width @w containing @n - 1 units */ +#define GIC_ENCODE_SZ(n, w) (((unsigned long)(n) - 1) & GENMASK_ULL(((w) - 1), 0)) + +#define GICR_PROPBASER_SHAREABILITY_SHIFT (10) +#define GICR_PROPBASER_INNER_CACHEABILITY_SHIFT (7) +#define GICR_PROPBASER_OUTER_CACHEABILITY_SHIFT (56) +#define GICR_PROPBASER_SHAREABILITY_MASK \ + GIC_BASER_SHAREABILITY(GICR_PROPBASER, SHAREABILITY_MASK) +#define GICR_PROPBASER_INNER_CACHEABILITY_MASK \ + GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, MASK) +#define GICR_PROPBASER_OUTER_CACHEABILITY_MASK \ + GIC_BASER_CACHEABILITY(GICR_PROPBASER, OUTER, MASK) +#define GICR_PROPBASER_CACHEABILITY_MASK GICR_PROPBASER_INNER_CACHEABILITY_MASK + +#define GICR_PROPBASER_InnerShareable \ + GIC_BASER_SHAREABILITY(GICR_PROPBASER, InnerShareable) + +#define GICR_PROPBASER_nCnB GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, nCnB) +#define GICR_PROPBASER_nC GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, nC) +#define GICR_PROPBASER_RaWt GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, RaWt) +#define GICR_PROPBASER_RaWb GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, RaWb) +#define GICR_PROPBASER_WaWt GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, WaWt) +#define GICR_PROPBASER_WaWb GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, WaWb) +#define GICR_PROPBASER_RaWaWt GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, RaWaWt) +#define GICR_PROPBASER_RaWaWb GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, RaWaWb) + +#define GICR_PROPBASER_IDBITS_MASK (0x1f) +#define GICR_PROPBASER_ADDRESS(x) ((x) & GENMASK_ULL(51, 12)) +#define GICR_PENDBASER_ADDRESS(x) ((x) & GENMASK_ULL(51, 16)) + +#define GICR_PENDBASER_SHAREABILITY_SHIFT (10) +#define GICR_PENDBASER_INNER_CACHEABILITY_SHIFT (7) +#define GICR_PENDBASER_OUTER_CACHEABILITY_SHIFT (56) +#define GICR_PENDBASER_SHAREABILITY_MASK \ + GIC_BASER_SHAREABILITY(GICR_PENDBASER, SHAREABILITY_MASK) +#define GICR_PENDBASER_INNER_CACHEABILITY_MASK \ + GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, MASK) +#define GICR_PENDBASER_OUTER_CACHEABILITY_MASK \ + GIC_BASER_CACHEABILITY(GICR_PENDBASER, OUTER, MASK) +#define GICR_PENDBASER_CACHEABILITY_MASK GICR_PENDBASER_INNER_CACHEABILITY_MASK + +#define GICR_PENDBASER_InnerShareable \ + GIC_BASER_SHAREABILITY(GICR_PENDBASER, InnerShareable) + +#define GICR_PENDBASER_nCnB GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, nCnB) +#define GICR_PENDBASER_nC GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, nC) +#define GICR_PENDBASER_RaWt GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, RaWt) +#define GICR_PENDBASER_RaWb GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, RaWb) +#define GICR_PENDBASER_WaWt GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, WaWt) +#define GICR_PENDBASER_WaWb GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, WaWb) +#define GICR_PENDBASER_RaWaWt GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, RaWaWt) +#define GICR_PENDBASER_RaWaWb GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, RaWaWb) + +#define GICR_PENDBASER_PTZ BIT_ULL(62) + +/* + * Re-Distributor registers, offsets from SGI_base + */ +#define GICR_IGROUPR0 GICD_IGROUPR +#define GICR_ISENABLER0 GICD_ISENABLER +#define GICR_ICENABLER0 GICD_ICENABLER +#define GICR_ISPENDR0 GICD_ISPENDR +#define GICR_ICPENDR0 GICD_ICPENDR +#define GICR_ISACTIVER0 GICD_ISACTIVER +#define GICR_ICACTIVER0 GICD_ICACTIVER +#define GICR_IPRIORITYR0 GICD_IPRIORITYR +#define GICR_ICFGR0 GICD_ICFGR +#define 
GICR_IGRPMODR0 GICD_IGRPMODR +#define GICR_NSACR GICD_NSACR + +#define GICR_TYPER_PLPIS (1U << 0) +#define GICR_TYPER_VLPIS (1U << 1) +#define GICR_TYPER_DIRTY (1U << 2) +#define GICR_TYPER_DirectLPIS (1U << 3) +#define GICR_TYPER_LAST (1U << 4) +#define GICR_TYPER_RVPEID (1U << 7) +#define GICR_TYPER_COMMON_LPI_AFF GENMASK_ULL(25, 24) +#define GICR_TYPER_AFFINITY GENMASK_ULL(63, 32) + +#define GICR_INVLPIR_INTID GENMASK_ULL(31, 0) +#define GICR_INVLPIR_VPEID GENMASK_ULL(47, 32) +#define GICR_INVLPIR_V GENMASK_ULL(63, 63) + +#define GICR_INVALLR_VPEID GICR_INVLPIR_VPEID +#define GICR_INVALLR_V GICR_INVLPIR_V + +#define GIC_V3_REDIST_SIZE 0x20000 + +#define LPI_PROP_GROUP1 (1 << 1) +#define LPI_PROP_ENABLED (1 << 0) + +/* + * Re-Distributor registers, offsets from VLPI_base + */ +#define GICR_VPROPBASER 0x0070 + +#define GICR_VPROPBASER_IDBITS_MASK 0x1f + +#define GICR_VPROPBASER_SHAREABILITY_SHIFT (10) +#define GICR_VPROPBASER_INNER_CACHEABILITY_SHIFT (7) +#define GICR_VPROPBASER_OUTER_CACHEABILITY_SHIFT (56) + +#define GICR_VPROPBASER_SHAREABILITY_MASK \ + GIC_BASER_SHAREABILITY(GICR_VPROPBASER, SHAREABILITY_MASK) +#define GICR_VPROPBASER_INNER_CACHEABILITY_MASK \ + GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, MASK) +#define GICR_VPROPBASER_OUTER_CACHEABILITY_MASK \ + GIC_BASER_CACHEABILITY(GICR_VPROPBASER, OUTER, MASK) +#define GICR_VPROPBASER_CACHEABILITY_MASK \ + GICR_VPROPBASER_INNER_CACHEABILITY_MASK + +#define GICR_VPROPBASER_InnerShareable \ + GIC_BASER_SHAREABILITY(GICR_VPROPBASER, InnerShareable) + +#define GICR_VPROPBASER_nCnB GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, nCnB) +#define GICR_VPROPBASER_nC GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, nC) +#define GICR_VPROPBASER_RaWt GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, RaWt) +#define GICR_VPROPBASER_RaWb GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, RaWb) +#define GICR_VPROPBASER_WaWt GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, WaWt) +#define GICR_VPROPBASER_WaWb GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, WaWb) +#define GICR_VPROPBASER_RaWaWt GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, RaWaWt) +#define GICR_VPROPBASER_RaWaWb GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, RaWaWb) + +/* + * GICv4.1 VPROPBASER reinvention. A subtle mix between the old + * VPROPBASER and ITS_BASER. Just not quite any of the two. 
+ */ +#define GICR_VPROPBASER_4_1_VALID (1ULL << 63) +#define GICR_VPROPBASER_4_1_ENTRY_SIZE GENMASK_ULL(61, 59) +#define GICR_VPROPBASER_4_1_INDIRECT (1ULL << 55) +#define GICR_VPROPBASER_4_1_PAGE_SIZE GENMASK_ULL(54, 53) +#define GICR_VPROPBASER_4_1_Z (1ULL << 52) +#define GICR_VPROPBASER_4_1_ADDR GENMASK_ULL(51, 12) +#define GICR_VPROPBASER_4_1_SIZE GENMASK_ULL(6, 0) + +#define GICR_VPENDBASER 0x0078 + +#define GICR_VPENDBASER_SHAREABILITY_SHIFT (10) +#define GICR_VPENDBASER_INNER_CACHEABILITY_SHIFT (7) +#define GICR_VPENDBASER_OUTER_CACHEABILITY_SHIFT (56) +#define GICR_VPENDBASER_SHAREABILITY_MASK \ + GIC_BASER_SHAREABILITY(GICR_VPENDBASER, SHAREABILITY_MASK) +#define GICR_VPENDBASER_INNER_CACHEABILITY_MASK \ + GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, MASK) +#define GICR_VPENDBASER_OUTER_CACHEABILITY_MASK \ + GIC_BASER_CACHEABILITY(GICR_VPENDBASER, OUTER, MASK) +#define GICR_VPENDBASER_CACHEABILITY_MASK \ + GICR_VPENDBASER_INNER_CACHEABILITY_MASK + +#define GICR_VPENDBASER_NonShareable \ + GIC_BASER_SHAREABILITY(GICR_VPENDBASER, NonShareable) + +#define GICR_VPENDBASER_InnerShareable \ + GIC_BASER_SHAREABILITY(GICR_VPENDBASER, InnerShareable) + +#define GICR_VPENDBASER_nCnB GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, nCnB) +#define GICR_VPENDBASER_nC GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, nC) +#define GICR_VPENDBASER_RaWt GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, RaWt) +#define GICR_VPENDBASER_RaWb GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, RaWb) +#define GICR_VPENDBASER_WaWt GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, WaWt) +#define GICR_VPENDBASER_WaWb GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, WaWb) +#define GICR_VPENDBASER_RaWaWt GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, RaWaWt) +#define GICR_VPENDBASER_RaWaWb GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, RaWaWb) + +#define GICR_VPENDBASER_Dirty (1ULL << 60) +#define GICR_VPENDBASER_PendingLast (1ULL << 61) +#define GICR_VPENDBASER_IDAI (1ULL << 62) +#define GICR_VPENDBASER_Valid (1ULL << 63) + +/* + * GICv4.1 VPENDBASER, used for VPE residency. On top of these fields, + * also use the above Valid, PendingLast and Dirty. 
+ */ +#define GICR_VPENDBASER_4_1_DB (1ULL << 62) +#define GICR_VPENDBASER_4_1_VGRP0EN (1ULL << 59) +#define GICR_VPENDBASER_4_1_VGRP1EN (1ULL << 58) +#define GICR_VPENDBASER_4_1_VPEID GENMASK_ULL(15, 0) + +#define GICR_VSGIR 0x0080 + +#define GICR_VSGIR_VPEID GENMASK(15, 0) + +#define GICR_VSGIPENDR 0x0088 + +#define GICR_VSGIPENDR_BUSY (1U << 31) +#define GICR_VSGIPENDR_PENDING GENMASK(15, 0) + +/* + * ITS registers, offsets from ITS_base + */ +#define GITS_CTLR 0x0000 +#define GITS_IIDR 0x0004 +#define GITS_TYPER 0x0008 +#define GITS_MPIDR 0x0018 +#define GITS_CBASER 0x0080 +#define GITS_CWRITER 0x0088 +#define GITS_CREADR 0x0090 +#define GITS_BASER 0x0100 +#define GITS_IDREGS_BASE 0xffd0 +#define GITS_PIDR0 0xffe0 +#define GITS_PIDR1 0xffe4 +#define GITS_PIDR2 GICR_PIDR2 +#define GITS_PIDR4 0xffd0 +#define GITS_CIDR0 0xfff0 +#define GITS_CIDR1 0xfff4 +#define GITS_CIDR2 0xfff8 +#define GITS_CIDR3 0xfffc + +#define GITS_TRANSLATER 0x10040 + +#define GITS_SGIR 0x20020 + +#define GITS_SGIR_VPEID GENMASK_ULL(47, 32) +#define GITS_SGIR_VINTID GENMASK_ULL(3, 0) + +#define GITS_CTLR_ENABLE (1U << 0) +#define GITS_CTLR_ImDe (1U << 1) +#define GITS_CTLR_ITS_NUMBER_SHIFT 4 +#define GITS_CTLR_ITS_NUMBER (0xFU << GITS_CTLR_ITS_NUMBER_SHIFT) +#define GITS_CTLR_QUIESCENT (1U << 31) + +#define GITS_TYPER_PLPIS (1UL << 0) +#define GITS_TYPER_VLPIS (1UL << 1) +#define GITS_TYPER_ITT_ENTRY_SIZE_SHIFT 4 +#define GITS_TYPER_ITT_ENTRY_SIZE GENMASK_ULL(7, 4) +#define GITS_TYPER_IDBITS_SHIFT 8 +#define GITS_TYPER_DEVBITS_SHIFT 13 +#define GITS_TYPER_DEVBITS GENMASK_ULL(17, 13) +#define GITS_TYPER_PTA (1UL << 19) +#define GITS_TYPER_HCC_SHIFT 24 +#define GITS_TYPER_HCC(r) (((r) >> GITS_TYPER_HCC_SHIFT) & 0xff) +#define GITS_TYPER_VMOVP (1ULL << 37) +#define GITS_TYPER_VMAPP (1ULL << 40) +#define GITS_TYPER_SVPET GENMASK_ULL(42, 41) + +#define GITS_IIDR_REV_SHIFT 12 +#define GITS_IIDR_REV_MASK (0xf << GITS_IIDR_REV_SHIFT) +#define GITS_IIDR_REV(r) (((r) >> GITS_IIDR_REV_SHIFT) & 0xf) +#define GITS_IIDR_PRODUCTID_SHIFT 24 + +#define GITS_CBASER_VALID (1ULL << 63) +#define GITS_CBASER_SHAREABILITY_SHIFT (10) +#define GITS_CBASER_INNER_CACHEABILITY_SHIFT (59) +#define GITS_CBASER_OUTER_CACHEABILITY_SHIFT (53) +#define GITS_CBASER_SHAREABILITY_MASK \ + GIC_BASER_SHAREABILITY(GITS_CBASER, SHAREABILITY_MASK) +#define GITS_CBASER_INNER_CACHEABILITY_MASK \ + GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, MASK) +#define GITS_CBASER_OUTER_CACHEABILITY_MASK \ + GIC_BASER_CACHEABILITY(GITS_CBASER, OUTER, MASK) +#define GITS_CBASER_CACHEABILITY_MASK GITS_CBASER_INNER_CACHEABILITY_MASK + +#define GITS_CBASER_InnerShareable \ + GIC_BASER_SHAREABILITY(GITS_CBASER, InnerShareable) + +#define GITS_CBASER_nCnB GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, nCnB) +#define GITS_CBASER_nC GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, nC) +#define GITS_CBASER_RaWt GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, RaWt) +#define GITS_CBASER_RaWb GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, RaWb) +#define GITS_CBASER_WaWt GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, WaWt) +#define GITS_CBASER_WaWb GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, WaWb) +#define GITS_CBASER_RaWaWt GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, RaWaWt) +#define GITS_CBASER_RaWaWb GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, RaWaWb) + +#define GITS_CBASER_ADDRESS(cbaser) ((cbaser) & GENMASK_ULL(51, 12)) + +#define GITS_BASER_NR_REGS 8 + +#define GITS_BASER_VALID (1ULL << 63) +#define GITS_BASER_INDIRECT (1ULL << 62) + +#define GITS_BASER_INNER_CACHEABILITY_SHIFT (59) +#define 
GITS_BASER_OUTER_CACHEABILITY_SHIFT (53) +#define GITS_BASER_INNER_CACHEABILITY_MASK \ + GIC_BASER_CACHEABILITY(GITS_BASER, INNER, MASK) +#define GITS_BASER_CACHEABILITY_MASK GITS_BASER_INNER_CACHEABILITY_MASK +#define GITS_BASER_OUTER_CACHEABILITY_MASK \ + GIC_BASER_CACHEABILITY(GITS_BASER, OUTER, MASK) +#define GITS_BASER_SHAREABILITY_MASK \ + GIC_BASER_SHAREABILITY(GITS_BASER, SHAREABILITY_MASK) + +#define GITS_BASER_nCnB GIC_BASER_CACHEABILITY(GITS_BASER, INNER, nCnB) +#define GITS_BASER_nC GIC_BASER_CACHEABILITY(GITS_BASER, INNER, nC) +#define GITS_BASER_RaWt GIC_BASER_CACHEABILITY(GITS_BASER, INNER, RaWt) +#define GITS_BASER_RaWb GIC_BASER_CACHEABILITY(GITS_BASER, INNER, RaWb) +#define GITS_BASER_WaWt GIC_BASER_CACHEABILITY(GITS_BASER, INNER, WaWt) +#define GITS_BASER_WaWb GIC_BASER_CACHEABILITY(GITS_BASER, INNER, WaWb) +#define GITS_BASER_RaWaWt GIC_BASER_CACHEABILITY(GITS_BASER, INNER, RaWaWt) +#define GITS_BASER_RaWaWb GIC_BASER_CACHEABILITY(GITS_BASER, INNER, RaWaWb) + +#define GITS_BASER_TYPE_SHIFT (56) +#define GITS_BASER_TYPE(r) (((r) >> GITS_BASER_TYPE_SHIFT) & 7) +#define GITS_BASER_ENTRY_SIZE_SHIFT (48) +#define GITS_BASER_ENTRY_SIZE(r) ((((r) >> GITS_BASER_ENTRY_SIZE_SHIFT) & 0x1f) + 1) +#define GITS_BASER_ENTRY_SIZE_MASK GENMASK_ULL(52, 48) +#define GITS_BASER_PHYS_52_to_48(phys) \ + (((phys) & GENMASK_ULL(47, 16)) | (((phys) >> 48) & 0xf) << 12) +#define GITS_BASER_ADDR_48_to_52(baser) \ + (((baser) & GENMASK_ULL(47, 16)) | (((baser) >> 12) & 0xf) << 48) + +#define GITS_BASER_SHAREABILITY_SHIFT (10) +#define GITS_BASER_InnerShareable \ + GIC_BASER_SHAREABILITY(GITS_BASER, InnerShareable) +#define GITS_BASER_PAGE_SIZE_SHIFT (8) +#define __GITS_BASER_PSZ(sz) (GIC_PAGE_SIZE_ ## sz << GITS_BASER_PAGE_SIZE_SHIFT) +#define GITS_BASER_PAGE_SIZE_4K __GITS_BASER_PSZ(4K) +#define GITS_BASER_PAGE_SIZE_16K __GITS_BASER_PSZ(16K) +#define GITS_BASER_PAGE_SIZE_64K __GITS_BASER_PSZ(64K) +#define GITS_BASER_PAGE_SIZE_MASK __GITS_BASER_PSZ(MASK) +#define GITS_BASER_PAGES_MAX 256 +#define GITS_BASER_PAGES_SHIFT (0) +#define GITS_BASER_NR_PAGES(r) (((r) & 0xff) + 1) + +#define GITS_BASER_TYPE_NONE 0 +#define GITS_BASER_TYPE_DEVICE 1 +#define GITS_BASER_TYPE_VCPU 2 +#define GITS_BASER_TYPE_RESERVED3 3 +#define GITS_BASER_TYPE_COLLECTION 4 +#define GITS_BASER_TYPE_RESERVED5 5 +#define GITS_BASER_TYPE_RESERVED6 6 +#define GITS_BASER_TYPE_RESERVED7 7 + +#define GITS_LVL1_ENTRY_SIZE (8UL) + +/* + * ITS commands + */ +#define GITS_CMD_MAPD 0x08 +#define GITS_CMD_MAPC 0x09 +#define GITS_CMD_MAPTI 0x0a +#define GITS_CMD_MAPI 0x0b +#define GITS_CMD_MOVI 0x01 +#define GITS_CMD_DISCARD 0x0f +#define GITS_CMD_INV 0x0c +#define GITS_CMD_MOVALL 0x0e +#define GITS_CMD_INVALL 0x0d +#define GITS_CMD_INT 0x03 +#define GITS_CMD_CLEAR 0x04 +#define GITS_CMD_SYNC 0x05 + +/* + * GICv4 ITS specific commands + */ +#define GITS_CMD_GICv4(x) ((x) | 0x20) +#define GITS_CMD_VINVALL GITS_CMD_GICv4(GITS_CMD_INVALL) +#define GITS_CMD_VMAPP GITS_CMD_GICv4(GITS_CMD_MAPC) +#define GITS_CMD_VMAPTI GITS_CMD_GICv4(GITS_CMD_MAPTI) +#define GITS_CMD_VMOVI GITS_CMD_GICv4(GITS_CMD_MOVI) +#define GITS_CMD_VSYNC GITS_CMD_GICv4(GITS_CMD_SYNC) +/* VMOVP, VSGI and INVDB are the odd ones, as they dont have a physical counterpart */ +#define GITS_CMD_VMOVP GITS_CMD_GICv4(2) +#define GITS_CMD_VSGI GITS_CMD_GICv4(3) +#define GITS_CMD_INVDB GITS_CMD_GICv4(0xe) + +/* + * ITS error numbers + */ +#define E_ITS_MOVI_UNMAPPED_INTERRUPT 0x010107 +#define E_ITS_MOVI_UNMAPPED_COLLECTION 0x010109 +#define E_ITS_INT_UNMAPPED_INTERRUPT 0x010307 
+#define E_ITS_CLEAR_UNMAPPED_INTERRUPT 0x010507 +#define E_ITS_MAPD_DEVICE_OOR 0x010801 +#define E_ITS_MAPD_ITTSIZE_OOR 0x010802 +#define E_ITS_MAPC_PROCNUM_OOR 0x010902 +#define E_ITS_MAPC_COLLECTION_OOR 0x010903 +#define E_ITS_MAPTI_UNMAPPED_DEVICE 0x010a04 +#define E_ITS_MAPTI_ID_OOR 0x010a05 +#define E_ITS_MAPTI_PHYSICALID_OOR 0x010a06 +#define E_ITS_INV_UNMAPPED_INTERRUPT 0x010c07 +#define E_ITS_INVALL_UNMAPPED_COLLECTION 0x010d09 +#define E_ITS_MOVALL_PROCNUM_OOR 0x010e01 +#define E_ITS_DISCARD_UNMAPPED_INTERRUPT 0x010f07 + +/* + * CPU interface registers + */ +#define ICC_CTLR_EL1_EOImode_SHIFT (1) +#define ICC_CTLR_EL1_EOImode_drop_dir (0U << ICC_CTLR_EL1_EOImode_SHIFT) +#define ICC_CTLR_EL1_EOImode_drop (1U << ICC_CTLR_EL1_EOImode_SHIFT) +#define ICC_CTLR_EL1_EOImode_MASK (1 << ICC_CTLR_EL1_EOImode_SHIFT) +#define ICC_CTLR_EL1_CBPR_SHIFT 0 +#define ICC_CTLR_EL1_CBPR_MASK (1 << ICC_CTLR_EL1_CBPR_SHIFT) +#define ICC_CTLR_EL1_PMHE_SHIFT 6 +#define ICC_CTLR_EL1_PMHE_MASK (1 << ICC_CTLR_EL1_PMHE_SHIFT) +#define ICC_CTLR_EL1_PRI_BITS_SHIFT 8 +#define ICC_CTLR_EL1_PRI_BITS_MASK (0x7 << ICC_CTLR_EL1_PRI_BITS_SHIFT) +#define ICC_CTLR_EL1_ID_BITS_SHIFT 11 +#define ICC_CTLR_EL1_ID_BITS_MASK (0x7 << ICC_CTLR_EL1_ID_BITS_SHIFT) +#define ICC_CTLR_EL1_SEIS_SHIFT 14 +#define ICC_CTLR_EL1_SEIS_MASK (0x1 << ICC_CTLR_EL1_SEIS_SHIFT) +#define ICC_CTLR_EL1_A3V_SHIFT 15 +#define ICC_CTLR_EL1_A3V_MASK (0x1 << ICC_CTLR_EL1_A3V_SHIFT) +#define ICC_CTLR_EL1_RSS (0x1 << 18) +#define ICC_CTLR_EL1_ExtRange (0x1 << 19) +#define ICC_PMR_EL1_SHIFT 0 +#define ICC_PMR_EL1_MASK (0xff << ICC_PMR_EL1_SHIFT) +#define ICC_BPR0_EL1_SHIFT 0 +#define ICC_BPR0_EL1_MASK (0x7 << ICC_BPR0_EL1_SHIFT) +#define ICC_BPR1_EL1_SHIFT 0 +#define ICC_BPR1_EL1_MASK (0x7 << ICC_BPR1_EL1_SHIFT) +#define ICC_IGRPEN0_EL1_SHIFT 0 +#define ICC_IGRPEN0_EL1_MASK (1 << ICC_IGRPEN0_EL1_SHIFT) +#define ICC_IGRPEN1_EL1_SHIFT 0 +#define ICC_IGRPEN1_EL1_MASK (1 << ICC_IGRPEN1_EL1_SHIFT) +#define ICC_SRE_EL1_DIB (1U << 2) +#define ICC_SRE_EL1_DFB (1U << 1) +#define ICC_SRE_EL1_SRE (1U << 0) + +/* These are for GICv2 emulation only */ +#define GICH_LR_VIRTUALID (0x3ffUL << 0) +#define GICH_LR_PHYSID_CPUID_SHIFT (10) +#define GICH_LR_PHYSID_CPUID (7UL << GICH_LR_PHYSID_CPUID_SHIFT) + +#define ICC_IAR1_EL1_SPURIOUS 0x3ff + +#define ICC_SRE_EL2_SRE (1 << 0) +#define ICC_SRE_EL2_ENABLE (1 << 3) + +#define ICC_SGI1R_TARGET_LIST_SHIFT 0 +#define ICC_SGI1R_TARGET_LIST_MASK (0xffff << ICC_SGI1R_TARGET_LIST_SHIFT) +#define ICC_SGI1R_AFFINITY_1_SHIFT 16 +#define ICC_SGI1R_AFFINITY_1_MASK (0xff << ICC_SGI1R_AFFINITY_1_SHIFT) +#define ICC_SGI1R_SGI_ID_SHIFT 24 +#define ICC_SGI1R_SGI_ID_MASK (0xfULL << ICC_SGI1R_SGI_ID_SHIFT) +#define ICC_SGI1R_AFFINITY_2_SHIFT 32 +#define ICC_SGI1R_AFFINITY_2_MASK (0xffULL << ICC_SGI1R_AFFINITY_2_SHIFT) +#define ICC_SGI1R_IRQ_ROUTING_MODE_BIT 40 +#define ICC_SGI1R_RS_SHIFT 44 +#define ICC_SGI1R_RS_MASK (0xfULL << ICC_SGI1R_RS_SHIFT) +#define ICC_SGI1R_AFFINITY_3_SHIFT 48 +#define ICC_SGI1R_AFFINITY_3_MASK (0xffULL << ICC_SGI1R_AFFINITY_3_SHIFT) + +#endif diff --git a/tools/testing/selftests/kvm/include/arm64/gic_v3_its.h b/tools/testing/selftests/kvm/include/arm64/gic_v3_its.h new file mode 100644 index 000000000000..3722ed9c8f96 --- /dev/null +++ b/tools/testing/selftests/kvm/include/arm64/gic_v3_its.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef __SELFTESTS_GIC_V3_ITS_H__ +#define __SELFTESTS_GIC_V3_ITS_H__ + +#include + +void its_init(vm_paddr_t coll_tbl, size_t coll_tbl_sz, + vm_paddr_t device_tbl, 
size_t device_tbl_sz, + vm_paddr_t cmdq, size_t cmdq_size); + +void its_send_mapd_cmd(void *cmdq_base, u32 device_id, vm_paddr_t itt_base, + size_t itt_size, bool valid); +void its_send_mapc_cmd(void *cmdq_base, u32 vcpu_id, u32 collection_id, bool valid); +void its_send_mapti_cmd(void *cmdq_base, u32 device_id, u32 event_id, + u32 collection_id, u32 intid); +void its_send_invall_cmd(void *cmdq_base, u32 collection_id); + +#endif // __SELFTESTS_GIC_V3_ITS_H__ diff --git a/tools/testing/selftests/kvm/include/arm64/kvm_util_arch.h b/tools/testing/selftests/kvm/include/arm64/kvm_util_arch.h new file mode 100644 index 000000000000..e43a57d99b56 --- /dev/null +++ b/tools/testing/selftests/kvm/include/arm64/kvm_util_arch.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef SELFTEST_KVM_UTIL_ARCH_H +#define SELFTEST_KVM_UTIL_ARCH_H + +struct kvm_vm_arch {}; + +#endif // SELFTEST_KVM_UTIL_ARCH_H diff --git a/tools/testing/selftests/kvm/include/arm64/processor.h b/tools/testing/selftests/kvm/include/arm64/processor.h new file mode 100644 index 000000000000..1e8d0d531fbd --- /dev/null +++ b/tools/testing/selftests/kvm/include/arm64/processor.h @@ -0,0 +1,238 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * AArch64 processor specific defines + * + * Copyright (C) 2018, Red Hat, Inc. + */ +#ifndef SELFTEST_KVM_PROCESSOR_H +#define SELFTEST_KVM_PROCESSOR_H + +#include "kvm_util.h" +#include "ucall_common.h" + +#include +#include +#include +#include +#include + + +#define ARM64_CORE_REG(x) (KVM_REG_ARM64 | KVM_REG_SIZE_U64 | \ + KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(x)) + +/* + * KVM_ARM64_SYS_REG(sys_reg_id): Helper macro to convert + * SYS_* register definitions in asm/sysreg.h to use in KVM + * calls such as vcpu_get_reg() and vcpu_set_reg(). + */ +#define KVM_ARM64_SYS_REG(sys_reg_id) \ + ARM64_SYS_REG(sys_reg_Op0(sys_reg_id), \ + sys_reg_Op1(sys_reg_id), \ + sys_reg_CRn(sys_reg_id), \ + sys_reg_CRm(sys_reg_id), \ + sys_reg_Op2(sys_reg_id)) + +/* + * Default MAIR + * index attribute + * DEVICE_nGnRnE 0 0000:0000 + * DEVICE_nGnRE 1 0000:0100 + * DEVICE_GRE 2 0000:1100 + * NORMAL_NC 3 0100:0100 + * NORMAL 4 1111:1111 + * NORMAL_WT 5 1011:1011 + */ + +/* Linux doesn't use these memory types, so let's define them. 
*/ +#define MAIR_ATTR_DEVICE_GRE UL(0x0c) +#define MAIR_ATTR_NORMAL_WT UL(0xbb) + +#define MT_DEVICE_nGnRnE 0 +#define MT_DEVICE_nGnRE 1 +#define MT_DEVICE_GRE 2 +#define MT_NORMAL_NC 3 +#define MT_NORMAL 4 +#define MT_NORMAL_WT 5 + +#define DEFAULT_MAIR_EL1 \ + (MAIR_ATTRIDX(MAIR_ATTR_DEVICE_nGnRnE, MT_DEVICE_nGnRnE) | \ + MAIR_ATTRIDX(MAIR_ATTR_DEVICE_nGnRE, MT_DEVICE_nGnRE) | \ + MAIR_ATTRIDX(MAIR_ATTR_DEVICE_GRE, MT_DEVICE_GRE) | \ + MAIR_ATTRIDX(MAIR_ATTR_NORMAL_NC, MT_NORMAL_NC) | \ + MAIR_ATTRIDX(MAIR_ATTR_NORMAL, MT_NORMAL) | \ + MAIR_ATTRIDX(MAIR_ATTR_NORMAL_WT, MT_NORMAL_WT)) + +void aarch64_vcpu_setup(struct kvm_vcpu *vcpu, struct kvm_vcpu_init *init); +struct kvm_vcpu *aarch64_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id, + struct kvm_vcpu_init *init, void *guest_code); + +struct ex_regs { + u64 regs[31]; + u64 sp; + u64 pc; + u64 pstate; +}; + +#define VECTOR_NUM 16 + +enum { + VECTOR_SYNC_CURRENT_SP0, + VECTOR_IRQ_CURRENT_SP0, + VECTOR_FIQ_CURRENT_SP0, + VECTOR_ERROR_CURRENT_SP0, + + VECTOR_SYNC_CURRENT, + VECTOR_IRQ_CURRENT, + VECTOR_FIQ_CURRENT, + VECTOR_ERROR_CURRENT, + + VECTOR_SYNC_LOWER_64, + VECTOR_IRQ_LOWER_64, + VECTOR_FIQ_LOWER_64, + VECTOR_ERROR_LOWER_64, + + VECTOR_SYNC_LOWER_32, + VECTOR_IRQ_LOWER_32, + VECTOR_FIQ_LOWER_32, + VECTOR_ERROR_LOWER_32, +}; + +#define VECTOR_IS_SYNC(v) ((v) == VECTOR_SYNC_CURRENT_SP0 || \ + (v) == VECTOR_SYNC_CURRENT || \ + (v) == VECTOR_SYNC_LOWER_64 || \ + (v) == VECTOR_SYNC_LOWER_32) + +/* Access flag */ +#define PTE_AF (1ULL << 10) + +/* Access flag update enable/disable */ +#define TCR_EL1_HA (1ULL << 39) + +void aarch64_get_supported_page_sizes(uint32_t ipa, uint32_t *ipa4k, + uint32_t *ipa16k, uint32_t *ipa64k); + +void vm_init_descriptor_tables(struct kvm_vm *vm); +void vcpu_init_descriptor_tables(struct kvm_vcpu *vcpu); + +typedef void(*handler_fn)(struct ex_regs *); +void vm_install_exception_handler(struct kvm_vm *vm, + int vector, handler_fn handler); +void vm_install_sync_handler(struct kvm_vm *vm, + int vector, int ec, handler_fn handler); + +uint64_t *virt_get_pte_hva(struct kvm_vm *vm, vm_vaddr_t gva); + +static inline void cpu_relax(void) +{ + asm volatile("yield" ::: "memory"); +} + +#define isb() asm volatile("isb" : : : "memory") +#define dsb(opt) asm volatile("dsb " #opt : : : "memory") +#define dmb(opt) asm volatile("dmb " #opt : : : "memory") + +#define dma_wmb() dmb(oshst) +#define __iowmb() dma_wmb() + +#define dma_rmb() dmb(oshld) + +#define __iormb(v) \ +({ \ + unsigned long tmp; \ + \ + dma_rmb(); \ + \ + /* \ + * Courtesy of arch/arm64/include/asm/io.h: \ + * Create a dummy control dependency from the IO read to any \ + * later instructions. This ensures that a subsequent call \ + * to udelay() will be ordered due to the ISB in __delay(). \ + */ \ + asm volatile("eor %0, %1, %1\n" \ + "cbnz %0, ." 
\ + : "=r" (tmp) : "r" ((unsigned long)(v)) \ + : "memory"); \ +}) + +static __always_inline void __raw_writel(u32 val, volatile void *addr) +{ + asm volatile("str %w0, [%1]" : : "rZ" (val), "r" (addr)); +} + +static __always_inline u32 __raw_readl(const volatile void *addr) +{ + u32 val; + asm volatile("ldr %w0, [%1]" : "=r" (val) : "r" (addr)); + return val; +} + +static __always_inline void __raw_writeq(u64 val, volatile void *addr) +{ + asm volatile("str %0, [%1]" : : "rZ" (val), "r" (addr)); +} + +static __always_inline u64 __raw_readq(const volatile void *addr) +{ + u64 val; + asm volatile("ldr %0, [%1]" : "=r" (val) : "r" (addr)); + return val; +} + +#define writel_relaxed(v,c) ((void)__raw_writel((__force u32)cpu_to_le32(v),(c))) +#define readl_relaxed(c) ({ u32 __r = le32_to_cpu((__force __le32)__raw_readl(c)); __r; }) +#define writeq_relaxed(v,c) ((void)__raw_writeq((__force u64)cpu_to_le64(v),(c))) +#define readq_relaxed(c) ({ u64 __r = le64_to_cpu((__force __le64)__raw_readq(c)); __r; }) + +#define writel(v,c) ({ __iowmb(); writel_relaxed((v),(c));}) +#define readl(c) ({ u32 __v = readl_relaxed(c); __iormb(__v); __v; }) +#define writeq(v,c) ({ __iowmb(); writeq_relaxed((v),(c));}) +#define readq(c) ({ u64 __v = readq_relaxed(c); __iormb(__v); __v; }) + + +static inline void local_irq_enable(void) +{ + asm volatile("msr daifclr, #3" : : : "memory"); +} + +static inline void local_irq_disable(void) +{ + asm volatile("msr daifset, #3" : : : "memory"); +} + +/** + * struct arm_smccc_res - Result from SMC/HVC call + * @a0-a3 result values from registers 0 to 3 + */ +struct arm_smccc_res { + unsigned long a0; + unsigned long a1; + unsigned long a2; + unsigned long a3; +}; + +/** + * smccc_hvc - Invoke a SMCCC function using the hvc conduit + * @function_id: the SMCCC function to be called + * @arg0-arg6: SMCCC function arguments, corresponding to registers x1-x7 + * @res: pointer to write the return values from registers x0-x3 + * + */ +void smccc_hvc(uint32_t function_id, uint64_t arg0, uint64_t arg1, + uint64_t arg2, uint64_t arg3, uint64_t arg4, uint64_t arg5, + uint64_t arg6, struct arm_smccc_res *res); + +/** + * smccc_smc - Invoke a SMCCC function using the smc conduit + * @function_id: the SMCCC function to be called + * @arg0-arg6: SMCCC function arguments, corresponding to registers x1-x7 + * @res: pointer to write the return values from registers x0-x3 + * + */ +void smccc_smc(uint32_t function_id, uint64_t arg0, uint64_t arg1, + uint64_t arg2, uint64_t arg3, uint64_t arg4, uint64_t arg5, + uint64_t arg6, struct arm_smccc_res *res); + +/* Execute a Wait For Interrupt instruction. 
*/ +void wfi(void); + +#endif /* SELFTEST_KVM_PROCESSOR_H */ diff --git a/tools/testing/selftests/kvm/include/arm64/spinlock.h b/tools/testing/selftests/kvm/include/arm64/spinlock.h new file mode 100644 index 000000000000..cf0984106d14 --- /dev/null +++ b/tools/testing/selftests/kvm/include/arm64/spinlock.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef SELFTEST_KVM_ARM64_SPINLOCK_H +#define SELFTEST_KVM_ARM64_SPINLOCK_H + +struct spinlock { + int v; +}; + +extern void spin_lock(struct spinlock *lock); +extern void spin_unlock(struct spinlock *lock); + +#endif /* SELFTEST_KVM_ARM64_SPINLOCK_H */ diff --git a/tools/testing/selftests/kvm/include/arm64/ucall.h b/tools/testing/selftests/kvm/include/arm64/ucall.h new file mode 100644 index 000000000000..4ec801f37f00 --- /dev/null +++ b/tools/testing/selftests/kvm/include/arm64/ucall.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef SELFTEST_KVM_UCALL_H +#define SELFTEST_KVM_UCALL_H + +#include "kvm_util.h" + +#define UCALL_EXIT_REASON KVM_EXIT_MMIO + +/* + * ucall_exit_mmio_addr holds per-VM values (global data is duplicated by each + * VM), it must not be accessed from host code. + */ +extern vm_vaddr_t *ucall_exit_mmio_addr; + +static inline void ucall_arch_do_ucall(vm_vaddr_t uc) +{ + WRITE_ONCE(*ucall_exit_mmio_addr, uc); +} + +#endif diff --git a/tools/testing/selftests/kvm/include/arm64/vgic.h b/tools/testing/selftests/kvm/include/arm64/vgic.h new file mode 100644 index 000000000000..c481d0c00a5d --- /dev/null +++ b/tools/testing/selftests/kvm/include/arm64/vgic.h @@ -0,0 +1,37 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * ARM Generic Interrupt Controller (GIC) host specific defines + */ + +#ifndef SELFTEST_KVM_VGIC_H +#define SELFTEST_KVM_VGIC_H + +#include + +#include "kvm_util.h" + +#define REDIST_REGION_ATTR_ADDR(count, base, flags, index) \ + (((uint64_t)(count) << 52) | \ + ((uint64_t)((base) >> 16) << 16) | \ + ((uint64_t)(flags) << 12) | \ + index) + +int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, uint32_t nr_irqs); + +#define VGIC_MAX_RESERVED 1023 + +void kvm_irq_set_level_info(int gic_fd, uint32_t intid, int level); +int _kvm_irq_set_level_info(int gic_fd, uint32_t intid, int level); + +void kvm_arm_irq_line(struct kvm_vm *vm, uint32_t intid, int level); +int _kvm_arm_irq_line(struct kvm_vm *vm, uint32_t intid, int level); + +/* The vcpu arg only applies to private interrupts. */ +void kvm_irq_write_ispendr(int gic_fd, uint32_t intid, struct kvm_vcpu *vcpu); +void kvm_irq_write_isactiver(int gic_fd, uint32_t intid, struct kvm_vcpu *vcpu); + +#define KVM_IRQCHIP_NUM_PINS (1020 - 32) + +int vgic_its_setup(struct kvm_vm *vm); + +#endif // SELFTEST_KVM_VGIC_H diff --git a/tools/testing/selftests/kvm/include/s390/debug_print.h b/tools/testing/selftests/kvm/include/s390/debug_print.h new file mode 100644 index 000000000000..1bf275631cc6 --- /dev/null +++ b/tools/testing/selftests/kvm/include/s390/debug_print.h @@ -0,0 +1,69 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Definition for kernel virtual machines on s390x + * + * Copyright IBM Corp. 
2024 + * + * Authors: + * Christoph Schlameuss + */ + +#ifndef SELFTEST_KVM_DEBUG_PRINT_H +#define SELFTEST_KVM_DEBUG_PRINT_H + +#include "asm/ptrace.h" +#include "kvm_util.h" +#include "sie.h" + +static inline void print_hex_bytes(const char *name, u64 addr, size_t len) +{ + u64 pos; + + pr_debug("%s (%p)\n", name, (void *)addr); + pr_debug(" 0/0x00---------|"); + if (len > 8) + pr_debug(" 8/0x08---------|"); + if (len > 16) + pr_debug(" 16/0x10--------|"); + if (len > 24) + pr_debug(" 24/0x18--------|"); + for (pos = 0; pos < len; pos += 8) { + if ((pos % 32) == 0) + pr_debug("\n %3lu 0x%.3lx ", pos, pos); + pr_debug(" %16lx", *((u64 *)(addr + pos))); + } + pr_debug("\n"); +} + +static inline void print_hex(const char *name, u64 addr) +{ + print_hex_bytes(name, addr, 512); +} + +static inline void print_psw(struct kvm_run *run, struct kvm_s390_sie_block *sie_block) +{ + pr_debug("flags:0x%x psw:0x%.16llx:0x%.16llx exit:%u %s\n", + run->flags, + run->psw_mask, run->psw_addr, + run->exit_reason, exit_reason_str(run->exit_reason)); + pr_debug("sie_block psw:0x%.16llx:0x%.16llx\n", + sie_block->psw_mask, sie_block->psw_addr); +} + +static inline void print_run(struct kvm_run *run, struct kvm_s390_sie_block *sie_block) +{ + print_hex_bytes("run", (u64)run, 0x150); + print_hex("sie_block", (u64)sie_block); + print_psw(run, sie_block); +} + +static inline void print_regs(struct kvm_run *run) +{ + struct kvm_sync_regs *sync_regs = &run->s.regs; + + print_hex_bytes("GPRS", (u64)sync_regs->gprs, 8 * NUM_GPRS); + print_hex_bytes("ACRS", (u64)sync_regs->acrs, 4 * NUM_ACRS); + print_hex_bytes("CRS", (u64)sync_regs->crs, 8 * NUM_CRS); +} + +#endif /* SELFTEST_KVM_DEBUG_PRINT_H */ diff --git a/tools/testing/selftests/kvm/include/s390/diag318_test_handler.h b/tools/testing/selftests/kvm/include/s390/diag318_test_handler.h new file mode 100644 index 000000000000..b0ed71302722 --- /dev/null +++ b/tools/testing/selftests/kvm/include/s390/diag318_test_handler.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later + * + * Test handler for the s390x DIAGNOSE 0x0318 instruction. + * + * Copyright (C) 2020, IBM + */ + +#ifndef SELFTEST_KVM_DIAG318_TEST_HANDLER +#define SELFTEST_KVM_DIAG318_TEST_HANDLER + +uint64_t get_diag318_info(void); + +#endif diff --git a/tools/testing/selftests/kvm/include/s390/facility.h b/tools/testing/selftests/kvm/include/s390/facility.h new file mode 100644 index 000000000000..00a1ced6538b --- /dev/null +++ b/tools/testing/selftests/kvm/include/s390/facility.h @@ -0,0 +1,50 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright IBM Corp. 
2024 + * + * Authors: + * Hariharan Mari + * + * Get the facility bits with the STFLE instruction + */ + +#ifndef SELFTEST_KVM_FACILITY_H +#define SELFTEST_KVM_FACILITY_H + +#include + +/* alt_stfle_fac_list[16] + stfle_fac_list[16] */ +#define NB_STFL_DOUBLEWORDS 32 + +extern uint64_t stfl_doublewords[NB_STFL_DOUBLEWORDS]; +extern bool stfle_flag; + +static inline bool test_bit_inv(unsigned long nr, const unsigned long *ptr) +{ + return test_bit(nr ^ (BITS_PER_LONG - 1), ptr); +} + +static inline void stfle(uint64_t *fac, unsigned int nb_doublewords) +{ + register unsigned long r0 asm("0") = nb_doublewords - 1; + + asm volatile(" .insn s,0xb2b00000,0(%1)\n" + : "+d" (r0) + : "a" (fac) + : "memory", "cc"); +} + +static inline void setup_facilities(void) +{ + stfle(stfl_doublewords, NB_STFL_DOUBLEWORDS); + stfle_flag = true; +} + +static inline bool test_facility(int nr) +{ + if (!stfle_flag) + setup_facilities(); + return test_bit_inv(nr, stfl_doublewords); +} + +#endif diff --git a/tools/testing/selftests/kvm/include/s390/kvm_util_arch.h b/tools/testing/selftests/kvm/include/s390/kvm_util_arch.h new file mode 100644 index 000000000000..e43a57d99b56 --- /dev/null +++ b/tools/testing/selftests/kvm/include/s390/kvm_util_arch.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef SELFTEST_KVM_UTIL_ARCH_H +#define SELFTEST_KVM_UTIL_ARCH_H + +struct kvm_vm_arch {}; + +#endif // SELFTEST_KVM_UTIL_ARCH_H diff --git a/tools/testing/selftests/kvm/include/s390/processor.h b/tools/testing/selftests/kvm/include/s390/processor.h new file mode 100644 index 000000000000..33fef6fd9617 --- /dev/null +++ b/tools/testing/selftests/kvm/include/s390/processor.h @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * s390x processor specific defines + */ +#ifndef SELFTEST_KVM_PROCESSOR_H +#define SELFTEST_KVM_PROCESSOR_H + +#include + +/* Bits in the region/segment table entry */ +#define REGION_ENTRY_ORIGIN ~0xfffUL /* region/segment table origin */ +#define REGION_ENTRY_PROTECT 0x200 /* region protection bit */ +#define REGION_ENTRY_NOEXEC 0x100 /* region no-execute bit */ +#define REGION_ENTRY_OFFSET 0xc0 /* region table offset */ +#define REGION_ENTRY_INVALID 0x20 /* invalid region table entry */ +#define REGION_ENTRY_TYPE 0x0c /* region/segment table type mask */ +#define REGION_ENTRY_LENGTH 0x03 /* region third length */ + +/* Bits in the page table entry */ +#define PAGE_INVALID 0x400 /* HW invalid bit */ +#define PAGE_PROTECT 0x200 /* HW read-only bit */ +#define PAGE_NOEXEC 0x100 /* HW no-execute bit */ + +/* Page size definitions */ +#define PAGE_SHIFT 12 +#define PAGE_SIZE BIT_ULL(PAGE_SHIFT) +#define PAGE_MASK (~(PAGE_SIZE - 1)) + +/* Is there a portable way to do this? */ +static inline void cpu_relax(void) +{ + barrier(); +} + +/* Get the instruction length */ +static inline int insn_length(unsigned char code) +{ + return ((((int)code + 64) >> 7) + 1) << 1; +} + +#endif diff --git a/tools/testing/selftests/kvm/include/s390/sie.h b/tools/testing/selftests/kvm/include/s390/sie.h new file mode 100644 index 000000000000..160acd4a1db9 --- /dev/null +++ b/tools/testing/selftests/kvm/include/s390/sie.h @@ -0,0 +1,240 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Definition for kernel virtual machines on s390. + * + * Adapted copy of struct definition kvm_s390_sie_block from + * arch/s390/include/asm/kvm_host.h for use in userspace selftest programs. + * + * Copyright IBM Corp. 
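test_bit_inv() above bridges two bit-numbering conventions: STFLE reports facility bits MSB-first within each doubleword, while test_bit() counts from the LSB, hence the XOR with BITS_PER_LONG - 1. A small stand-alone sketch of that conversion, assuming 64-bit longs, with an invented facility list and a local re-implementation of test_bit() instead of the kernel helper:

#include <stdbool.h>
#include <stdio.h>

#define BITS_PER_LONG 64

static bool test_bit(unsigned long nr, const unsigned long *ptr)
{
	return (ptr[nr / BITS_PER_LONG] >> (nr % BITS_PER_LONG)) & 1;
}

static bool test_bit_inv(unsigned long nr, const unsigned long *ptr)
{
	return test_bit(nr ^ (BITS_PER_LONG - 1), ptr);
}

int main(void)
{
	/* Fake facility list: only "facility 129" set, i.e. the second-most
	 * significant bit of doubleword 2 (invented data, not real STFLE output). */
	unsigned long fac[4] = { 0 };

	fac[2] = 1UL << 62;

	printf("facility 129: %d\n", test_bit_inv(129, fac));	/* 1 */
	printf("facility 128: %d\n", test_bit_inv(128, fac));	/* 0 */
	return 0;
}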
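insn_length() above recovers the s390 instruction length from the two most significant bits of the first opcode byte (00 -> 2 bytes, 01 or 10 -> 4 bytes, 11 -> 6 bytes). A short worked example of the arithmetic, with opcode bytes picked purely for illustration:

#include <stdio.h>

static int insn_length(unsigned char code)
{
	return ((((int)code + 64) >> 7) + 1) << 1;
}

int main(void)
{
	printf("0x07 -> %d bytes\n", insn_length(0x07));	/* top bits 00: 2 */
	printf("0xb2 -> %d bytes\n", insn_length(0xb2));	/* top bits 10: 4 */
	printf("0xe3 -> %d bytes\n", insn_length(0xe3));	/* top bits 11: 6 */
	return 0;
}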
2008, 2024 + * + * Authors: + * Christoph Schlameuss + * Carsten Otte + */ + +#ifndef SELFTEST_KVM_SIE_H +#define SELFTEST_KVM_SIE_H + +#include + +struct kvm_s390_sie_block { +#define CPUSTAT_STOPPED 0x80000000 +#define CPUSTAT_WAIT 0x10000000 +#define CPUSTAT_ECALL_PEND 0x08000000 +#define CPUSTAT_STOP_INT 0x04000000 +#define CPUSTAT_IO_INT 0x02000000 +#define CPUSTAT_EXT_INT 0x01000000 +#define CPUSTAT_RUNNING 0x00800000 +#define CPUSTAT_RETAINED 0x00400000 +#define CPUSTAT_TIMING_SUB 0x00020000 +#define CPUSTAT_SIE_SUB 0x00010000 +#define CPUSTAT_RRF 0x00008000 +#define CPUSTAT_SLSV 0x00004000 +#define CPUSTAT_SLSR 0x00002000 +#define CPUSTAT_ZARCH 0x00000800 +#define CPUSTAT_MCDS 0x00000100 +#define CPUSTAT_KSS 0x00000200 +#define CPUSTAT_SM 0x00000080 +#define CPUSTAT_IBS 0x00000040 +#define CPUSTAT_GED2 0x00000010 +#define CPUSTAT_G 0x00000008 +#define CPUSTAT_GED 0x00000004 +#define CPUSTAT_J 0x00000002 +#define CPUSTAT_P 0x00000001 + __u32 cpuflags; /* 0x0000 */ + __u32: 1; /* 0x0004 */ + __u32 prefix : 18; + __u32: 1; + __u32 ibc : 12; + __u8 reserved08[4]; /* 0x0008 */ +#define PROG_IN_SIE BIT(0) + __u32 prog0c; /* 0x000c */ + union { + __u8 reserved10[16]; /* 0x0010 */ + struct { + __u64 pv_handle_cpu; + __u64 pv_handle_config; + }; + }; +#define PROG_BLOCK_SIE BIT(0) +#define PROG_REQUEST BIT(1) + __u32 prog20; /* 0x0020 */ + __u8 reserved24[4]; /* 0x0024 */ + __u64 cputm; /* 0x0028 */ + __u64 ckc; /* 0x0030 */ + __u64 epoch; /* 0x0038 */ + __u32 svcc; /* 0x0040 */ +#define LCTL_CR0 0x8000 +#define LCTL_CR6 0x0200 +#define LCTL_CR9 0x0040 +#define LCTL_CR10 0x0020 +#define LCTL_CR11 0x0010 +#define LCTL_CR14 0x0002 + __u16 lctl; /* 0x0044 */ + __s16 icpua; /* 0x0046 */ +#define ICTL_OPEREXC 0x80000000 +#define ICTL_PINT 0x20000000 +#define ICTL_LPSW 0x00400000 +#define ICTL_STCTL 0x00040000 +#define ICTL_ISKE 0x00004000 +#define ICTL_SSKE 0x00002000 +#define ICTL_RRBE 0x00001000 +#define ICTL_TPROT 0x00000200 + __u32 ictl; /* 0x0048 */ +#define ECA_CEI 0x80000000 +#define ECA_IB 0x40000000 +#define ECA_SIGPI 0x10000000 +#define ECA_MVPGI 0x01000000 +#define ECA_AIV 0x00200000 +#define ECA_VX 0x00020000 +#define ECA_PROTEXCI 0x00002000 +#define ECA_APIE 0x00000008 +#define ECA_SII 0x00000001 + __u32 eca; /* 0x004c */ +#define ICPT_INST 0x04 +#define ICPT_PROGI 0x08 +#define ICPT_INSTPROGI 0x0C +#define ICPT_EXTREQ 0x10 +#define ICPT_EXTINT 0x14 +#define ICPT_IOREQ 0x18 +#define ICPT_WAIT 0x1c +#define ICPT_VALIDITY 0x20 +#define ICPT_STOP 0x28 +#define ICPT_OPEREXC 0x2C +#define ICPT_PARTEXEC 0x38 +#define ICPT_IOINST 0x40 +#define ICPT_KSS 0x5c +#define ICPT_MCHKREQ 0x60 +#define ICPT_INT_ENABLE 0x64 +#define ICPT_PV_INSTR 0x68 +#define ICPT_PV_NOTIFY 0x6c +#define ICPT_PV_PREF 0x70 + __u8 icptcode; /* 0x0050 */ + __u8 icptstatus; /* 0x0051 */ + __u16 ihcpu; /* 0x0052 */ + __u8 reserved54; /* 0x0054 */ +#define IICTL_CODE_NONE 0x00 +#define IICTL_CODE_MCHK 0x01 +#define IICTL_CODE_EXT 0x02 +#define IICTL_CODE_IO 0x03 +#define IICTL_CODE_RESTART 0x04 +#define IICTL_CODE_SPECIFICATION 0x10 +#define IICTL_CODE_OPERAND 0x11 + __u8 iictl; /* 0x0055 */ + __u16 ipa; /* 0x0056 */ + __u32 ipb; /* 0x0058 */ + __u32 scaoh; /* 0x005c */ +#define FPF_BPBC 0x20 + __u8 fpf; /* 0x0060 */ +#define ECB_GS 0x40 +#define ECB_TE 0x10 +#define ECB_SPECI 0x08 +#define ECB_SRSI 0x04 +#define ECB_HOSTPROTINT 0x02 +#define ECB_PTF 0x01 + __u8 ecb; /* 0x0061 */ +#define ECB2_CMMA 0x80 +#define ECB2_IEP 0x20 +#define ECB2_PFMFI 0x08 +#define ECB2_ESCA 0x04 +#define ECB2_ZPCI_LSI 0x02 + __u8 ecb2; /* 
0x0062 */ +#define ECB3_AISI 0x20 +#define ECB3_AISII 0x10 +#define ECB3_DEA 0x08 +#define ECB3_AES 0x04 +#define ECB3_RI 0x01 + __u8 ecb3; /* 0x0063 */ +#define ESCA_SCAOL_MASK ~0x3fU + __u32 scaol; /* 0x0064 */ + __u8 sdf; /* 0x0068 */ + __u8 epdx; /* 0x0069 */ + __u8 cpnc; /* 0x006a */ + __u8 reserved6b; /* 0x006b */ + __u32 todpr; /* 0x006c */ +#define GISA_FORMAT1 0x00000001 + __u32 gd; /* 0x0070 */ + __u8 reserved74[12]; /* 0x0074 */ + __u64 mso; /* 0x0080 */ + __u64 msl; /* 0x0088 */ + __u64 psw_mask; /* 0x0090 */ + __u64 psw_addr; /* 0x0098 */ + __u64 gg14; /* 0x00a0 */ + __u64 gg15; /* 0x00a8 */ + __u8 reservedb0[8]; /* 0x00b0 */ +#define HPID_KVM 0x4 +#define HPID_VSIE 0x5 + __u8 hpid; /* 0x00b8 */ + __u8 reservedb9[7]; /* 0x00b9 */ + union { + struct { + __u32 eiparams; /* 0x00c0 */ + __u16 extcpuaddr; /* 0x00c4 */ + __u16 eic; /* 0x00c6 */ + }; + __u64 mcic; /* 0x00c0 */ + } __packed; + __u32 reservedc8; /* 0x00c8 */ + union { + struct { + __u16 pgmilc; /* 0x00cc */ + __u16 iprcc; /* 0x00ce */ + }; + __u32 edc; /* 0x00cc */ + } __packed; + union { + struct { + __u32 dxc; /* 0x00d0 */ + __u16 mcn; /* 0x00d4 */ + __u8 perc; /* 0x00d6 */ + __u8 peratmid; /* 0x00d7 */ + }; + __u64 faddr; /* 0x00d0 */ + } __packed; + __u64 peraddr; /* 0x00d8 */ + __u8 eai; /* 0x00e0 */ + __u8 peraid; /* 0x00e1 */ + __u8 oai; /* 0x00e2 */ + __u8 armid; /* 0x00e3 */ + __u8 reservede4[4]; /* 0x00e4 */ + union { + __u64 tecmc; /* 0x00e8 */ + struct { + __u16 subchannel_id; /* 0x00e8 */ + __u16 subchannel_nr; /* 0x00ea */ + __u32 io_int_parm; /* 0x00ec */ + __u32 io_int_word; /* 0x00f0 */ + }; + } __packed; + __u8 reservedf4[8]; /* 0x00f4 */ +#define CRYCB_FORMAT_MASK 0x00000003 +#define CRYCB_FORMAT0 0x00000000 +#define CRYCB_FORMAT1 0x00000001 +#define CRYCB_FORMAT2 0x00000003 + __u32 crycbd; /* 0x00fc */ + __u64 gcr[16]; /* 0x0100 */ + union { + __u64 gbea; /* 0x0180 */ + __u64 sidad; + }; + __u8 reserved188[8]; /* 0x0188 */ + __u64 sdnxo; /* 0x0190 */ + __u8 reserved198[8]; /* 0x0198 */ + __u32 fac; /* 0x01a0 */ + __u8 reserved1a4[20]; /* 0x01a4 */ + __u64 cbrlo; /* 0x01b8 */ + __u8 reserved1c0[8]; /* 0x01c0 */ +#define ECD_HOSTREGMGMT 0x20000000 +#define ECD_MEF 0x08000000 +#define ECD_ETOKENF 0x02000000 +#define ECD_ECC 0x00200000 + __u32 ecd; /* 0x01c8 */ + __u8 reserved1cc[18]; /* 0x01cc */ + __u64 pp; /* 0x01de */ + __u8 reserved1e6[2]; /* 0x01e6 */ + __u64 itdba; /* 0x01e8 */ + __u64 riccbd; /* 0x01f0 */ + __u64 gvrd; /* 0x01f8 */ +} __packed __aligned(512); + +#endif /* SELFTEST_KVM_SIE_H */ diff --git a/tools/testing/selftests/kvm/include/s390/ucall.h b/tools/testing/selftests/kvm/include/s390/ucall.h new file mode 100644 index 000000000000..8035a872a351 --- /dev/null +++ b/tools/testing/selftests/kvm/include/s390/ucall.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef SELFTEST_KVM_UCALL_H +#define SELFTEST_KVM_UCALL_H + +#include "kvm_util.h" + +#define UCALL_EXIT_REASON KVM_EXIT_S390_SIEIC + +static inline void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa) +{ +} + +static inline void ucall_arch_do_ucall(vm_vaddr_t uc) +{ + /* Exit via DIAGNOSE 0x501 (normally used for breakpoints) */ + asm volatile ("diag 0,%0,0x501" : : "a"(uc) : "memory"); +} + +#endif diff --git a/tools/testing/selftests/kvm/include/s390x/debug_print.h b/tools/testing/selftests/kvm/include/s390x/debug_print.h deleted file mode 100644 index 1bf275631cc6..000000000000 --- a/tools/testing/selftests/kvm/include/s390x/debug_print.h +++ /dev/null @@ -1,69 +0,0 @@ -/* SPDX-License-Identifier: 
GPL-2.0-only */ -/* - * Definition for kernel virtual machines on s390x - * - * Copyright IBM Corp. 2024 - * - * Authors: - * Christoph Schlameuss - */ - -#ifndef SELFTEST_KVM_DEBUG_PRINT_H -#define SELFTEST_KVM_DEBUG_PRINT_H - -#include "asm/ptrace.h" -#include "kvm_util.h" -#include "sie.h" - -static inline void print_hex_bytes(const char *name, u64 addr, size_t len) -{ - u64 pos; - - pr_debug("%s (%p)\n", name, (void *)addr); - pr_debug(" 0/0x00---------|"); - if (len > 8) - pr_debug(" 8/0x08---------|"); - if (len > 16) - pr_debug(" 16/0x10--------|"); - if (len > 24) - pr_debug(" 24/0x18--------|"); - for (pos = 0; pos < len; pos += 8) { - if ((pos % 32) == 0) - pr_debug("\n %3lu 0x%.3lx ", pos, pos); - pr_debug(" %16lx", *((u64 *)(addr + pos))); - } - pr_debug("\n"); -} - -static inline void print_hex(const char *name, u64 addr) -{ - print_hex_bytes(name, addr, 512); -} - -static inline void print_psw(struct kvm_run *run, struct kvm_s390_sie_block *sie_block) -{ - pr_debug("flags:0x%x psw:0x%.16llx:0x%.16llx exit:%u %s\n", - run->flags, - run->psw_mask, run->psw_addr, - run->exit_reason, exit_reason_str(run->exit_reason)); - pr_debug("sie_block psw:0x%.16llx:0x%.16llx\n", - sie_block->psw_mask, sie_block->psw_addr); -} - -static inline void print_run(struct kvm_run *run, struct kvm_s390_sie_block *sie_block) -{ - print_hex_bytes("run", (u64)run, 0x150); - print_hex("sie_block", (u64)sie_block); - print_psw(run, sie_block); -} - -static inline void print_regs(struct kvm_run *run) -{ - struct kvm_sync_regs *sync_regs = &run->s.regs; - - print_hex_bytes("GPRS", (u64)sync_regs->gprs, 8 * NUM_GPRS); - print_hex_bytes("ACRS", (u64)sync_regs->acrs, 4 * NUM_ACRS); - print_hex_bytes("CRS", (u64)sync_regs->crs, 8 * NUM_CRS); -} - -#endif /* SELFTEST_KVM_DEBUG_PRINT_H */ diff --git a/tools/testing/selftests/kvm/include/s390x/diag318_test_handler.h b/tools/testing/selftests/kvm/include/s390x/diag318_test_handler.h deleted file mode 100644 index b0ed71302722..000000000000 --- a/tools/testing/selftests/kvm/include/s390x/diag318_test_handler.h +++ /dev/null @@ -1,13 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later - * - * Test handler for the s390x DIAGNOSE 0x0318 instruction. - * - * Copyright (C) 2020, IBM - */ - -#ifndef SELFTEST_KVM_DIAG318_TEST_HANDLER -#define SELFTEST_KVM_DIAG318_TEST_HANDLER - -uint64_t get_diag318_info(void); - -#endif diff --git a/tools/testing/selftests/kvm/include/s390x/facility.h b/tools/testing/selftests/kvm/include/s390x/facility.h deleted file mode 100644 index 00a1ced6538b..000000000000 --- a/tools/testing/selftests/kvm/include/s390x/facility.h +++ /dev/null @@ -1,50 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright IBM Corp. 
2024 - * - * Authors: - * Hariharan Mari - * - * Get the facility bits with the STFLE instruction - */ - -#ifndef SELFTEST_KVM_FACILITY_H -#define SELFTEST_KVM_FACILITY_H - -#include - -/* alt_stfle_fac_list[16] + stfle_fac_list[16] */ -#define NB_STFL_DOUBLEWORDS 32 - -extern uint64_t stfl_doublewords[NB_STFL_DOUBLEWORDS]; -extern bool stfle_flag; - -static inline bool test_bit_inv(unsigned long nr, const unsigned long *ptr) -{ - return test_bit(nr ^ (BITS_PER_LONG - 1), ptr); -} - -static inline void stfle(uint64_t *fac, unsigned int nb_doublewords) -{ - register unsigned long r0 asm("0") = nb_doublewords - 1; - - asm volatile(" .insn s,0xb2b00000,0(%1)\n" - : "+d" (r0) - : "a" (fac) - : "memory", "cc"); -} - -static inline void setup_facilities(void) -{ - stfle(stfl_doublewords, NB_STFL_DOUBLEWORDS); - stfle_flag = true; -} - -static inline bool test_facility(int nr) -{ - if (!stfle_flag) - setup_facilities(); - return test_bit_inv(nr, stfl_doublewords); -} - -#endif diff --git a/tools/testing/selftests/kvm/include/s390x/kvm_util_arch.h b/tools/testing/selftests/kvm/include/s390x/kvm_util_arch.h deleted file mode 100644 index e43a57d99b56..000000000000 --- a/tools/testing/selftests/kvm/include/s390x/kvm_util_arch.h +++ /dev/null @@ -1,7 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -#ifndef SELFTEST_KVM_UTIL_ARCH_H -#define SELFTEST_KVM_UTIL_ARCH_H - -struct kvm_vm_arch {}; - -#endif // SELFTEST_KVM_UTIL_ARCH_H diff --git a/tools/testing/selftests/kvm/include/s390x/processor.h b/tools/testing/selftests/kvm/include/s390x/processor.h deleted file mode 100644 index 33fef6fd9617..000000000000 --- a/tools/testing/selftests/kvm/include/s390x/processor.h +++ /dev/null @@ -1,41 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * s390x processor specific defines - */ -#ifndef SELFTEST_KVM_PROCESSOR_H -#define SELFTEST_KVM_PROCESSOR_H - -#include - -/* Bits in the region/segment table entry */ -#define REGION_ENTRY_ORIGIN ~0xfffUL /* region/segment table origin */ -#define REGION_ENTRY_PROTECT 0x200 /* region protection bit */ -#define REGION_ENTRY_NOEXEC 0x100 /* region no-execute bit */ -#define REGION_ENTRY_OFFSET 0xc0 /* region table offset */ -#define REGION_ENTRY_INVALID 0x20 /* invalid region table entry */ -#define REGION_ENTRY_TYPE 0x0c /* region/segment table type mask */ -#define REGION_ENTRY_LENGTH 0x03 /* region third length */ - -/* Bits in the page table entry */ -#define PAGE_INVALID 0x400 /* HW invalid bit */ -#define PAGE_PROTECT 0x200 /* HW read-only bit */ -#define PAGE_NOEXEC 0x100 /* HW no-execute bit */ - -/* Page size definitions */ -#define PAGE_SHIFT 12 -#define PAGE_SIZE BIT_ULL(PAGE_SHIFT) -#define PAGE_MASK (~(PAGE_SIZE - 1)) - -/* Is there a portable way to do this? */ -static inline void cpu_relax(void) -{ - barrier(); -} - -/* Get the instruction length */ -static inline int insn_length(unsigned char code) -{ - return ((((int)code + 64) >> 7) + 1) << 1; -} - -#endif diff --git a/tools/testing/selftests/kvm/include/s390x/sie.h b/tools/testing/selftests/kvm/include/s390x/sie.h deleted file mode 100644 index 160acd4a1db9..000000000000 --- a/tools/testing/selftests/kvm/include/s390x/sie.h +++ /dev/null @@ -1,240 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Definition for kernel virtual machines on s390. - * - * Adapted copy of struct definition kvm_s390_sie_block from - * arch/s390/include/asm/kvm_host.h for use in userspace selftest programs. - * - * Copyright IBM Corp. 
2008, 2024 - * - * Authors: - * Christoph Schlameuss - * Carsten Otte - */ - -#ifndef SELFTEST_KVM_SIE_H -#define SELFTEST_KVM_SIE_H - -#include - -struct kvm_s390_sie_block { -#define CPUSTAT_STOPPED 0x80000000 -#define CPUSTAT_WAIT 0x10000000 -#define CPUSTAT_ECALL_PEND 0x08000000 -#define CPUSTAT_STOP_INT 0x04000000 -#define CPUSTAT_IO_INT 0x02000000 -#define CPUSTAT_EXT_INT 0x01000000 -#define CPUSTAT_RUNNING 0x00800000 -#define CPUSTAT_RETAINED 0x00400000 -#define CPUSTAT_TIMING_SUB 0x00020000 -#define CPUSTAT_SIE_SUB 0x00010000 -#define CPUSTAT_RRF 0x00008000 -#define CPUSTAT_SLSV 0x00004000 -#define CPUSTAT_SLSR 0x00002000 -#define CPUSTAT_ZARCH 0x00000800 -#define CPUSTAT_MCDS 0x00000100 -#define CPUSTAT_KSS 0x00000200 -#define CPUSTAT_SM 0x00000080 -#define CPUSTAT_IBS 0x00000040 -#define CPUSTAT_GED2 0x00000010 -#define CPUSTAT_G 0x00000008 -#define CPUSTAT_GED 0x00000004 -#define CPUSTAT_J 0x00000002 -#define CPUSTAT_P 0x00000001 - __u32 cpuflags; /* 0x0000 */ - __u32: 1; /* 0x0004 */ - __u32 prefix : 18; - __u32: 1; - __u32 ibc : 12; - __u8 reserved08[4]; /* 0x0008 */ -#define PROG_IN_SIE BIT(0) - __u32 prog0c; /* 0x000c */ - union { - __u8 reserved10[16]; /* 0x0010 */ - struct { - __u64 pv_handle_cpu; - __u64 pv_handle_config; - }; - }; -#define PROG_BLOCK_SIE BIT(0) -#define PROG_REQUEST BIT(1) - __u32 prog20; /* 0x0020 */ - __u8 reserved24[4]; /* 0x0024 */ - __u64 cputm; /* 0x0028 */ - __u64 ckc; /* 0x0030 */ - __u64 epoch; /* 0x0038 */ - __u32 svcc; /* 0x0040 */ -#define LCTL_CR0 0x8000 -#define LCTL_CR6 0x0200 -#define LCTL_CR9 0x0040 -#define LCTL_CR10 0x0020 -#define LCTL_CR11 0x0010 -#define LCTL_CR14 0x0002 - __u16 lctl; /* 0x0044 */ - __s16 icpua; /* 0x0046 */ -#define ICTL_OPEREXC 0x80000000 -#define ICTL_PINT 0x20000000 -#define ICTL_LPSW 0x00400000 -#define ICTL_STCTL 0x00040000 -#define ICTL_ISKE 0x00004000 -#define ICTL_SSKE 0x00002000 -#define ICTL_RRBE 0x00001000 -#define ICTL_TPROT 0x00000200 - __u32 ictl; /* 0x0048 */ -#define ECA_CEI 0x80000000 -#define ECA_IB 0x40000000 -#define ECA_SIGPI 0x10000000 -#define ECA_MVPGI 0x01000000 -#define ECA_AIV 0x00200000 -#define ECA_VX 0x00020000 -#define ECA_PROTEXCI 0x00002000 -#define ECA_APIE 0x00000008 -#define ECA_SII 0x00000001 - __u32 eca; /* 0x004c */ -#define ICPT_INST 0x04 -#define ICPT_PROGI 0x08 -#define ICPT_INSTPROGI 0x0C -#define ICPT_EXTREQ 0x10 -#define ICPT_EXTINT 0x14 -#define ICPT_IOREQ 0x18 -#define ICPT_WAIT 0x1c -#define ICPT_VALIDITY 0x20 -#define ICPT_STOP 0x28 -#define ICPT_OPEREXC 0x2C -#define ICPT_PARTEXEC 0x38 -#define ICPT_IOINST 0x40 -#define ICPT_KSS 0x5c -#define ICPT_MCHKREQ 0x60 -#define ICPT_INT_ENABLE 0x64 -#define ICPT_PV_INSTR 0x68 -#define ICPT_PV_NOTIFY 0x6c -#define ICPT_PV_PREF 0x70 - __u8 icptcode; /* 0x0050 */ - __u8 icptstatus; /* 0x0051 */ - __u16 ihcpu; /* 0x0052 */ - __u8 reserved54; /* 0x0054 */ -#define IICTL_CODE_NONE 0x00 -#define IICTL_CODE_MCHK 0x01 -#define IICTL_CODE_EXT 0x02 -#define IICTL_CODE_IO 0x03 -#define IICTL_CODE_RESTART 0x04 -#define IICTL_CODE_SPECIFICATION 0x10 -#define IICTL_CODE_OPERAND 0x11 - __u8 iictl; /* 0x0055 */ - __u16 ipa; /* 0x0056 */ - __u32 ipb; /* 0x0058 */ - __u32 scaoh; /* 0x005c */ -#define FPF_BPBC 0x20 - __u8 fpf; /* 0x0060 */ -#define ECB_GS 0x40 -#define ECB_TE 0x10 -#define ECB_SPECI 0x08 -#define ECB_SRSI 0x04 -#define ECB_HOSTPROTINT 0x02 -#define ECB_PTF 0x01 - __u8 ecb; /* 0x0061 */ -#define ECB2_CMMA 0x80 -#define ECB2_IEP 0x20 -#define ECB2_PFMFI 0x08 -#define ECB2_ESCA 0x04 -#define ECB2_ZPCI_LSI 0x02 - __u8 ecb2; /* 
0x0062 */ -#define ECB3_AISI 0x20 -#define ECB3_AISII 0x10 -#define ECB3_DEA 0x08 -#define ECB3_AES 0x04 -#define ECB3_RI 0x01 - __u8 ecb3; /* 0x0063 */ -#define ESCA_SCAOL_MASK ~0x3fU - __u32 scaol; /* 0x0064 */ - __u8 sdf; /* 0x0068 */ - __u8 epdx; /* 0x0069 */ - __u8 cpnc; /* 0x006a */ - __u8 reserved6b; /* 0x006b */ - __u32 todpr; /* 0x006c */ -#define GISA_FORMAT1 0x00000001 - __u32 gd; /* 0x0070 */ - __u8 reserved74[12]; /* 0x0074 */ - __u64 mso; /* 0x0080 */ - __u64 msl; /* 0x0088 */ - __u64 psw_mask; /* 0x0090 */ - __u64 psw_addr; /* 0x0098 */ - __u64 gg14; /* 0x00a0 */ - __u64 gg15; /* 0x00a8 */ - __u8 reservedb0[8]; /* 0x00b0 */ -#define HPID_KVM 0x4 -#define HPID_VSIE 0x5 - __u8 hpid; /* 0x00b8 */ - __u8 reservedb9[7]; /* 0x00b9 */ - union { - struct { - __u32 eiparams; /* 0x00c0 */ - __u16 extcpuaddr; /* 0x00c4 */ - __u16 eic; /* 0x00c6 */ - }; - __u64 mcic; /* 0x00c0 */ - } __packed; - __u32 reservedc8; /* 0x00c8 */ - union { - struct { - __u16 pgmilc; /* 0x00cc */ - __u16 iprcc; /* 0x00ce */ - }; - __u32 edc; /* 0x00cc */ - } __packed; - union { - struct { - __u32 dxc; /* 0x00d0 */ - __u16 mcn; /* 0x00d4 */ - __u8 perc; /* 0x00d6 */ - __u8 peratmid; /* 0x00d7 */ - }; - __u64 faddr; /* 0x00d0 */ - } __packed; - __u64 peraddr; /* 0x00d8 */ - __u8 eai; /* 0x00e0 */ - __u8 peraid; /* 0x00e1 */ - __u8 oai; /* 0x00e2 */ - __u8 armid; /* 0x00e3 */ - __u8 reservede4[4]; /* 0x00e4 */ - union { - __u64 tecmc; /* 0x00e8 */ - struct { - __u16 subchannel_id; /* 0x00e8 */ - __u16 subchannel_nr; /* 0x00ea */ - __u32 io_int_parm; /* 0x00ec */ - __u32 io_int_word; /* 0x00f0 */ - }; - } __packed; - __u8 reservedf4[8]; /* 0x00f4 */ -#define CRYCB_FORMAT_MASK 0x00000003 -#define CRYCB_FORMAT0 0x00000000 -#define CRYCB_FORMAT1 0x00000001 -#define CRYCB_FORMAT2 0x00000003 - __u32 crycbd; /* 0x00fc */ - __u64 gcr[16]; /* 0x0100 */ - union { - __u64 gbea; /* 0x0180 */ - __u64 sidad; - }; - __u8 reserved188[8]; /* 0x0188 */ - __u64 sdnxo; /* 0x0190 */ - __u8 reserved198[8]; /* 0x0198 */ - __u32 fac; /* 0x01a0 */ - __u8 reserved1a4[20]; /* 0x01a4 */ - __u64 cbrlo; /* 0x01b8 */ - __u8 reserved1c0[8]; /* 0x01c0 */ -#define ECD_HOSTREGMGMT 0x20000000 -#define ECD_MEF 0x08000000 -#define ECD_ETOKENF 0x02000000 -#define ECD_ECC 0x00200000 - __u32 ecd; /* 0x01c8 */ - __u8 reserved1cc[18]; /* 0x01cc */ - __u64 pp; /* 0x01de */ - __u8 reserved1e6[2]; /* 0x01e6 */ - __u64 itdba; /* 0x01e8 */ - __u64 riccbd; /* 0x01f0 */ - __u64 gvrd; /* 0x01f8 */ -} __packed __aligned(512); - -#endif /* SELFTEST_KVM_SIE_H */ diff --git a/tools/testing/selftests/kvm/include/s390x/ucall.h b/tools/testing/selftests/kvm/include/s390x/ucall.h deleted file mode 100644 index 8035a872a351..000000000000 --- a/tools/testing/selftests/kvm/include/s390x/ucall.h +++ /dev/null @@ -1,19 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -#ifndef SELFTEST_KVM_UCALL_H -#define SELFTEST_KVM_UCALL_H - -#include "kvm_util.h" - -#define UCALL_EXIT_REASON KVM_EXIT_S390_SIEIC - -static inline void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa) -{ -} - -static inline void ucall_arch_do_ucall(vm_vaddr_t uc) -{ - /* Exit via DIAGNOSE 0x501 (normally used for breakpoints) */ - asm volatile ("diag 0,%0,0x501" : : "a"(uc) : "memory"); -} - -#endif diff --git a/tools/testing/selftests/kvm/include/x86/apic.h b/tools/testing/selftests/kvm/include/x86/apic.h new file mode 100644 index 000000000000..80fe9f69b38d --- /dev/null +++ b/tools/testing/selftests/kvm/include/x86/apic.h @@ -0,0 +1,118 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * 
Copyright (C) 2021, Google LLC. + */ + +#ifndef SELFTEST_KVM_APIC_H +#define SELFTEST_KVM_APIC_H + +#include + +#include "processor.h" +#include "ucall_common.h" + +#define APIC_DEFAULT_GPA 0xfee00000ULL + +/* APIC base address MSR and fields */ +#define MSR_IA32_APICBASE 0x0000001b +#define MSR_IA32_APICBASE_BSP (1<<8) +#define MSR_IA32_APICBASE_EXTD (1<<10) +#define MSR_IA32_APICBASE_ENABLE (1<<11) +#define MSR_IA32_APICBASE_BASE (0xfffff<<12) +#define GET_APIC_BASE(x) (((x) >> 12) << 12) + +#define APIC_BASE_MSR 0x800 +#define X2APIC_ENABLE (1UL << 10) +#define APIC_ID 0x20 +#define APIC_LVR 0x30 +#define GET_APIC_ID_FIELD(x) (((x) >> 24) & 0xFF) +#define APIC_TASKPRI 0x80 +#define APIC_PROCPRI 0xA0 +#define APIC_EOI 0xB0 +#define APIC_SPIV 0xF0 +#define APIC_SPIV_FOCUS_DISABLED (1 << 9) +#define APIC_SPIV_APIC_ENABLED (1 << 8) +#define APIC_IRR 0x200 +#define APIC_ICR 0x300 +#define APIC_LVTCMCI 0x2f0 +#define APIC_DEST_SELF 0x40000 +#define APIC_DEST_ALLINC 0x80000 +#define APIC_DEST_ALLBUT 0xC0000 +#define APIC_ICR_RR_MASK 0x30000 +#define APIC_ICR_RR_INVALID 0x00000 +#define APIC_ICR_RR_INPROG 0x10000 +#define APIC_ICR_RR_VALID 0x20000 +#define APIC_INT_LEVELTRIG 0x08000 +#define APIC_INT_ASSERT 0x04000 +#define APIC_ICR_BUSY 0x01000 +#define APIC_DEST_LOGICAL 0x00800 +#define APIC_DEST_PHYSICAL 0x00000 +#define APIC_DM_FIXED 0x00000 +#define APIC_DM_FIXED_MASK 0x00700 +#define APIC_DM_LOWEST 0x00100 +#define APIC_DM_SMI 0x00200 +#define APIC_DM_REMRD 0x00300 +#define APIC_DM_NMI 0x00400 +#define APIC_DM_INIT 0x00500 +#define APIC_DM_STARTUP 0x00600 +#define APIC_DM_EXTINT 0x00700 +#define APIC_VECTOR_MASK 0x000FF +#define APIC_ICR2 0x310 +#define SET_APIC_DEST_FIELD(x) ((x) << 24) +#define APIC_LVTT 0x320 +#define APIC_LVT_TIMER_ONESHOT (0 << 17) +#define APIC_LVT_TIMER_PERIODIC (1 << 17) +#define APIC_LVT_TIMER_TSCDEADLINE (2 << 17) +#define APIC_LVT_MASKED (1 << 16) +#define APIC_TMICT 0x380 +#define APIC_TMCCT 0x390 +#define APIC_TDCR 0x3E0 + +void apic_disable(void); +void xapic_enable(void); +void x2apic_enable(void); + +static inline uint32_t get_bsp_flag(void) +{ + return rdmsr(MSR_IA32_APICBASE) & MSR_IA32_APICBASE_BSP; +} + +static inline uint32_t xapic_read_reg(unsigned int reg) +{ + return ((volatile uint32_t *)APIC_DEFAULT_GPA)[reg >> 2]; +} + +static inline void xapic_write_reg(unsigned int reg, uint32_t val) +{ + ((volatile uint32_t *)APIC_DEFAULT_GPA)[reg >> 2] = val; +} + +static inline uint64_t x2apic_read_reg(unsigned int reg) +{ + return rdmsr(APIC_BASE_MSR + (reg >> 4)); +} + +static inline uint8_t x2apic_write_reg_safe(unsigned int reg, uint64_t value) +{ + return wrmsr_safe(APIC_BASE_MSR + (reg >> 4), value); +} + +static inline void x2apic_write_reg(unsigned int reg, uint64_t value) +{ + uint8_t fault = x2apic_write_reg_safe(reg, value); + + __GUEST_ASSERT(!fault, "Unexpected fault 0x%x on WRMSR(%x) = %lx\n", + fault, APIC_BASE_MSR + (reg >> 4), value); +} + +static inline void x2apic_write_reg_fault(unsigned int reg, uint64_t value) +{ + uint8_t fault = x2apic_write_reg_safe(reg, value); + + __GUEST_ASSERT(fault == GP_VECTOR, + "Wanted #GP on WRMSR(%x) = %lx, got 0x%x\n", + APIC_BASE_MSR + (reg >> 4), value, fault); +} + + +#endif /* SELFTEST_KVM_APIC_H */ diff --git a/tools/testing/selftests/kvm/include/x86/evmcs.h b/tools/testing/selftests/kvm/include/x86/evmcs.h new file mode 100644 index 000000000000..5a74bb30e2f8 --- /dev/null +++ b/tools/testing/selftests/kvm/include/x86/evmcs.h @@ -0,0 +1,1276 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * 
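x2apic_read_reg() and x2apic_write_reg() above rely on the fixed relationship between the two programming interfaces: each 16-byte xAPIC MMIO register at offset reg corresponds to one x2APIC MSR at APIC_BASE_MSR + (reg >> 4), while xapic_read_reg()/xapic_write_reg() index the MMIO page in 32-bit words. A stand-alone sketch of that mapping; the x2apic_msr() helper is invented for the example:

#include <stdio.h>

#define APIC_DEFAULT_GPA	0xfee00000ULL
#define APIC_BASE_MSR		0x800
#define APIC_ID			0x20
#define APIC_EOI		0xB0
#define APIC_ICR		0x300

static unsigned int x2apic_msr(unsigned int reg)
{
	return APIC_BASE_MSR + (reg >> 4);
}

int main(void)
{
	/* xAPIC MMIO address vs. x2APIC MSR number for a few registers. */
	printf("APIC_ID : MMIO 0x%llx, MSR 0x%x\n",
	       APIC_DEFAULT_GPA + APIC_ID, x2apic_msr(APIC_ID));	/* 0x802 */
	printf("APIC_EOI: MMIO 0x%llx, MSR 0x%x\n",
	       APIC_DEFAULT_GPA + APIC_EOI, x2apic_msr(APIC_EOI));	/* 0x80b */
	printf("APIC_ICR: MMIO 0x%llx, MSR 0x%x\n",
	       APIC_DEFAULT_GPA + APIC_ICR, x2apic_msr(APIC_ICR));	/* 0x830 */
	return 0;
}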
Copyright (C) 2018, Red Hat, Inc. + */ + +#ifndef SELFTEST_KVM_EVMCS_H +#define SELFTEST_KVM_EVMCS_H + +#include +#include "hyperv.h" +#include "vmx.h" + +#define u16 uint16_t +#define u32 uint32_t +#define u64 uint64_t + +#define EVMCS_VERSION 1 + +extern bool enable_evmcs; + +struct hv_enlightened_vmcs { + u32 revision_id; + u32 abort; + + u16 host_es_selector; + u16 host_cs_selector; + u16 host_ss_selector; + u16 host_ds_selector; + u16 host_fs_selector; + u16 host_gs_selector; + u16 host_tr_selector; + + u16 padding16_1; + + u64 host_ia32_pat; + u64 host_ia32_efer; + + u64 host_cr0; + u64 host_cr3; + u64 host_cr4; + + u64 host_ia32_sysenter_esp; + u64 host_ia32_sysenter_eip; + u64 host_rip; + u32 host_ia32_sysenter_cs; + + u32 pin_based_vm_exec_control; + u32 vm_exit_controls; + u32 secondary_vm_exec_control; + + u64 io_bitmap_a; + u64 io_bitmap_b; + u64 msr_bitmap; + + u16 guest_es_selector; + u16 guest_cs_selector; + u16 guest_ss_selector; + u16 guest_ds_selector; + u16 guest_fs_selector; + u16 guest_gs_selector; + u16 guest_ldtr_selector; + u16 guest_tr_selector; + + u32 guest_es_limit; + u32 guest_cs_limit; + u32 guest_ss_limit; + u32 guest_ds_limit; + u32 guest_fs_limit; + u32 guest_gs_limit; + u32 guest_ldtr_limit; + u32 guest_tr_limit; + u32 guest_gdtr_limit; + u32 guest_idtr_limit; + + u32 guest_es_ar_bytes; + u32 guest_cs_ar_bytes; + u32 guest_ss_ar_bytes; + u32 guest_ds_ar_bytes; + u32 guest_fs_ar_bytes; + u32 guest_gs_ar_bytes; + u32 guest_ldtr_ar_bytes; + u32 guest_tr_ar_bytes; + + u64 guest_es_base; + u64 guest_cs_base; + u64 guest_ss_base; + u64 guest_ds_base; + u64 guest_fs_base; + u64 guest_gs_base; + u64 guest_ldtr_base; + u64 guest_tr_base; + u64 guest_gdtr_base; + u64 guest_idtr_base; + + u64 padding64_1[3]; + + u64 vm_exit_msr_store_addr; + u64 vm_exit_msr_load_addr; + u64 vm_entry_msr_load_addr; + + u64 cr3_target_value0; + u64 cr3_target_value1; + u64 cr3_target_value2; + u64 cr3_target_value3; + + u32 page_fault_error_code_mask; + u32 page_fault_error_code_match; + + u32 cr3_target_count; + u32 vm_exit_msr_store_count; + u32 vm_exit_msr_load_count; + u32 vm_entry_msr_load_count; + + u64 tsc_offset; + u64 virtual_apic_page_addr; + u64 vmcs_link_pointer; + + u64 guest_ia32_debugctl; + u64 guest_ia32_pat; + u64 guest_ia32_efer; + + u64 guest_pdptr0; + u64 guest_pdptr1; + u64 guest_pdptr2; + u64 guest_pdptr3; + + u64 guest_pending_dbg_exceptions; + u64 guest_sysenter_esp; + u64 guest_sysenter_eip; + + u32 guest_activity_state; + u32 guest_sysenter_cs; + + u64 cr0_guest_host_mask; + u64 cr4_guest_host_mask; + u64 cr0_read_shadow; + u64 cr4_read_shadow; + u64 guest_cr0; + u64 guest_cr3; + u64 guest_cr4; + u64 guest_dr7; + + u64 host_fs_base; + u64 host_gs_base; + u64 host_tr_base; + u64 host_gdtr_base; + u64 host_idtr_base; + u64 host_rsp; + + u64 ept_pointer; + + u16 virtual_processor_id; + u16 padding16_2[3]; + + u64 padding64_2[5]; + u64 guest_physical_address; + + u32 vm_instruction_error; + u32 vm_exit_reason; + u32 vm_exit_intr_info; + u32 vm_exit_intr_error_code; + u32 idt_vectoring_info_field; + u32 idt_vectoring_error_code; + u32 vm_exit_instruction_len; + u32 vmx_instruction_info; + + u64 exit_qualification; + u64 exit_io_instruction_ecx; + u64 exit_io_instruction_esi; + u64 exit_io_instruction_edi; + u64 exit_io_instruction_eip; + + u64 guest_linear_address; + u64 guest_rsp; + u64 guest_rflags; + + u32 guest_interruptibility_info; + u32 cpu_based_vm_exec_control; + u32 exception_bitmap; + u32 vm_entry_controls; + u32 vm_entry_intr_info_field; + u32 
vm_entry_exception_error_code; + u32 vm_entry_instruction_len; + u32 tpr_threshold; + + u64 guest_rip; + + u32 hv_clean_fields; + u32 padding32_1; + u32 hv_synthetic_controls; + struct { + u32 nested_flush_hypercall:1; + u32 msr_bitmap:1; + u32 reserved:30; + } __packed hv_enlightenments_control; + u32 hv_vp_id; + u32 padding32_2; + u64 hv_vm_id; + u64 partition_assist_page; + u64 padding64_4[4]; + u64 guest_bndcfgs; + u64 guest_ia32_perf_global_ctrl; + u64 guest_ia32_s_cet; + u64 guest_ssp; + u64 guest_ia32_int_ssp_table_addr; + u64 guest_ia32_lbr_ctl; + u64 padding64_5[2]; + u64 xss_exit_bitmap; + u64 encls_exiting_bitmap; + u64 host_ia32_perf_global_ctrl; + u64 tsc_multiplier; + u64 host_ia32_s_cet; + u64 host_ssp; + u64 host_ia32_int_ssp_table_addr; + u64 padding64_6; +} __packed; + +#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE 0 +#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_IO_BITMAP BIT(0) +#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP BIT(1) +#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP2 BIT(2) +#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP1 BIT(3) +#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_PROC BIT(4) +#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EVENT BIT(5) +#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_ENTRY BIT(6) +#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EXCPN BIT(7) +#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR BIT(8) +#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_XLAT BIT(9) +#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_BASIC BIT(10) +#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1 BIT(11) +#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2 BIT(12) +#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER BIT(13) +#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1 BIT(14) +#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_ENLIGHTENMENTSCONTROL BIT(15) +#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL 0xFFFF + +#define HV_VMX_SYNTHETIC_EXIT_REASON_TRAP_AFTER_FLUSH 0x10000031 + +extern struct hv_enlightened_vmcs *current_evmcs; + +int vcpu_enable_evmcs(struct kvm_vcpu *vcpu); + +static inline void evmcs_enable(void) +{ + enable_evmcs = true; +} + +static inline int evmcs_vmptrld(uint64_t vmcs_pa, void *vmcs) +{ + current_vp_assist->current_nested_vmcs = vmcs_pa; + current_vp_assist->enlighten_vmentry = 1; + + current_evmcs = vmcs; + + return 0; +} + +static inline bool load_evmcs(struct hyperv_test_pages *hv) +{ + if (evmcs_vmptrld(hv->enlightened_vmcs_gpa, hv->enlightened_vmcs)) + return false; + + current_evmcs->revision_id = EVMCS_VERSION; + + return true; +} + +static inline int evmcs_vmptrst(uint64_t *value) +{ + *value = current_vp_assist->current_nested_vmcs & + ~HV_X64_MSR_VP_ASSIST_PAGE_ENABLE; + + return 0; +} + +static inline int evmcs_vmread(uint64_t encoding, uint64_t *value) +{ + switch (encoding) { + case GUEST_RIP: + *value = current_evmcs->guest_rip; + break; + case GUEST_RSP: + *value = current_evmcs->guest_rsp; + break; + case GUEST_RFLAGS: + *value = current_evmcs->guest_rflags; + break; + case HOST_IA32_PAT: + *value = current_evmcs->host_ia32_pat; + break; + case HOST_IA32_EFER: + *value = current_evmcs->host_ia32_efer; + break; + case HOST_CR0: + *value = current_evmcs->host_cr0; + break; + case HOST_CR3: + *value = current_evmcs->host_cr3; + break; + case HOST_CR4: + *value = current_evmcs->host_cr4; + break; + case HOST_IA32_SYSENTER_ESP: + *value = current_evmcs->host_ia32_sysenter_esp; + break; + case HOST_IA32_SYSENTER_EIP: + *value = current_evmcs->host_ia32_sysenter_eip; + break; + case HOST_RIP: + *value = current_evmcs->host_rip; + break; + case 
IO_BITMAP_A: + *value = current_evmcs->io_bitmap_a; + break; + case IO_BITMAP_B: + *value = current_evmcs->io_bitmap_b; + break; + case MSR_BITMAP: + *value = current_evmcs->msr_bitmap; + break; + case GUEST_ES_BASE: + *value = current_evmcs->guest_es_base; + break; + case GUEST_CS_BASE: + *value = current_evmcs->guest_cs_base; + break; + case GUEST_SS_BASE: + *value = current_evmcs->guest_ss_base; + break; + case GUEST_DS_BASE: + *value = current_evmcs->guest_ds_base; + break; + case GUEST_FS_BASE: + *value = current_evmcs->guest_fs_base; + break; + case GUEST_GS_BASE: + *value = current_evmcs->guest_gs_base; + break; + case GUEST_LDTR_BASE: + *value = current_evmcs->guest_ldtr_base; + break; + case GUEST_TR_BASE: + *value = current_evmcs->guest_tr_base; + break; + case GUEST_GDTR_BASE: + *value = current_evmcs->guest_gdtr_base; + break; + case GUEST_IDTR_BASE: + *value = current_evmcs->guest_idtr_base; + break; + case TSC_OFFSET: + *value = current_evmcs->tsc_offset; + break; + case VIRTUAL_APIC_PAGE_ADDR: + *value = current_evmcs->virtual_apic_page_addr; + break; + case VMCS_LINK_POINTER: + *value = current_evmcs->vmcs_link_pointer; + break; + case GUEST_IA32_DEBUGCTL: + *value = current_evmcs->guest_ia32_debugctl; + break; + case GUEST_IA32_PAT: + *value = current_evmcs->guest_ia32_pat; + break; + case GUEST_IA32_EFER: + *value = current_evmcs->guest_ia32_efer; + break; + case GUEST_PDPTR0: + *value = current_evmcs->guest_pdptr0; + break; + case GUEST_PDPTR1: + *value = current_evmcs->guest_pdptr1; + break; + case GUEST_PDPTR2: + *value = current_evmcs->guest_pdptr2; + break; + case GUEST_PDPTR3: + *value = current_evmcs->guest_pdptr3; + break; + case GUEST_PENDING_DBG_EXCEPTIONS: + *value = current_evmcs->guest_pending_dbg_exceptions; + break; + case GUEST_SYSENTER_ESP: + *value = current_evmcs->guest_sysenter_esp; + break; + case GUEST_SYSENTER_EIP: + *value = current_evmcs->guest_sysenter_eip; + break; + case CR0_GUEST_HOST_MASK: + *value = current_evmcs->cr0_guest_host_mask; + break; + case CR4_GUEST_HOST_MASK: + *value = current_evmcs->cr4_guest_host_mask; + break; + case CR0_READ_SHADOW: + *value = current_evmcs->cr0_read_shadow; + break; + case CR4_READ_SHADOW: + *value = current_evmcs->cr4_read_shadow; + break; + case GUEST_CR0: + *value = current_evmcs->guest_cr0; + break; + case GUEST_CR3: + *value = current_evmcs->guest_cr3; + break; + case GUEST_CR4: + *value = current_evmcs->guest_cr4; + break; + case GUEST_DR7: + *value = current_evmcs->guest_dr7; + break; + case HOST_FS_BASE: + *value = current_evmcs->host_fs_base; + break; + case HOST_GS_BASE: + *value = current_evmcs->host_gs_base; + break; + case HOST_TR_BASE: + *value = current_evmcs->host_tr_base; + break; + case HOST_GDTR_BASE: + *value = current_evmcs->host_gdtr_base; + break; + case HOST_IDTR_BASE: + *value = current_evmcs->host_idtr_base; + break; + case HOST_RSP: + *value = current_evmcs->host_rsp; + break; + case EPT_POINTER: + *value = current_evmcs->ept_pointer; + break; + case GUEST_BNDCFGS: + *value = current_evmcs->guest_bndcfgs; + break; + case XSS_EXIT_BITMAP: + *value = current_evmcs->xss_exit_bitmap; + break; + case GUEST_PHYSICAL_ADDRESS: + *value = current_evmcs->guest_physical_address; + break; + case EXIT_QUALIFICATION: + *value = current_evmcs->exit_qualification; + break; + case GUEST_LINEAR_ADDRESS: + *value = current_evmcs->guest_linear_address; + break; + case VM_EXIT_MSR_STORE_ADDR: + *value = current_evmcs->vm_exit_msr_store_addr; + break; + case VM_EXIT_MSR_LOAD_ADDR: + *value = 
current_evmcs->vm_exit_msr_load_addr; + break; + case VM_ENTRY_MSR_LOAD_ADDR: + *value = current_evmcs->vm_entry_msr_load_addr; + break; + case CR3_TARGET_VALUE0: + *value = current_evmcs->cr3_target_value0; + break; + case CR3_TARGET_VALUE1: + *value = current_evmcs->cr3_target_value1; + break; + case CR3_TARGET_VALUE2: + *value = current_evmcs->cr3_target_value2; + break; + case CR3_TARGET_VALUE3: + *value = current_evmcs->cr3_target_value3; + break; + case TPR_THRESHOLD: + *value = current_evmcs->tpr_threshold; + break; + case GUEST_INTERRUPTIBILITY_INFO: + *value = current_evmcs->guest_interruptibility_info; + break; + case CPU_BASED_VM_EXEC_CONTROL: + *value = current_evmcs->cpu_based_vm_exec_control; + break; + case EXCEPTION_BITMAP: + *value = current_evmcs->exception_bitmap; + break; + case VM_ENTRY_CONTROLS: + *value = current_evmcs->vm_entry_controls; + break; + case VM_ENTRY_INTR_INFO_FIELD: + *value = current_evmcs->vm_entry_intr_info_field; + break; + case VM_ENTRY_EXCEPTION_ERROR_CODE: + *value = current_evmcs->vm_entry_exception_error_code; + break; + case VM_ENTRY_INSTRUCTION_LEN: + *value = current_evmcs->vm_entry_instruction_len; + break; + case HOST_IA32_SYSENTER_CS: + *value = current_evmcs->host_ia32_sysenter_cs; + break; + case PIN_BASED_VM_EXEC_CONTROL: + *value = current_evmcs->pin_based_vm_exec_control; + break; + case VM_EXIT_CONTROLS: + *value = current_evmcs->vm_exit_controls; + break; + case SECONDARY_VM_EXEC_CONTROL: + *value = current_evmcs->secondary_vm_exec_control; + break; + case GUEST_ES_LIMIT: + *value = current_evmcs->guest_es_limit; + break; + case GUEST_CS_LIMIT: + *value = current_evmcs->guest_cs_limit; + break; + case GUEST_SS_LIMIT: + *value = current_evmcs->guest_ss_limit; + break; + case GUEST_DS_LIMIT: + *value = current_evmcs->guest_ds_limit; + break; + case GUEST_FS_LIMIT: + *value = current_evmcs->guest_fs_limit; + break; + case GUEST_GS_LIMIT: + *value = current_evmcs->guest_gs_limit; + break; + case GUEST_LDTR_LIMIT: + *value = current_evmcs->guest_ldtr_limit; + break; + case GUEST_TR_LIMIT: + *value = current_evmcs->guest_tr_limit; + break; + case GUEST_GDTR_LIMIT: + *value = current_evmcs->guest_gdtr_limit; + break; + case GUEST_IDTR_LIMIT: + *value = current_evmcs->guest_idtr_limit; + break; + case GUEST_ES_AR_BYTES: + *value = current_evmcs->guest_es_ar_bytes; + break; + case GUEST_CS_AR_BYTES: + *value = current_evmcs->guest_cs_ar_bytes; + break; + case GUEST_SS_AR_BYTES: + *value = current_evmcs->guest_ss_ar_bytes; + break; + case GUEST_DS_AR_BYTES: + *value = current_evmcs->guest_ds_ar_bytes; + break; + case GUEST_FS_AR_BYTES: + *value = current_evmcs->guest_fs_ar_bytes; + break; + case GUEST_GS_AR_BYTES: + *value = current_evmcs->guest_gs_ar_bytes; + break; + case GUEST_LDTR_AR_BYTES: + *value = current_evmcs->guest_ldtr_ar_bytes; + break; + case GUEST_TR_AR_BYTES: + *value = current_evmcs->guest_tr_ar_bytes; + break; + case GUEST_ACTIVITY_STATE: + *value = current_evmcs->guest_activity_state; + break; + case GUEST_SYSENTER_CS: + *value = current_evmcs->guest_sysenter_cs; + break; + case VM_INSTRUCTION_ERROR: + *value = current_evmcs->vm_instruction_error; + break; + case VM_EXIT_REASON: + *value = current_evmcs->vm_exit_reason; + break; + case VM_EXIT_INTR_INFO: + *value = current_evmcs->vm_exit_intr_info; + break; + case VM_EXIT_INTR_ERROR_CODE: + *value = current_evmcs->vm_exit_intr_error_code; + break; + case IDT_VECTORING_INFO_FIELD: + *value = current_evmcs->idt_vectoring_info_field; + break; + case IDT_VECTORING_ERROR_CODE: 
+ *value = current_evmcs->idt_vectoring_error_code; + break; + case VM_EXIT_INSTRUCTION_LEN: + *value = current_evmcs->vm_exit_instruction_len; + break; + case VMX_INSTRUCTION_INFO: + *value = current_evmcs->vmx_instruction_info; + break; + case PAGE_FAULT_ERROR_CODE_MASK: + *value = current_evmcs->page_fault_error_code_mask; + break; + case PAGE_FAULT_ERROR_CODE_MATCH: + *value = current_evmcs->page_fault_error_code_match; + break; + case CR3_TARGET_COUNT: + *value = current_evmcs->cr3_target_count; + break; + case VM_EXIT_MSR_STORE_COUNT: + *value = current_evmcs->vm_exit_msr_store_count; + break; + case VM_EXIT_MSR_LOAD_COUNT: + *value = current_evmcs->vm_exit_msr_load_count; + break; + case VM_ENTRY_MSR_LOAD_COUNT: + *value = current_evmcs->vm_entry_msr_load_count; + break; + case HOST_ES_SELECTOR: + *value = current_evmcs->host_es_selector; + break; + case HOST_CS_SELECTOR: + *value = current_evmcs->host_cs_selector; + break; + case HOST_SS_SELECTOR: + *value = current_evmcs->host_ss_selector; + break; + case HOST_DS_SELECTOR: + *value = current_evmcs->host_ds_selector; + break; + case HOST_FS_SELECTOR: + *value = current_evmcs->host_fs_selector; + break; + case HOST_GS_SELECTOR: + *value = current_evmcs->host_gs_selector; + break; + case HOST_TR_SELECTOR: + *value = current_evmcs->host_tr_selector; + break; + case GUEST_ES_SELECTOR: + *value = current_evmcs->guest_es_selector; + break; + case GUEST_CS_SELECTOR: + *value = current_evmcs->guest_cs_selector; + break; + case GUEST_SS_SELECTOR: + *value = current_evmcs->guest_ss_selector; + break; + case GUEST_DS_SELECTOR: + *value = current_evmcs->guest_ds_selector; + break; + case GUEST_FS_SELECTOR: + *value = current_evmcs->guest_fs_selector; + break; + case GUEST_GS_SELECTOR: + *value = current_evmcs->guest_gs_selector; + break; + case GUEST_LDTR_SELECTOR: + *value = current_evmcs->guest_ldtr_selector; + break; + case GUEST_TR_SELECTOR: + *value = current_evmcs->guest_tr_selector; + break; + case VIRTUAL_PROCESSOR_ID: + *value = current_evmcs->virtual_processor_id; + break; + case HOST_IA32_PERF_GLOBAL_CTRL: + *value = current_evmcs->host_ia32_perf_global_ctrl; + break; + case GUEST_IA32_PERF_GLOBAL_CTRL: + *value = current_evmcs->guest_ia32_perf_global_ctrl; + break; + case ENCLS_EXITING_BITMAP: + *value = current_evmcs->encls_exiting_bitmap; + break; + case TSC_MULTIPLIER: + *value = current_evmcs->tsc_multiplier; + break; + default: return 1; + } + + return 0; +} + +static inline int evmcs_vmwrite(uint64_t encoding, uint64_t value) +{ + switch (encoding) { + case GUEST_RIP: + current_evmcs->guest_rip = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE; + break; + case GUEST_RSP: + current_evmcs->guest_rsp = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_BASIC; + break; + case GUEST_RFLAGS: + current_evmcs->guest_rflags = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_BASIC; + break; + case HOST_IA32_PAT: + current_evmcs->host_ia32_pat = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1; + break; + case HOST_IA32_EFER: + current_evmcs->host_ia32_efer = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1; + break; + case HOST_CR0: + current_evmcs->host_cr0 = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1; + break; + case HOST_CR3: + current_evmcs->host_cr3 = value; + current_evmcs->hv_clean_fields &= 
~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1; + break; + case HOST_CR4: + current_evmcs->host_cr4 = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1; + break; + case HOST_IA32_SYSENTER_ESP: + current_evmcs->host_ia32_sysenter_esp = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1; + break; + case HOST_IA32_SYSENTER_EIP: + current_evmcs->host_ia32_sysenter_eip = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1; + break; + case HOST_RIP: + current_evmcs->host_rip = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1; + break; + case IO_BITMAP_A: + current_evmcs->io_bitmap_a = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_IO_BITMAP; + break; + case IO_BITMAP_B: + current_evmcs->io_bitmap_b = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_IO_BITMAP; + break; + case MSR_BITMAP: + current_evmcs->msr_bitmap = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP; + break; + case GUEST_ES_BASE: + current_evmcs->guest_es_base = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2; + break; + case GUEST_CS_BASE: + current_evmcs->guest_cs_base = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2; + break; + case GUEST_SS_BASE: + current_evmcs->guest_ss_base = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2; + break; + case GUEST_DS_BASE: + current_evmcs->guest_ds_base = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2; + break; + case GUEST_FS_BASE: + current_evmcs->guest_fs_base = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2; + break; + case GUEST_GS_BASE: + current_evmcs->guest_gs_base = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2; + break; + case GUEST_LDTR_BASE: + current_evmcs->guest_ldtr_base = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2; + break; + case GUEST_TR_BASE: + current_evmcs->guest_tr_base = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2; + break; + case GUEST_GDTR_BASE: + current_evmcs->guest_gdtr_base = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2; + break; + case GUEST_IDTR_BASE: + current_evmcs->guest_idtr_base = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2; + break; + case TSC_OFFSET: + current_evmcs->tsc_offset = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP2; + break; + case VIRTUAL_APIC_PAGE_ADDR: + current_evmcs->virtual_apic_page_addr = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP2; + break; + case VMCS_LINK_POINTER: + current_evmcs->vmcs_link_pointer = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1; + break; + case GUEST_IA32_DEBUGCTL: + current_evmcs->guest_ia32_debugctl = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1; + break; + case GUEST_IA32_PAT: + current_evmcs->guest_ia32_pat = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1; + break; + case GUEST_IA32_EFER: + current_evmcs->guest_ia32_efer = value; + current_evmcs->hv_clean_fields &= 
~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1; + break; + case GUEST_PDPTR0: + current_evmcs->guest_pdptr0 = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1; + break; + case GUEST_PDPTR1: + current_evmcs->guest_pdptr1 = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1; + break; + case GUEST_PDPTR2: + current_evmcs->guest_pdptr2 = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1; + break; + case GUEST_PDPTR3: + current_evmcs->guest_pdptr3 = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1; + break; + case GUEST_PENDING_DBG_EXCEPTIONS: + current_evmcs->guest_pending_dbg_exceptions = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1; + break; + case GUEST_SYSENTER_ESP: + current_evmcs->guest_sysenter_esp = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1; + break; + case GUEST_SYSENTER_EIP: + current_evmcs->guest_sysenter_eip = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1; + break; + case CR0_GUEST_HOST_MASK: + current_evmcs->cr0_guest_host_mask = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR; + break; + case CR4_GUEST_HOST_MASK: + current_evmcs->cr4_guest_host_mask = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR; + break; + case CR0_READ_SHADOW: + current_evmcs->cr0_read_shadow = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR; + break; + case CR4_READ_SHADOW: + current_evmcs->cr4_read_shadow = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR; + break; + case GUEST_CR0: + current_evmcs->guest_cr0 = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR; + break; + case GUEST_CR3: + current_evmcs->guest_cr3 = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR; + break; + case GUEST_CR4: + current_evmcs->guest_cr4 = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR; + break; + case GUEST_DR7: + current_evmcs->guest_dr7 = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR; + break; + case HOST_FS_BASE: + current_evmcs->host_fs_base = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER; + break; + case HOST_GS_BASE: + current_evmcs->host_gs_base = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER; + break; + case HOST_TR_BASE: + current_evmcs->host_tr_base = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER; + break; + case HOST_GDTR_BASE: + current_evmcs->host_gdtr_base = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER; + break; + case HOST_IDTR_BASE: + current_evmcs->host_idtr_base = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER; + break; + case HOST_RSP: + current_evmcs->host_rsp = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER; + break; + case EPT_POINTER: + current_evmcs->ept_pointer = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_XLAT; + break; + case GUEST_BNDCFGS: + current_evmcs->guest_bndcfgs = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1; + break; + case XSS_EXIT_BITMAP: + 
current_evmcs->xss_exit_bitmap = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP2; + break; + case GUEST_PHYSICAL_ADDRESS: + current_evmcs->guest_physical_address = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE; + break; + case EXIT_QUALIFICATION: + current_evmcs->exit_qualification = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE; + break; + case GUEST_LINEAR_ADDRESS: + current_evmcs->guest_linear_address = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE; + break; + case VM_EXIT_MSR_STORE_ADDR: + current_evmcs->vm_exit_msr_store_addr = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL; + break; + case VM_EXIT_MSR_LOAD_ADDR: + current_evmcs->vm_exit_msr_load_addr = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL; + break; + case VM_ENTRY_MSR_LOAD_ADDR: + current_evmcs->vm_entry_msr_load_addr = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL; + break; + case CR3_TARGET_VALUE0: + current_evmcs->cr3_target_value0 = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL; + break; + case CR3_TARGET_VALUE1: + current_evmcs->cr3_target_value1 = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL; + break; + case CR3_TARGET_VALUE2: + current_evmcs->cr3_target_value2 = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL; + break; + case CR3_TARGET_VALUE3: + current_evmcs->cr3_target_value3 = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL; + break; + case TPR_THRESHOLD: + current_evmcs->tpr_threshold = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE; + break; + case GUEST_INTERRUPTIBILITY_INFO: + current_evmcs->guest_interruptibility_info = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_BASIC; + break; + case CPU_BASED_VM_EXEC_CONTROL: + current_evmcs->cpu_based_vm_exec_control = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_PROC; + break; + case EXCEPTION_BITMAP: + current_evmcs->exception_bitmap = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EXCPN; + break; + case VM_ENTRY_CONTROLS: + current_evmcs->vm_entry_controls = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_ENTRY; + break; + case VM_ENTRY_INTR_INFO_FIELD: + current_evmcs->vm_entry_intr_info_field = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EVENT; + break; + case VM_ENTRY_EXCEPTION_ERROR_CODE: + current_evmcs->vm_entry_exception_error_code = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EVENT; + break; + case VM_ENTRY_INSTRUCTION_LEN: + current_evmcs->vm_entry_instruction_len = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EVENT; + break; + case HOST_IA32_SYSENTER_CS: + current_evmcs->host_ia32_sysenter_cs = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1; + break; + case PIN_BASED_VM_EXEC_CONTROL: + current_evmcs->pin_based_vm_exec_control = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP1; + break; + case VM_EXIT_CONTROLS: + current_evmcs->vm_exit_controls = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP1; 
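Each evmcs_vmwrite() case above pairs the field update with clearing the clean-field bit of the group that field belongs to, so the hypervisor knows it must reload that group on the next enlightened VM-entry. A minimal sketch of that bookkeeping; the two-field struct is a stand-in for illustration, not the real hv_enlightened_vmcs layout:

#include <stdint.h>
#include <stdio.h>

#define CLEAN_FIELD_HOST_GRP1	(1u << 14)	/* mirrors HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1 */
#define CLEAN_FIELD_ALL		0xFFFFu

struct toy_evmcs {
	uint64_t host_cr3;
	uint32_t hv_clean_fields;
};

static void toy_vmwrite_host_cr3(struct toy_evmcs *evmcs, uint64_t value)
{
	evmcs->host_cr3 = value;
	evmcs->hv_clean_fields &= ~CLEAN_FIELD_HOST_GRP1;	/* group is dirty now */
}

int main(void)
{
	struct toy_evmcs evmcs = { .hv_clean_fields = CLEAN_FIELD_ALL };

	toy_vmwrite_host_cr3(&evmcs, 0x1000);
	printf("clean fields after write: 0x%x\n",
	       (unsigned)evmcs.hv_clean_fields);	/* 0xbfff: HOST_GRP1 cleared */
	return 0;
}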
+ break; + case SECONDARY_VM_EXEC_CONTROL: + current_evmcs->secondary_vm_exec_control = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP1; + break; + case GUEST_ES_LIMIT: + current_evmcs->guest_es_limit = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2; + break; + case GUEST_CS_LIMIT: + current_evmcs->guest_cs_limit = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2; + break; + case GUEST_SS_LIMIT: + current_evmcs->guest_ss_limit = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2; + break; + case GUEST_DS_LIMIT: + current_evmcs->guest_ds_limit = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2; + break; + case GUEST_FS_LIMIT: + current_evmcs->guest_fs_limit = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2; + break; + case GUEST_GS_LIMIT: + current_evmcs->guest_gs_limit = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2; + break; + case GUEST_LDTR_LIMIT: + current_evmcs->guest_ldtr_limit = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2; + break; + case GUEST_TR_LIMIT: + current_evmcs->guest_tr_limit = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2; + break; + case GUEST_GDTR_LIMIT: + current_evmcs->guest_gdtr_limit = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2; + break; + case GUEST_IDTR_LIMIT: + current_evmcs->guest_idtr_limit = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2; + break; + case GUEST_ES_AR_BYTES: + current_evmcs->guest_es_ar_bytes = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2; + break; + case GUEST_CS_AR_BYTES: + current_evmcs->guest_cs_ar_bytes = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2; + break; + case GUEST_SS_AR_BYTES: + current_evmcs->guest_ss_ar_bytes = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2; + break; + case GUEST_DS_AR_BYTES: + current_evmcs->guest_ds_ar_bytes = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2; + break; + case GUEST_FS_AR_BYTES: + current_evmcs->guest_fs_ar_bytes = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2; + break; + case GUEST_GS_AR_BYTES: + current_evmcs->guest_gs_ar_bytes = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2; + break; + case GUEST_LDTR_AR_BYTES: + current_evmcs->guest_ldtr_ar_bytes = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2; + break; + case GUEST_TR_AR_BYTES: + current_evmcs->guest_tr_ar_bytes = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2; + break; + case GUEST_ACTIVITY_STATE: + current_evmcs->guest_activity_state = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1; + break; + case GUEST_SYSENTER_CS: + current_evmcs->guest_sysenter_cs = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1; + break; + case VM_INSTRUCTION_ERROR: + current_evmcs->vm_instruction_error = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE; + break; + case VM_EXIT_REASON: + current_evmcs->vm_exit_reason = value; + 
current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE; + break; + case VM_EXIT_INTR_INFO: + current_evmcs->vm_exit_intr_info = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE; + break; + case VM_EXIT_INTR_ERROR_CODE: + current_evmcs->vm_exit_intr_error_code = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE; + break; + case IDT_VECTORING_INFO_FIELD: + current_evmcs->idt_vectoring_info_field = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE; + break; + case IDT_VECTORING_ERROR_CODE: + current_evmcs->idt_vectoring_error_code = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE; + break; + case VM_EXIT_INSTRUCTION_LEN: + current_evmcs->vm_exit_instruction_len = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE; + break; + case VMX_INSTRUCTION_INFO: + current_evmcs->vmx_instruction_info = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE; + break; + case PAGE_FAULT_ERROR_CODE_MASK: + current_evmcs->page_fault_error_code_mask = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL; + break; + case PAGE_FAULT_ERROR_CODE_MATCH: + current_evmcs->page_fault_error_code_match = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL; + break; + case CR3_TARGET_COUNT: + current_evmcs->cr3_target_count = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL; + break; + case VM_EXIT_MSR_STORE_COUNT: + current_evmcs->vm_exit_msr_store_count = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL; + break; + case VM_EXIT_MSR_LOAD_COUNT: + current_evmcs->vm_exit_msr_load_count = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL; + break; + case VM_ENTRY_MSR_LOAD_COUNT: + current_evmcs->vm_entry_msr_load_count = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL; + break; + case HOST_ES_SELECTOR: + current_evmcs->host_es_selector = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1; + break; + case HOST_CS_SELECTOR: + current_evmcs->host_cs_selector = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1; + break; + case HOST_SS_SELECTOR: + current_evmcs->host_ss_selector = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1; + break; + case HOST_DS_SELECTOR: + current_evmcs->host_ds_selector = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1; + break; + case HOST_FS_SELECTOR: + current_evmcs->host_fs_selector = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1; + break; + case HOST_GS_SELECTOR: + current_evmcs->host_gs_selector = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1; + break; + case HOST_TR_SELECTOR: + current_evmcs->host_tr_selector = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1; + break; + case GUEST_ES_SELECTOR: + current_evmcs->guest_es_selector = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2; + break; + case GUEST_CS_SELECTOR: + current_evmcs->guest_cs_selector = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2; + break; + case GUEST_SS_SELECTOR: + current_evmcs->guest_ss_selector = value; + current_evmcs->hv_clean_fields &= 
~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2; + break; + case GUEST_DS_SELECTOR: + current_evmcs->guest_ds_selector = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2; + break; + case GUEST_FS_SELECTOR: + current_evmcs->guest_fs_selector = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2; + break; + case GUEST_GS_SELECTOR: + current_evmcs->guest_gs_selector = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2; + break; + case GUEST_LDTR_SELECTOR: + current_evmcs->guest_ldtr_selector = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2; + break; + case GUEST_TR_SELECTOR: + current_evmcs->guest_tr_selector = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2; + break; + case VIRTUAL_PROCESSOR_ID: + current_evmcs->virtual_processor_id = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_XLAT; + break; + case HOST_IA32_PERF_GLOBAL_CTRL: + current_evmcs->host_ia32_perf_global_ctrl = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1; + break; + case GUEST_IA32_PERF_GLOBAL_CTRL: + current_evmcs->guest_ia32_perf_global_ctrl = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1; + break; + case ENCLS_EXITING_BITMAP: + current_evmcs->encls_exiting_bitmap = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP2; + break; + case TSC_MULTIPLIER: + current_evmcs->tsc_multiplier = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP2; + break; + default: return 1; + } + + return 0; +} + +static inline int evmcs_vmlaunch(void) +{ + int ret; + + current_evmcs->hv_clean_fields = 0; + + __asm__ __volatile__("push %%rbp;" + "push %%rcx;" + "push %%rdx;" + "push %%rsi;" + "push %%rdi;" + "push $0;" + "mov %%rsp, (%[host_rsp]);" + "lea 1f(%%rip), %%rax;" + "mov %%rax, (%[host_rip]);" + "vmlaunch;" + "incq (%%rsp);" + "1: pop %%rax;" + "pop %%rdi;" + "pop %%rsi;" + "pop %%rdx;" + "pop %%rcx;" + "pop %%rbp;" + : [ret]"=&a"(ret) + : [host_rsp]"r" + ((uint64_t)¤t_evmcs->host_rsp), + [host_rip]"r" + ((uint64_t)¤t_evmcs->host_rip) + : "memory", "cc", "rbx", "r8", "r9", "r10", + "r11", "r12", "r13", "r14", "r15"); + return ret; +} + +/* + * No guest state (e.g. GPRs) is established by this vmresume. 
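Taken together, these helpers implement the enlightened VMCS dirty-tracking protocol: every field write above clears the matching hv_clean_fields bit, evmcs_vmlaunch() zeroes hv_clean_fields so the first entry loads everything, and evmcs_vmresume() (below) only re-dirties the host RIP/RSP groups. A rough, illustrative guest-side flow, assuming the field writer whose switch appears above is evmcs_vmwrite() and that an enlightened VMCS has already been loaded and prepared:

/*
 * Sketch only: current_evmcs must already point at a loaded eVMCS and the
 * L2 guest state must have been prepared elsewhere (e.g. via the vmx lib).
 */
static void l1_enter_l2_twice(void)
{
	/* Dirty a control field; its clean bit is cleared by the write. */
	evmcs_vmwrite(TPR_THRESHOLD, 0);

	/* First entry: evmcs_vmlaunch() marks every field group dirty. */
	GUEST_ASSERT(!evmcs_vmlaunch());

	/* After the first exit, only re-write what actually changed. */
	evmcs_vmwrite(TPR_THRESHOLD, 4);
	GUEST_ASSERT(!evmcs_vmresume());
}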
+ */ +static inline int evmcs_vmresume(void) +{ + int ret; + + /* HOST_RIP */ + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1; + /* HOST_RSP */ + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER; + + __asm__ __volatile__("push %%rbp;" + "push %%rcx;" + "push %%rdx;" + "push %%rsi;" + "push %%rdi;" + "push $0;" + "mov %%rsp, (%[host_rsp]);" + "lea 1f(%%rip), %%rax;" + "mov %%rax, (%[host_rip]);" + "vmresume;" + "incq (%%rsp);" + "1: pop %%rax;" + "pop %%rdi;" + "pop %%rsi;" + "pop %%rdx;" + "pop %%rcx;" + "pop %%rbp;" + : [ret]"=&a"(ret) + : [host_rsp]"r" + ((uint64_t)¤t_evmcs->host_rsp), + [host_rip]"r" + ((uint64_t)¤t_evmcs->host_rip) + : "memory", "cc", "rbx", "r8", "r9", "r10", + "r11", "r12", "r13", "r14", "r15"); + return ret; +} + +#endif /* !SELFTEST_KVM_EVMCS_H */ diff --git a/tools/testing/selftests/kvm/include/x86/hyperv.h b/tools/testing/selftests/kvm/include/x86/hyperv.h new file mode 100644 index 000000000000..f13e532be240 --- /dev/null +++ b/tools/testing/selftests/kvm/include/x86/hyperv.h @@ -0,0 +1,361 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2021, Red Hat, Inc. + */ + +#ifndef SELFTEST_KVM_HYPERV_H +#define SELFTEST_KVM_HYPERV_H + +#include "processor.h" + +#define HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS 0x40000000 +#define HYPERV_CPUID_INTERFACE 0x40000001 +#define HYPERV_CPUID_VERSION 0x40000002 +#define HYPERV_CPUID_FEATURES 0x40000003 +#define HYPERV_CPUID_ENLIGHTMENT_INFO 0x40000004 +#define HYPERV_CPUID_IMPLEMENT_LIMITS 0x40000005 +#define HYPERV_CPUID_CPU_MANAGEMENT_FEATURES 0x40000007 +#define HYPERV_CPUID_NESTED_FEATURES 0x4000000A +#define HYPERV_CPUID_SYNDBG_VENDOR_AND_MAX_FUNCTIONS 0x40000080 +#define HYPERV_CPUID_SYNDBG_INTERFACE 0x40000081 +#define HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES 0x40000082 + +#define HV_X64_MSR_GUEST_OS_ID 0x40000000 +#define HV_X64_MSR_HYPERCALL 0x40000001 +#define HV_X64_MSR_VP_INDEX 0x40000002 +#define HV_X64_MSR_RESET 0x40000003 +#define HV_X64_MSR_VP_RUNTIME 0x40000010 +#define HV_X64_MSR_TIME_REF_COUNT 0x40000020 +#define HV_X64_MSR_REFERENCE_TSC 0x40000021 +#define HV_X64_MSR_TSC_FREQUENCY 0x40000022 +#define HV_X64_MSR_APIC_FREQUENCY 0x40000023 +#define HV_X64_MSR_EOI 0x40000070 +#define HV_X64_MSR_ICR 0x40000071 +#define HV_X64_MSR_TPR 0x40000072 +#define HV_X64_MSR_VP_ASSIST_PAGE 0x40000073 +#define HV_X64_MSR_SCONTROL 0x40000080 +#define HV_X64_MSR_SVERSION 0x40000081 +#define HV_X64_MSR_SIEFP 0x40000082 +#define HV_X64_MSR_SIMP 0x40000083 +#define HV_X64_MSR_EOM 0x40000084 +#define HV_X64_MSR_SINT0 0x40000090 +#define HV_X64_MSR_SINT1 0x40000091 +#define HV_X64_MSR_SINT2 0x40000092 +#define HV_X64_MSR_SINT3 0x40000093 +#define HV_X64_MSR_SINT4 0x40000094 +#define HV_X64_MSR_SINT5 0x40000095 +#define HV_X64_MSR_SINT6 0x40000096 +#define HV_X64_MSR_SINT7 0x40000097 +#define HV_X64_MSR_SINT8 0x40000098 +#define HV_X64_MSR_SINT9 0x40000099 +#define HV_X64_MSR_SINT10 0x4000009A +#define HV_X64_MSR_SINT11 0x4000009B +#define HV_X64_MSR_SINT12 0x4000009C +#define HV_X64_MSR_SINT13 0x4000009D +#define HV_X64_MSR_SINT14 0x4000009E +#define HV_X64_MSR_SINT15 0x4000009F +#define HV_X64_MSR_STIMER0_CONFIG 0x400000B0 +#define HV_X64_MSR_STIMER0_COUNT 0x400000B1 +#define HV_X64_MSR_STIMER1_CONFIG 0x400000B2 +#define HV_X64_MSR_STIMER1_COUNT 0x400000B3 +#define HV_X64_MSR_STIMER2_CONFIG 0x400000B4 +#define HV_X64_MSR_STIMER2_COUNT 0x400000B5 +#define HV_X64_MSR_STIMER3_CONFIG 0x400000B6 +#define HV_X64_MSR_STIMER3_COUNT 0x400000B7 +#define 
HV_X64_MSR_GUEST_IDLE 0x400000F0 +#define HV_X64_MSR_CRASH_P0 0x40000100 +#define HV_X64_MSR_CRASH_P1 0x40000101 +#define HV_X64_MSR_CRASH_P2 0x40000102 +#define HV_X64_MSR_CRASH_P3 0x40000103 +#define HV_X64_MSR_CRASH_P4 0x40000104 +#define HV_X64_MSR_CRASH_CTL 0x40000105 +#define HV_X64_MSR_REENLIGHTENMENT_CONTROL 0x40000106 +#define HV_X64_MSR_TSC_EMULATION_CONTROL 0x40000107 +#define HV_X64_MSR_TSC_EMULATION_STATUS 0x40000108 +#define HV_X64_MSR_TSC_INVARIANT_CONTROL 0x40000118 + +#define HV_X64_MSR_SYNDBG_CONTROL 0x400000F1 +#define HV_X64_MSR_SYNDBG_STATUS 0x400000F2 +#define HV_X64_MSR_SYNDBG_SEND_BUFFER 0x400000F3 +#define HV_X64_MSR_SYNDBG_RECV_BUFFER 0x400000F4 +#define HV_X64_MSR_SYNDBG_PENDING_BUFFER 0x400000F5 +#define HV_X64_MSR_SYNDBG_OPTIONS 0x400000FF + +/* HYPERV_CPUID_FEATURES.EAX */ +#define HV_MSR_VP_RUNTIME_AVAILABLE \ + KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 0) +#define HV_MSR_TIME_REF_COUNT_AVAILABLE \ + KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 1) +#define HV_MSR_SYNIC_AVAILABLE \ + KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 2) +#define HV_MSR_SYNTIMER_AVAILABLE \ + KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 3) +#define HV_MSR_APIC_ACCESS_AVAILABLE \ + KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 4) +#define HV_MSR_HYPERCALL_AVAILABLE \ + KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 5) +#define HV_MSR_VP_INDEX_AVAILABLE \ + KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 6) +#define HV_MSR_RESET_AVAILABLE \ + KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 7) +#define HV_MSR_STAT_PAGES_AVAILABLE \ + KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 8) +#define HV_MSR_REFERENCE_TSC_AVAILABLE \ + KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 9) +#define HV_MSR_GUEST_IDLE_AVAILABLE \ + KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 10) +#define HV_ACCESS_FREQUENCY_MSRS \ + KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 11) +#define HV_ACCESS_REENLIGHTENMENT \ + KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 13) +#define HV_ACCESS_TSC_INVARIANT \ + KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 15) + +/* HYPERV_CPUID_FEATURES.EBX */ +#define HV_CREATE_PARTITIONS \ + KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 0) +#define HV_ACCESS_PARTITION_ID \ + KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 1) +#define HV_ACCESS_MEMORY_POOL \ + KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 2) +#define HV_ADJUST_MESSAGE_BUFFERS \ + KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 3) +#define HV_POST_MESSAGES \ + KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 4) +#define HV_SIGNAL_EVENTS \ + KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 5) +#define HV_CREATE_PORT \ + KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 6) +#define HV_CONNECT_PORT \ + KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 7) +#define HV_ACCESS_STATS \ + KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 8) +#define HV_DEBUGGING \ + KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 11) +#define HV_CPU_MANAGEMENT \ + KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 12) +#define HV_ENABLE_EXTENDED_HYPERCALLS \ + KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 20) +#define HV_ISOLATION \ + KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 22) + +/* HYPERV_CPUID_FEATURES.EDX */ +#define HV_X64_MWAIT_AVAILABLE \ + KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EDX, 0) +#define HV_X64_GUEST_DEBUGGING_AVAILABLE \ + KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EDX, 1) +#define HV_X64_PERF_MONITOR_AVAILABLE \ 
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EDX, 2) +#define HV_X64_CPU_DYNAMIC_PARTITIONING_AVAILABLE \ + KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EDX, 3) +#define HV_X64_HYPERCALL_XMM_INPUT_AVAILABLE \ + KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EDX, 4) +#define HV_X64_GUEST_IDLE_STATE_AVAILABLE \ + KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EDX, 5) +#define HV_FEATURE_FREQUENCY_MSRS_AVAILABLE \ + KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EDX, 8) +#define HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE \ + KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EDX, 10) +#define HV_FEATURE_DEBUG_MSRS_AVAILABLE \ + KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EDX, 11) +#define HV_STIMER_DIRECT_MODE_AVAILABLE \ + KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EDX, 19) + +/* HYPERV_CPUID_ENLIGHTMENT_INFO.EAX */ +#define HV_X64_AS_SWITCH_RECOMMENDED \ + KVM_X86_CPU_FEATURE(HYPERV_CPUID_ENLIGHTMENT_INFO, 0, EAX, 0) +#define HV_X64_LOCAL_TLB_FLUSH_RECOMMENDED \ + KVM_X86_CPU_FEATURE(HYPERV_CPUID_ENLIGHTMENT_INFO, 0, EAX, 1) +#define HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED \ + KVM_X86_CPU_FEATURE(HYPERV_CPUID_ENLIGHTMENT_INFO, 0, EAX, 2) +#define HV_X64_APIC_ACCESS_RECOMMENDED \ + KVM_X86_CPU_FEATURE(HYPERV_CPUID_ENLIGHTMENT_INFO, 0, EAX, 3) +#define HV_X64_SYSTEM_RESET_RECOMMENDED \ + KVM_X86_CPU_FEATURE(HYPERV_CPUID_ENLIGHTMENT_INFO, 0, EAX, 4) +#define HV_X64_RELAXED_TIMING_RECOMMENDED \ + KVM_X86_CPU_FEATURE(HYPERV_CPUID_ENLIGHTMENT_INFO, 0, EAX, 5) +#define HV_DEPRECATING_AEOI_RECOMMENDED \ + KVM_X86_CPU_FEATURE(HYPERV_CPUID_ENLIGHTMENT_INFO, 0, EAX, 9) +#define HV_X64_CLUSTER_IPI_RECOMMENDED \ + KVM_X86_CPU_FEATURE(HYPERV_CPUID_ENLIGHTMENT_INFO, 0, EAX, 10) +#define HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED \ + KVM_X86_CPU_FEATURE(HYPERV_CPUID_ENLIGHTMENT_INFO, 0, EAX, 11) +#define HV_X64_ENLIGHTENED_VMCS_RECOMMENDED \ + KVM_X86_CPU_FEATURE(HYPERV_CPUID_ENLIGHTMENT_INFO, 0, EAX, 14) + +/* HYPERV_CPUID_NESTED_FEATURES.EAX */ +#define HV_X64_NESTED_DIRECT_FLUSH \ + KVM_X86_CPU_FEATURE(HYPERV_CPUID_NESTED_FEATURES, 0, EAX, 17) +#define HV_X64_NESTED_GUEST_MAPPING_FLUSH \ + KVM_X86_CPU_FEATURE(HYPERV_CPUID_NESTED_FEATURES, 0, EAX, 18) +#define HV_X64_NESTED_MSR_BITMAP \ + KVM_X86_CPU_FEATURE(HYPERV_CPUID_NESTED_FEATURES, 0, EAX, 19) + +/* HYPERV_CPUID_NESTED_FEATURES.EBX */ +#define HV_X64_NESTED_EVMCS1_PERF_GLOBAL_CTRL \ + KVM_X86_CPU_FEATURE(HYPERV_CPUID_NESTED_FEATURES, 0, EBX, 0) + +/* HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES.EAX */ +#define HV_X64_SYNDBG_CAP_ALLOW_KERNEL_DEBUGGING \ + KVM_X86_CPU_FEATURE(HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES, 0, EAX, 1) + +/* Hypercalls */ +#define HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE 0x0002 +#define HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST 0x0003 +#define HVCALL_NOTIFY_LONG_SPIN_WAIT 0x0008 +#define HVCALL_SEND_IPI 0x000b +#define HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX 0x0013 +#define HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX 0x0014 +#define HVCALL_SEND_IPI_EX 0x0015 +#define HVCALL_GET_PARTITION_ID 0x0046 +#define HVCALL_DEPOSIT_MEMORY 0x0048 +#define HVCALL_CREATE_VP 0x004e +#define HVCALL_GET_VP_REGISTERS 0x0050 +#define HVCALL_SET_VP_REGISTERS 0x0051 +#define HVCALL_POST_MESSAGE 0x005c +#define HVCALL_SIGNAL_EVENT 0x005d +#define HVCALL_POST_DEBUG_DATA 0x0069 +#define HVCALL_RETRIEVE_DEBUG_DATA 0x006a +#define HVCALL_RESET_DEBUG_SESSION 0x006b +#define HVCALL_ADD_LOGICAL_PROCESSOR 0x0076 +#define HVCALL_MAP_DEVICE_INTERRUPT 0x007c +#define HVCALL_UNMAP_DEVICE_INTERRUPT 0x007d +#define HVCALL_RETARGET_INTERRUPT 0x007e +#define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_SPACE 
0x00af +#define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_LIST 0x00b0 + +/* Extended hypercalls */ +#define HV_EXT_CALL_QUERY_CAPABILITIES 0x8001 + +#define HV_FLUSH_ALL_PROCESSORS BIT(0) +#define HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES BIT(1) +#define HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY BIT(2) +#define HV_FLUSH_USE_EXTENDED_RANGE_FORMAT BIT(3) + +/* hypercall status code */ +#define HV_STATUS_SUCCESS 0 +#define HV_STATUS_INVALID_HYPERCALL_CODE 2 +#define HV_STATUS_INVALID_HYPERCALL_INPUT 3 +#define HV_STATUS_INVALID_ALIGNMENT 4 +#define HV_STATUS_INVALID_PARAMETER 5 +#define HV_STATUS_ACCESS_DENIED 6 +#define HV_STATUS_OPERATION_DENIED 8 +#define HV_STATUS_INSUFFICIENT_MEMORY 11 +#define HV_STATUS_INVALID_PORT_ID 17 +#define HV_STATUS_INVALID_CONNECTION_ID 18 +#define HV_STATUS_INSUFFICIENT_BUFFERS 19 + +/* hypercall options */ +#define HV_HYPERCALL_FAST_BIT BIT(16) +#define HV_HYPERCALL_VARHEAD_OFFSET 17 +#define HV_HYPERCALL_REP_COMP_OFFSET 32 + +/* + * Issue a Hyper-V hypercall. Returns exception vector raised or 0, 'hv_status' + * is set to the hypercall status (if no exception occurred). + */ +static inline uint8_t __hyperv_hypercall(u64 control, vm_vaddr_t input_address, + vm_vaddr_t output_address, + uint64_t *hv_status) +{ + uint64_t error_code; + uint8_t vector; + + /* Note both the hypercall and the "asm safe" clobber r9-r11. */ + asm volatile("mov %[output_address], %%r8\n\t" + KVM_ASM_SAFE("vmcall") + : "=a" (*hv_status), + "+c" (control), "+d" (input_address), + KVM_ASM_SAFE_OUTPUTS(vector, error_code) + : [output_address] "r"(output_address), + "a" (-EFAULT) + : "cc", "memory", "r8", KVM_ASM_SAFE_CLOBBERS); + return vector; +} + +/* Issue a Hyper-V hypercall and assert that it succeeded. */ +static inline void hyperv_hypercall(u64 control, vm_vaddr_t input_address, + vm_vaddr_t output_address) +{ + uint64_t hv_status; + uint8_t vector; + + vector = __hyperv_hypercall(control, input_address, output_address, &hv_status); + + GUEST_ASSERT(!vector); + GUEST_ASSERT((hv_status & 0xffff) == 0); +} + +/* Write 'Fast' hypercall input 'data' to the first 'n_sse_regs' SSE regs */ +static inline void hyperv_write_xmm_input(void *data, int n_sse_regs) +{ + int i; + + for (i = 0; i < n_sse_regs; i++) + write_sse_reg(i, (sse128_t *)(data + sizeof(sse128_t) * i)); +} + +/* Proper HV_X64_MSR_GUEST_OS_ID value */ +#define HYPERV_LINUX_OS_ID ((u64)0x8100 << 48) + +#define HV_X64_MSR_VP_ASSIST_PAGE 0x40000073 +#define HV_X64_MSR_VP_ASSIST_PAGE_ENABLE 0x00000001 +#define HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT 12 +#define HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_MASK \ + (~((1ull << HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT) - 1)) + +struct hv_nested_enlightenments_control { + struct { + __u32 directhypercall:1; + __u32 reserved:31; + } features; + struct { + __u32 reserved; + } hypercallControls; +} __packed; + +/* Define virtual processor assist page structure. 
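A minimal guest-side sketch of how the hypercall helpers above are meant to be used. The function name and the input_gpa/output_gpa arguments are hypothetical (supplied by the host side of a test); the guest OS ID MSR must be non-zero before hypercalls are accepted, and an unknown hypercall code should complete with an error status rather than fault:

static void guest_hypercall_demo(vm_vaddr_t input_gpa, vm_vaddr_t output_gpa)
{
	uint64_t hv_status;
	uint8_t vector;

	/* Identify the guest OS first; hypercalls are refused otherwise. */
	wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID);

	/* A bogus hypercall code should return a status, not a fault. */
	vector = __hyperv_hypercall(0xdeadbeef, input_gpa, output_gpa, &hv_status);
	GUEST_ASSERT(!vector);
	GUEST_ASSERT((hv_status & 0xffff) == HV_STATUS_INVALID_HYPERCALL_CODE);

	GUEST_DONE();
}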
*/ +struct hv_vp_assist_page { + __u32 apic_assist; + __u32 reserved1; + __u64 vtl_control[3]; + struct hv_nested_enlightenments_control nested_control; + __u8 enlighten_vmentry; + __u8 reserved2[7]; + __u64 current_nested_vmcs; +} __packed; + +extern struct hv_vp_assist_page *current_vp_assist; + +int enable_vp_assist(uint64_t vp_assist_pa, void *vp_assist); + +struct hyperv_test_pages { + /* VP assist page */ + void *vp_assist_hva; + uint64_t vp_assist_gpa; + void *vp_assist; + + /* Partition assist page */ + void *partition_assist_hva; + uint64_t partition_assist_gpa; + void *partition_assist; + + /* Enlightened VMCS */ + void *enlightened_vmcs_hva; + uint64_t enlightened_vmcs_gpa; + void *enlightened_vmcs; +}; + +struct hyperv_test_pages *vcpu_alloc_hyperv_test_pages(struct kvm_vm *vm, + vm_vaddr_t *p_hv_pages_gva); + +/* HV_X64_MSR_TSC_INVARIANT_CONTROL bits */ +#define HV_INVARIANT_TSC_EXPOSED BIT_ULL(0) + +const struct kvm_cpuid2 *kvm_get_supported_hv_cpuid(void); +const struct kvm_cpuid2 *vcpu_get_supported_hv_cpuid(struct kvm_vcpu *vcpu); +void vcpu_set_hv_cpuid(struct kvm_vcpu *vcpu); + +bool kvm_hv_cpu_has(struct kvm_x86_cpu_feature feature); + +#endif /* !SELFTEST_KVM_HYPERV_H */ diff --git a/tools/testing/selftests/kvm/include/x86/kvm_util_arch.h b/tools/testing/selftests/kvm/include/x86/kvm_util_arch.h new file mode 100644 index 000000000000..972bb1c4ab4c --- /dev/null +++ b/tools/testing/selftests/kvm/include/x86/kvm_util_arch.h @@ -0,0 +1,51 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef SELFTEST_KVM_UTIL_ARCH_H +#define SELFTEST_KVM_UTIL_ARCH_H + +#include +#include + +#include "kvm_util_types.h" +#include "test_util.h" + +extern bool is_forced_emulation_enabled; + +struct kvm_vm_arch { + vm_vaddr_t gdt; + vm_vaddr_t tss; + vm_vaddr_t idt; + + uint64_t c_bit; + uint64_t s_bit; + int sev_fd; + bool is_pt_protected; +}; + +static inline bool __vm_arch_has_protected_memory(struct kvm_vm_arch *arch) +{ + return arch->c_bit || arch->s_bit; +} + +#define vm_arch_has_protected_memory(vm) \ + __vm_arch_has_protected_memory(&(vm)->arch) + +#define vcpu_arch_put_guest(mem, __val) \ +do { \ + const typeof(mem) val = (__val); \ + \ + if (!is_forced_emulation_enabled || guest_random_bool(&guest_rng)) { \ + (mem) = val; \ + } else if (guest_random_bool(&guest_rng)) { \ + __asm__ __volatile__(KVM_FEP "mov %1, %0" \ + : "+m" (mem) \ + : "r" (val) : "memory"); \ + } else { \ + uint64_t __old = READ_ONCE(mem); \ + \ + __asm__ __volatile__(KVM_FEP LOCK_PREFIX "cmpxchg %[new], %[ptr]" \ + : [ptr] "+m" (mem), [old] "+a" (__old) \ + : [new]"r" (val) : "memory", "cc"); \ + } \ +} while (0) + +#endif // SELFTEST_KVM_UTIL_ARCH_H diff --git a/tools/testing/selftests/kvm/include/x86/mce.h b/tools/testing/selftests/kvm/include/x86/mce.h new file mode 100644 index 000000000000..295f2d554754 --- /dev/null +++ b/tools/testing/selftests/kvm/include/x86/mce.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2022, Google LLC. 
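For context, a hedged sketch of the usual host/guest split when consuming these allocations, loosely following the existing Hyper-V tests. Variable names, guest_code() and the vm/vcpu handles are the usual test-local conventions, not definitions from this header:

/* Host side (illustrative): back the pages and hand the GVA to the guest. */
	vm_vaddr_t hv_pages_gva = 0;
	struct hyperv_test_pages *hv_pages;

	hv_pages = vcpu_alloc_hyperv_test_pages(vm, &hv_pages_gva);
	vcpu_args_set(vcpu, 1, hv_pages_gva);

/* Guest side (illustrative): enable the VP assist page before relying on
 * any enlightenment that needs it (eVMCS, direct TLB flush, ...). */
static void guest_code(struct hyperv_test_pages *hv)
{
	enable_vp_assist(hv->vp_assist_gpa, hv->vp_assist);
}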
+ */ + +#ifndef SELFTEST_KVM_MCE_H +#define SELFTEST_KVM_MCE_H + +#define MCG_CTL_P BIT_ULL(8) /* MCG_CTL register available */ +#define MCG_SER_P BIT_ULL(24) /* MCA recovery/new status bits */ +#define MCG_LMCE_P BIT_ULL(27) /* Local machine check supported */ +#define MCG_CMCI_P BIT_ULL(10) /* CMCI supported */ +#define KVM_MAX_MCE_BANKS 32 +#define MCG_CAP_BANKS_MASK 0xff /* Bit 0-7 of the MCG_CAP register are #banks */ +#define MCI_STATUS_VAL (1ULL << 63) /* valid error */ +#define MCI_STATUS_UC (1ULL << 61) /* uncorrected error */ +#define MCI_STATUS_EN (1ULL << 60) /* error enabled */ +#define MCI_STATUS_MISCV (1ULL << 59) /* misc error reg. valid */ +#define MCI_STATUS_ADDRV (1ULL << 58) /* addr reg. valid */ +#define MCM_ADDR_PHYS 2 /* physical address */ +#define MCI_CTL2_CMCI_EN BIT_ULL(30) + +#endif /* SELFTEST_KVM_MCE_H */ diff --git a/tools/testing/selftests/kvm/include/x86/pmu.h b/tools/testing/selftests/kvm/include/x86/pmu.h new file mode 100644 index 000000000000..3c10c4dc0ae8 --- /dev/null +++ b/tools/testing/selftests/kvm/include/x86/pmu.h @@ -0,0 +1,97 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2023, Tencent, Inc. + */ +#ifndef SELFTEST_KVM_PMU_H +#define SELFTEST_KVM_PMU_H + +#include + +#define KVM_PMU_EVENT_FILTER_MAX_EVENTS 300 + +/* + * Encode an eventsel+umask pair into event-select MSR format. Note, this is + * technically AMD's format, as Intel's format only supports 8 bits for the + * event selector, i.e. doesn't use bits 24:16 for the selector. But, OR-ing + * in '0' is a nop and won't clobber the CMASK. + */ +#define RAW_EVENT(eventsel, umask) (((eventsel & 0xf00UL) << 24) | \ + ((eventsel) & 0xff) | \ + ((umask) & 0xff) << 8) + +/* + * These are technically Intel's definitions, but except for CMASK (see above), + * AMD's layout is compatible with Intel's. + */ +#define ARCH_PERFMON_EVENTSEL_EVENT GENMASK_ULL(7, 0) +#define ARCH_PERFMON_EVENTSEL_UMASK GENMASK_ULL(15, 8) +#define ARCH_PERFMON_EVENTSEL_USR BIT_ULL(16) +#define ARCH_PERFMON_EVENTSEL_OS BIT_ULL(17) +#define ARCH_PERFMON_EVENTSEL_EDGE BIT_ULL(18) +#define ARCH_PERFMON_EVENTSEL_PIN_CONTROL BIT_ULL(19) +#define ARCH_PERFMON_EVENTSEL_INT BIT_ULL(20) +#define ARCH_PERFMON_EVENTSEL_ANY BIT_ULL(21) +#define ARCH_PERFMON_EVENTSEL_ENABLE BIT_ULL(22) +#define ARCH_PERFMON_EVENTSEL_INV BIT_ULL(23) +#define ARCH_PERFMON_EVENTSEL_CMASK GENMASK_ULL(31, 24) + +/* RDPMC control flags, Intel only. */ +#define INTEL_RDPMC_METRICS BIT_ULL(29) +#define INTEL_RDPMC_FIXED BIT_ULL(30) +#define INTEL_RDPMC_FAST BIT_ULL(31) + +/* Fixed PMC controls, Intel only. 
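As a hedged illustration of the event encoding above, a guest-side helper that counts retired instructions on general-purpose counter 0. The MSR indices (MSR_P6_EVNTSEL0, MSR_IA32_PMC0, MSR_CORE_PERF_GLOBAL_CTRL) come from the tools copy of <asm/msr-index.h> and the rdmsr()/wrmsr() wrappers from processor.h, not from this header; an architectural PMU with global control is assumed, and the helper name is purely illustrative:

static uint64_t count_instructions_retired(void (*workload)(void))
{
	/* Event 0xc0 / umask 0x00 is "instructions retired" on Intel and AMD. */
	uint64_t eventsel = RAW_EVENT(0xc0, 0x00) |
			    ARCH_PERFMON_EVENTSEL_USR |
			    ARCH_PERFMON_EVENTSEL_OS |
			    ARCH_PERFMON_EVENTSEL_ENABLE;

	wrmsr(MSR_IA32_PMC0, 0);
	wrmsr(MSR_P6_EVNTSEL0, eventsel);
	wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, BIT_ULL(0));	/* enable GP counter 0 */

	workload();

	wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0);
	return rdmsr(MSR_IA32_PMC0);
}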
*/ +#define FIXED_PMC_GLOBAL_CTRL_ENABLE(_idx) BIT_ULL((32 + (_idx))) + +#define FIXED_PMC_KERNEL BIT_ULL(0) +#define FIXED_PMC_USER BIT_ULL(1) +#define FIXED_PMC_ANYTHREAD BIT_ULL(2) +#define FIXED_PMC_ENABLE_PMI BIT_ULL(3) +#define FIXED_PMC_NR_BITS 4 +#define FIXED_PMC_CTRL(_idx, _val) ((_val) << ((_idx) * FIXED_PMC_NR_BITS)) + +#define PMU_CAP_FW_WRITES BIT_ULL(13) +#define PMU_CAP_LBR_FMT 0x3f + +#define INTEL_ARCH_CPU_CYCLES RAW_EVENT(0x3c, 0x00) +#define INTEL_ARCH_INSTRUCTIONS_RETIRED RAW_EVENT(0xc0, 0x00) +#define INTEL_ARCH_REFERENCE_CYCLES RAW_EVENT(0x3c, 0x01) +#define INTEL_ARCH_LLC_REFERENCES RAW_EVENT(0x2e, 0x4f) +#define INTEL_ARCH_LLC_MISSES RAW_EVENT(0x2e, 0x41) +#define INTEL_ARCH_BRANCHES_RETIRED RAW_EVENT(0xc4, 0x00) +#define INTEL_ARCH_BRANCHES_MISPREDICTED RAW_EVENT(0xc5, 0x00) +#define INTEL_ARCH_TOPDOWN_SLOTS RAW_EVENT(0xa4, 0x01) + +#define AMD_ZEN_CORE_CYCLES RAW_EVENT(0x76, 0x00) +#define AMD_ZEN_INSTRUCTIONS_RETIRED RAW_EVENT(0xc0, 0x00) +#define AMD_ZEN_BRANCHES_RETIRED RAW_EVENT(0xc2, 0x00) +#define AMD_ZEN_BRANCHES_MISPREDICTED RAW_EVENT(0xc3, 0x00) + +/* + * Note! The order and thus the index of the architectural events matters as + * support for each event is enumerated via CPUID using the index of the event. + */ +enum intel_pmu_architectural_events { + INTEL_ARCH_CPU_CYCLES_INDEX, + INTEL_ARCH_INSTRUCTIONS_RETIRED_INDEX, + INTEL_ARCH_REFERENCE_CYCLES_INDEX, + INTEL_ARCH_LLC_REFERENCES_INDEX, + INTEL_ARCH_LLC_MISSES_INDEX, + INTEL_ARCH_BRANCHES_RETIRED_INDEX, + INTEL_ARCH_BRANCHES_MISPREDICTED_INDEX, + INTEL_ARCH_TOPDOWN_SLOTS_INDEX, + NR_INTEL_ARCH_EVENTS, +}; + +enum amd_pmu_zen_events { + AMD_ZEN_CORE_CYCLES_INDEX, + AMD_ZEN_INSTRUCTIONS_INDEX, + AMD_ZEN_BRANCHES_INDEX, + AMD_ZEN_BRANCH_MISSES_INDEX, + NR_AMD_ZEN_EVENTS, +}; + +extern const uint64_t intel_pmu_arch_events[]; +extern const uint64_t amd_pmu_zen_events[]; + +#endif /* SELFTEST_KVM_PMU_H */ diff --git a/tools/testing/selftests/kvm/include/x86/processor.h b/tools/testing/selftests/kvm/include/x86/processor.h new file mode 100644 index 000000000000..9ec984cf8674 --- /dev/null +++ b/tools/testing/selftests/kvm/include/x86/processor.h @@ -0,0 +1,1395 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2018, Google LLC. + */ + +#ifndef SELFTEST_KVM_PROCESSOR_H +#define SELFTEST_KVM_PROCESSOR_H + +#include +#include +#include + +#include +#include + +#include +#include + +#include "kvm_util.h" +#include "ucall_common.h" + +extern bool host_cpu_is_intel; +extern bool host_cpu_is_amd; +extern uint64_t guest_tsc_khz; + +#ifndef MAX_NR_CPUID_ENTRIES +#define MAX_NR_CPUID_ENTRIES 100 +#endif + +/* Forced emulation prefix, used to invoke the emulator unconditionally. 
*/ +#define KVM_FEP "ud2; .byte 'k', 'v', 'm';" + +#define NMI_VECTOR 0x02 + +#define X86_EFLAGS_FIXED (1u << 1) + +#define X86_CR4_VME (1ul << 0) +#define X86_CR4_PVI (1ul << 1) +#define X86_CR4_TSD (1ul << 2) +#define X86_CR4_DE (1ul << 3) +#define X86_CR4_PSE (1ul << 4) +#define X86_CR4_PAE (1ul << 5) +#define X86_CR4_MCE (1ul << 6) +#define X86_CR4_PGE (1ul << 7) +#define X86_CR4_PCE (1ul << 8) +#define X86_CR4_OSFXSR (1ul << 9) +#define X86_CR4_OSXMMEXCPT (1ul << 10) +#define X86_CR4_UMIP (1ul << 11) +#define X86_CR4_LA57 (1ul << 12) +#define X86_CR4_VMXE (1ul << 13) +#define X86_CR4_SMXE (1ul << 14) +#define X86_CR4_FSGSBASE (1ul << 16) +#define X86_CR4_PCIDE (1ul << 17) +#define X86_CR4_OSXSAVE (1ul << 18) +#define X86_CR4_SMEP (1ul << 20) +#define X86_CR4_SMAP (1ul << 21) +#define X86_CR4_PKE (1ul << 22) + +struct xstate_header { + u64 xstate_bv; + u64 xcomp_bv; + u64 reserved[6]; +} __attribute__((packed)); + +struct xstate { + u8 i387[512]; + struct xstate_header header; + u8 extended_state_area[0]; +} __attribute__ ((packed, aligned (64))); + +#define XFEATURE_MASK_FP BIT_ULL(0) +#define XFEATURE_MASK_SSE BIT_ULL(1) +#define XFEATURE_MASK_YMM BIT_ULL(2) +#define XFEATURE_MASK_BNDREGS BIT_ULL(3) +#define XFEATURE_MASK_BNDCSR BIT_ULL(4) +#define XFEATURE_MASK_OPMASK BIT_ULL(5) +#define XFEATURE_MASK_ZMM_Hi256 BIT_ULL(6) +#define XFEATURE_MASK_Hi16_ZMM BIT_ULL(7) +#define XFEATURE_MASK_PT BIT_ULL(8) +#define XFEATURE_MASK_PKRU BIT_ULL(9) +#define XFEATURE_MASK_PASID BIT_ULL(10) +#define XFEATURE_MASK_CET_USER BIT_ULL(11) +#define XFEATURE_MASK_CET_KERNEL BIT_ULL(12) +#define XFEATURE_MASK_LBR BIT_ULL(15) +#define XFEATURE_MASK_XTILE_CFG BIT_ULL(17) +#define XFEATURE_MASK_XTILE_DATA BIT_ULL(18) + +#define XFEATURE_MASK_AVX512 (XFEATURE_MASK_OPMASK | \ + XFEATURE_MASK_ZMM_Hi256 | \ + XFEATURE_MASK_Hi16_ZMM) +#define XFEATURE_MASK_XTILE (XFEATURE_MASK_XTILE_DATA | \ + XFEATURE_MASK_XTILE_CFG) + +/* Note, these are ordered alphabetically to match kvm_cpuid_entry2. Eww. */ +enum cpuid_output_regs { + KVM_CPUID_EAX, + KVM_CPUID_EBX, + KVM_CPUID_ECX, + KVM_CPUID_EDX +}; + +/* + * Pack the information into a 64-bit value so that each X86_FEATURE_XXX can be + * passed by value with no overhead. + */ +struct kvm_x86_cpu_feature { + u32 function; + u16 index; + u8 reg; + u8 bit; +}; +#define KVM_X86_CPU_FEATURE(fn, idx, gpr, __bit) \ +({ \ + struct kvm_x86_cpu_feature feature = { \ + .function = fn, \ + .index = idx, \ + .reg = KVM_CPUID_##gpr, \ + .bit = __bit, \ + }; \ + \ + kvm_static_assert((fn & 0xc0000000) == 0 || \ + (fn & 0xc0000000) == 0x40000000 || \ + (fn & 0xc0000000) == 0x80000000 || \ + (fn & 0xc0000000) == 0xc0000000); \ + kvm_static_assert(idx < BIT(sizeof(feature.index) * BITS_PER_BYTE)); \ + feature; \ +}) + +/* + * Basic Leafs, a.k.a. 
Intel defined + */ +#define X86_FEATURE_MWAIT KVM_X86_CPU_FEATURE(0x1, 0, ECX, 3) +#define X86_FEATURE_VMX KVM_X86_CPU_FEATURE(0x1, 0, ECX, 5) +#define X86_FEATURE_SMX KVM_X86_CPU_FEATURE(0x1, 0, ECX, 6) +#define X86_FEATURE_PDCM KVM_X86_CPU_FEATURE(0x1, 0, ECX, 15) +#define X86_FEATURE_PCID KVM_X86_CPU_FEATURE(0x1, 0, ECX, 17) +#define X86_FEATURE_X2APIC KVM_X86_CPU_FEATURE(0x1, 0, ECX, 21) +#define X86_FEATURE_MOVBE KVM_X86_CPU_FEATURE(0x1, 0, ECX, 22) +#define X86_FEATURE_TSC_DEADLINE_TIMER KVM_X86_CPU_FEATURE(0x1, 0, ECX, 24) +#define X86_FEATURE_XSAVE KVM_X86_CPU_FEATURE(0x1, 0, ECX, 26) +#define X86_FEATURE_OSXSAVE KVM_X86_CPU_FEATURE(0x1, 0, ECX, 27) +#define X86_FEATURE_RDRAND KVM_X86_CPU_FEATURE(0x1, 0, ECX, 30) +#define X86_FEATURE_HYPERVISOR KVM_X86_CPU_FEATURE(0x1, 0, ECX, 31) +#define X86_FEATURE_PAE KVM_X86_CPU_FEATURE(0x1, 0, EDX, 6) +#define X86_FEATURE_MCE KVM_X86_CPU_FEATURE(0x1, 0, EDX, 7) +#define X86_FEATURE_APIC KVM_X86_CPU_FEATURE(0x1, 0, EDX, 9) +#define X86_FEATURE_CLFLUSH KVM_X86_CPU_FEATURE(0x1, 0, EDX, 19) +#define X86_FEATURE_XMM KVM_X86_CPU_FEATURE(0x1, 0, EDX, 25) +#define X86_FEATURE_XMM2 KVM_X86_CPU_FEATURE(0x1, 0, EDX, 26) +#define X86_FEATURE_FSGSBASE KVM_X86_CPU_FEATURE(0x7, 0, EBX, 0) +#define X86_FEATURE_TSC_ADJUST KVM_X86_CPU_FEATURE(0x7, 0, EBX, 1) +#define X86_FEATURE_SGX KVM_X86_CPU_FEATURE(0x7, 0, EBX, 2) +#define X86_FEATURE_HLE KVM_X86_CPU_FEATURE(0x7, 0, EBX, 4) +#define X86_FEATURE_SMEP KVM_X86_CPU_FEATURE(0x7, 0, EBX, 7) +#define X86_FEATURE_INVPCID KVM_X86_CPU_FEATURE(0x7, 0, EBX, 10) +#define X86_FEATURE_RTM KVM_X86_CPU_FEATURE(0x7, 0, EBX, 11) +#define X86_FEATURE_MPX KVM_X86_CPU_FEATURE(0x7, 0, EBX, 14) +#define X86_FEATURE_SMAP KVM_X86_CPU_FEATURE(0x7, 0, EBX, 20) +#define X86_FEATURE_PCOMMIT KVM_X86_CPU_FEATURE(0x7, 0, EBX, 22) +#define X86_FEATURE_CLFLUSHOPT KVM_X86_CPU_FEATURE(0x7, 0, EBX, 23) +#define X86_FEATURE_CLWB KVM_X86_CPU_FEATURE(0x7, 0, EBX, 24) +#define X86_FEATURE_UMIP KVM_X86_CPU_FEATURE(0x7, 0, ECX, 2) +#define X86_FEATURE_PKU KVM_X86_CPU_FEATURE(0x7, 0, ECX, 3) +#define X86_FEATURE_OSPKE KVM_X86_CPU_FEATURE(0x7, 0, ECX, 4) +#define X86_FEATURE_LA57 KVM_X86_CPU_FEATURE(0x7, 0, ECX, 16) +#define X86_FEATURE_RDPID KVM_X86_CPU_FEATURE(0x7, 0, ECX, 22) +#define X86_FEATURE_SGX_LC KVM_X86_CPU_FEATURE(0x7, 0, ECX, 30) +#define X86_FEATURE_SHSTK KVM_X86_CPU_FEATURE(0x7, 0, ECX, 7) +#define X86_FEATURE_IBT KVM_X86_CPU_FEATURE(0x7, 0, EDX, 20) +#define X86_FEATURE_AMX_TILE KVM_X86_CPU_FEATURE(0x7, 0, EDX, 24) +#define X86_FEATURE_SPEC_CTRL KVM_X86_CPU_FEATURE(0x7, 0, EDX, 26) +#define X86_FEATURE_ARCH_CAPABILITIES KVM_X86_CPU_FEATURE(0x7, 0, EDX, 29) +#define X86_FEATURE_PKS KVM_X86_CPU_FEATURE(0x7, 0, ECX, 31) +#define X86_FEATURE_XTILECFG KVM_X86_CPU_FEATURE(0xD, 0, EAX, 17) +#define X86_FEATURE_XTILEDATA KVM_X86_CPU_FEATURE(0xD, 0, EAX, 18) +#define X86_FEATURE_XSAVES KVM_X86_CPU_FEATURE(0xD, 1, EAX, 3) +#define X86_FEATURE_XFD KVM_X86_CPU_FEATURE(0xD, 1, EAX, 4) +#define X86_FEATURE_XTILEDATA_XFD KVM_X86_CPU_FEATURE(0xD, 18, ECX, 2) + +/* + * Extended Leafs, a.k.a. 
AMD defined + */ +#define X86_FEATURE_SVM KVM_X86_CPU_FEATURE(0x80000001, 0, ECX, 2) +#define X86_FEATURE_NX KVM_X86_CPU_FEATURE(0x80000001, 0, EDX, 20) +#define X86_FEATURE_GBPAGES KVM_X86_CPU_FEATURE(0x80000001, 0, EDX, 26) +#define X86_FEATURE_RDTSCP KVM_X86_CPU_FEATURE(0x80000001, 0, EDX, 27) +#define X86_FEATURE_LM KVM_X86_CPU_FEATURE(0x80000001, 0, EDX, 29) +#define X86_FEATURE_INVTSC KVM_X86_CPU_FEATURE(0x80000007, 0, EDX, 8) +#define X86_FEATURE_RDPRU KVM_X86_CPU_FEATURE(0x80000008, 0, EBX, 4) +#define X86_FEATURE_AMD_IBPB KVM_X86_CPU_FEATURE(0x80000008, 0, EBX, 12) +#define X86_FEATURE_NPT KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 0) +#define X86_FEATURE_LBRV KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 1) +#define X86_FEATURE_NRIPS KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 3) +#define X86_FEATURE_TSCRATEMSR KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 4) +#define X86_FEATURE_PAUSEFILTER KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 10) +#define X86_FEATURE_PFTHRESHOLD KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 12) +#define X86_FEATURE_VGIF KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 16) +#define X86_FEATURE_SEV KVM_X86_CPU_FEATURE(0x8000001F, 0, EAX, 1) +#define X86_FEATURE_SEV_ES KVM_X86_CPU_FEATURE(0x8000001F, 0, EAX, 3) + +/* + * KVM defined paravirt features. + */ +#define X86_FEATURE_KVM_CLOCKSOURCE KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 0) +#define X86_FEATURE_KVM_NOP_IO_DELAY KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 1) +#define X86_FEATURE_KVM_MMU_OP KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 2) +#define X86_FEATURE_KVM_CLOCKSOURCE2 KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 3) +#define X86_FEATURE_KVM_ASYNC_PF KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 4) +#define X86_FEATURE_KVM_STEAL_TIME KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 5) +#define X86_FEATURE_KVM_PV_EOI KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 6) +#define X86_FEATURE_KVM_PV_UNHALT KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 7) +/* Bit 8 apparently isn't used?!?! */ +#define X86_FEATURE_KVM_PV_TLB_FLUSH KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 9) +#define X86_FEATURE_KVM_ASYNC_PF_VMEXIT KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 10) +#define X86_FEATURE_KVM_PV_SEND_IPI KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 11) +#define X86_FEATURE_KVM_POLL_CONTROL KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 12) +#define X86_FEATURE_KVM_PV_SCHED_YIELD KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 13) +#define X86_FEATURE_KVM_ASYNC_PF_INT KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 14) +#define X86_FEATURE_KVM_MSI_EXT_DEST_ID KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 15) +#define X86_FEATURE_KVM_HC_MAP_GPA_RANGE KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 16) +#define X86_FEATURE_KVM_MIGRATION_CONTROL KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 17) + +/* + * Same idea as X86_FEATURE_XXX, but X86_PROPERTY_XXX retrieves a multi-bit + * value/property as opposed to a single-bit feature. Again, pack the info + * into a 64-bit value to pass by value with no overhead. 
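Each X86_FEATURE_* above is just a packed (function, index, register, bit) tuple, decoded by the query helpers defined later in this header (this_cpu_has() against raw CPUID, kvm_cpu_has() against KVM's supported CPUID). An illustrative guest-side fragment, shown before the multi-bit properties are introduced below; the ad hoc feature is purely for demonstration:

	/* X86_FEATURE_VMX is CPUID.01H:ECX[5]; one-off features can be built
	 * inline the same way without adding a #define to this header. */
	struct kvm_x86_cpu_feature vmx_inline = KVM_X86_CPU_FEATURE(0x1, 0, ECX, 5);

	GUEST_ASSERT(this_cpu_has(vmx_inline) == this_cpu_has(X86_FEATURE_VMX));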
+ */ +struct kvm_x86_cpu_property { + u32 function; + u8 index; + u8 reg; + u8 lo_bit; + u8 hi_bit; +}; +#define KVM_X86_CPU_PROPERTY(fn, idx, gpr, low_bit, high_bit) \ +({ \ + struct kvm_x86_cpu_property property = { \ + .function = fn, \ + .index = idx, \ + .reg = KVM_CPUID_##gpr, \ + .lo_bit = low_bit, \ + .hi_bit = high_bit, \ + }; \ + \ + kvm_static_assert(low_bit < high_bit); \ + kvm_static_assert((fn & 0xc0000000) == 0 || \ + (fn & 0xc0000000) == 0x40000000 || \ + (fn & 0xc0000000) == 0x80000000 || \ + (fn & 0xc0000000) == 0xc0000000); \ + kvm_static_assert(idx < BIT(sizeof(property.index) * BITS_PER_BYTE)); \ + property; \ +}) + +#define X86_PROPERTY_MAX_BASIC_LEAF KVM_X86_CPU_PROPERTY(0, 0, EAX, 0, 31) +#define X86_PROPERTY_PMU_VERSION KVM_X86_CPU_PROPERTY(0xa, 0, EAX, 0, 7) +#define X86_PROPERTY_PMU_NR_GP_COUNTERS KVM_X86_CPU_PROPERTY(0xa, 0, EAX, 8, 15) +#define X86_PROPERTY_PMU_GP_COUNTERS_BIT_WIDTH KVM_X86_CPU_PROPERTY(0xa, 0, EAX, 16, 23) +#define X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH KVM_X86_CPU_PROPERTY(0xa, 0, EAX, 24, 31) +#define X86_PROPERTY_PMU_EVENTS_MASK KVM_X86_CPU_PROPERTY(0xa, 0, EBX, 0, 7) +#define X86_PROPERTY_PMU_FIXED_COUNTERS_BITMASK KVM_X86_CPU_PROPERTY(0xa, 0, ECX, 0, 31) +#define X86_PROPERTY_PMU_NR_FIXED_COUNTERS KVM_X86_CPU_PROPERTY(0xa, 0, EDX, 0, 4) +#define X86_PROPERTY_PMU_FIXED_COUNTERS_BIT_WIDTH KVM_X86_CPU_PROPERTY(0xa, 0, EDX, 5, 12) + +#define X86_PROPERTY_SUPPORTED_XCR0_LO KVM_X86_CPU_PROPERTY(0xd, 0, EAX, 0, 31) +#define X86_PROPERTY_XSTATE_MAX_SIZE_XCR0 KVM_X86_CPU_PROPERTY(0xd, 0, EBX, 0, 31) +#define X86_PROPERTY_XSTATE_MAX_SIZE KVM_X86_CPU_PROPERTY(0xd, 0, ECX, 0, 31) +#define X86_PROPERTY_SUPPORTED_XCR0_HI KVM_X86_CPU_PROPERTY(0xd, 0, EDX, 0, 31) + +#define X86_PROPERTY_XSTATE_TILE_SIZE KVM_X86_CPU_PROPERTY(0xd, 18, EAX, 0, 31) +#define X86_PROPERTY_XSTATE_TILE_OFFSET KVM_X86_CPU_PROPERTY(0xd, 18, EBX, 0, 31) +#define X86_PROPERTY_AMX_MAX_PALETTE_TABLES KVM_X86_CPU_PROPERTY(0x1d, 0, EAX, 0, 31) +#define X86_PROPERTY_AMX_TOTAL_TILE_BYTES KVM_X86_CPU_PROPERTY(0x1d, 1, EAX, 0, 15) +#define X86_PROPERTY_AMX_BYTES_PER_TILE KVM_X86_CPU_PROPERTY(0x1d, 1, EAX, 16, 31) +#define X86_PROPERTY_AMX_BYTES_PER_ROW KVM_X86_CPU_PROPERTY(0x1d, 1, EBX, 0, 15) +#define X86_PROPERTY_AMX_NR_TILE_REGS KVM_X86_CPU_PROPERTY(0x1d, 1, EBX, 16, 31) +#define X86_PROPERTY_AMX_MAX_ROWS KVM_X86_CPU_PROPERTY(0x1d, 1, ECX, 0, 15) + +#define X86_PROPERTY_MAX_KVM_LEAF KVM_X86_CPU_PROPERTY(0x40000000, 0, EAX, 0, 31) + +#define X86_PROPERTY_MAX_EXT_LEAF KVM_X86_CPU_PROPERTY(0x80000000, 0, EAX, 0, 31) +#define X86_PROPERTY_MAX_PHY_ADDR KVM_X86_CPU_PROPERTY(0x80000008, 0, EAX, 0, 7) +#define X86_PROPERTY_MAX_VIRT_ADDR KVM_X86_CPU_PROPERTY(0x80000008, 0, EAX, 8, 15) +#define X86_PROPERTY_GUEST_MAX_PHY_ADDR KVM_X86_CPU_PROPERTY(0x80000008, 0, EAX, 16, 23) +#define X86_PROPERTY_SEV_C_BIT KVM_X86_CPU_PROPERTY(0x8000001F, 0, EBX, 0, 5) +#define X86_PROPERTY_PHYS_ADDR_REDUCTION KVM_X86_CPU_PROPERTY(0x8000001F, 0, EBX, 6, 11) + +#define X86_PROPERTY_MAX_CENTAUR_LEAF KVM_X86_CPU_PROPERTY(0xC0000000, 0, EAX, 0, 31) + +/* + * Intel's architectural PMU events are bizarre. They have a "feature" bit + * that indicates the feature is _not_ supported, and a property that states + * the length of the bit mask of unsupported features. A feature is supported + * if the size of the bit mask is larger than the "unavailable" bit, and said + * bit is not set. Fixed counters also bizarre enumeration, but inverted from + * arch events for general purpose counters. 
Fixed counters are supported if a + * feature flag is set **OR** the total number of fixed counters is greater + * than index of the counter. + * + * Wrap the events for general purpose and fixed counters to simplify checking + * whether or not a given architectural event is supported. + */ +struct kvm_x86_pmu_feature { + struct kvm_x86_cpu_feature f; +}; +#define KVM_X86_PMU_FEATURE(__reg, __bit) \ +({ \ + struct kvm_x86_pmu_feature feature = { \ + .f = KVM_X86_CPU_FEATURE(0xa, 0, __reg, __bit), \ + }; \ + \ + kvm_static_assert(KVM_CPUID_##__reg == KVM_CPUID_EBX || \ + KVM_CPUID_##__reg == KVM_CPUID_ECX); \ + feature; \ +}) + +#define X86_PMU_FEATURE_CPU_CYCLES KVM_X86_PMU_FEATURE(EBX, 0) +#define X86_PMU_FEATURE_INSNS_RETIRED KVM_X86_PMU_FEATURE(EBX, 1) +#define X86_PMU_FEATURE_REFERENCE_CYCLES KVM_X86_PMU_FEATURE(EBX, 2) +#define X86_PMU_FEATURE_LLC_REFERENCES KVM_X86_PMU_FEATURE(EBX, 3) +#define X86_PMU_FEATURE_LLC_MISSES KVM_X86_PMU_FEATURE(EBX, 4) +#define X86_PMU_FEATURE_BRANCH_INSNS_RETIRED KVM_X86_PMU_FEATURE(EBX, 5) +#define X86_PMU_FEATURE_BRANCHES_MISPREDICTED KVM_X86_PMU_FEATURE(EBX, 6) +#define X86_PMU_FEATURE_TOPDOWN_SLOTS KVM_X86_PMU_FEATURE(EBX, 7) + +#define X86_PMU_FEATURE_INSNS_RETIRED_FIXED KVM_X86_PMU_FEATURE(ECX, 0) +#define X86_PMU_FEATURE_CPU_CYCLES_FIXED KVM_X86_PMU_FEATURE(ECX, 1) +#define X86_PMU_FEATURE_REFERENCE_TSC_CYCLES_FIXED KVM_X86_PMU_FEATURE(ECX, 2) +#define X86_PMU_FEATURE_TOPDOWN_SLOTS_FIXED KVM_X86_PMU_FEATURE(ECX, 3) + +static inline unsigned int x86_family(unsigned int eax) +{ + unsigned int x86; + + x86 = (eax >> 8) & 0xf; + + if (x86 == 0xf) + x86 += (eax >> 20) & 0xff; + + return x86; +} + +static inline unsigned int x86_model(unsigned int eax) +{ + return ((eax >> 12) & 0xf0) | ((eax >> 4) & 0x0f); +} + +/* Page table bitfield declarations */ +#define PTE_PRESENT_MASK BIT_ULL(0) +#define PTE_WRITABLE_MASK BIT_ULL(1) +#define PTE_USER_MASK BIT_ULL(2) +#define PTE_ACCESSED_MASK BIT_ULL(5) +#define PTE_DIRTY_MASK BIT_ULL(6) +#define PTE_LARGE_MASK BIT_ULL(7) +#define PTE_GLOBAL_MASK BIT_ULL(8) +#define PTE_NX_MASK BIT_ULL(63) + +#define PHYSICAL_PAGE_MASK GENMASK_ULL(51, 12) + +#define PAGE_SHIFT 12 +#define PAGE_SIZE (1ULL << PAGE_SHIFT) +#define PAGE_MASK (~(PAGE_SIZE-1) & PHYSICAL_PAGE_MASK) + +#define HUGEPAGE_SHIFT(x) (PAGE_SHIFT + (((x) - 1) * 9)) +#define HUGEPAGE_SIZE(x) (1UL << HUGEPAGE_SHIFT(x)) +#define HUGEPAGE_MASK(x) (~(HUGEPAGE_SIZE(x) - 1) & PHYSICAL_PAGE_MASK) + +#define PTE_GET_PA(pte) ((pte) & PHYSICAL_PAGE_MASK) +#define PTE_GET_PFN(pte) (PTE_GET_PA(pte) >> PAGE_SHIFT) + +/* General Registers in 64-Bit Mode */ +struct gpr64_regs { + u64 rax; + u64 rcx; + u64 rdx; + u64 rbx; + u64 rsp; + u64 rbp; + u64 rsi; + u64 rdi; + u64 r8; + u64 r9; + u64 r10; + u64 r11; + u64 r12; + u64 r13; + u64 r14; + u64 r15; +}; + +struct desc64 { + uint16_t limit0; + uint16_t base0; + unsigned base1:8, type:4, s:1, dpl:2, p:1; + unsigned limit1:4, avl:1, l:1, db:1, g:1, base2:8; + uint32_t base3; + uint32_t zero1; +} __attribute__((packed)); + +struct desc_ptr { + uint16_t size; + uint64_t address; +} __attribute__((packed)); + +struct kvm_x86_state { + struct kvm_xsave *xsave; + struct kvm_vcpu_events events; + struct kvm_mp_state mp_state; + struct kvm_regs regs; + struct kvm_xcrs xcrs; + struct kvm_sregs sregs; + struct kvm_debugregs debugregs; + union { + struct kvm_nested_state nested; + char nested_[16384]; + }; + struct kvm_msrs msrs; +}; + +static inline uint64_t get_desc64_base(const struct desc64 *desc) +{ + return ((uint64_t)desc->base3 << 
32) | + (desc->base0 | ((desc->base1) << 16) | ((desc->base2) << 24)); +} + +static inline uint64_t rdtsc(void) +{ + uint32_t eax, edx; + uint64_t tsc_val; + /* + * The lfence is to wait (on Intel CPUs) until all previous + * instructions have been executed. If software requires RDTSC to be + * executed prior to execution of any subsequent instruction, it can + * execute LFENCE immediately after RDTSC + */ + __asm__ __volatile__("lfence; rdtsc; lfence" : "=a"(eax), "=d"(edx)); + tsc_val = ((uint64_t)edx) << 32 | eax; + return tsc_val; +} + +static inline uint64_t rdtscp(uint32_t *aux) +{ + uint32_t eax, edx; + + __asm__ __volatile__("rdtscp" : "=a"(eax), "=d"(edx), "=c"(*aux)); + return ((uint64_t)edx) << 32 | eax; +} + +static inline uint64_t rdmsr(uint32_t msr) +{ + uint32_t a, d; + + __asm__ __volatile__("rdmsr" : "=a"(a), "=d"(d) : "c"(msr) : "memory"); + + return a | ((uint64_t) d << 32); +} + +static inline void wrmsr(uint32_t msr, uint64_t value) +{ + uint32_t a = value; + uint32_t d = value >> 32; + + __asm__ __volatile__("wrmsr" :: "a"(a), "d"(d), "c"(msr) : "memory"); +} + + +static inline uint16_t inw(uint16_t port) +{ + uint16_t tmp; + + __asm__ __volatile__("in %%dx, %%ax" + : /* output */ "=a" (tmp) + : /* input */ "d" (port)); + + return tmp; +} + +static inline uint16_t get_es(void) +{ + uint16_t es; + + __asm__ __volatile__("mov %%es, %[es]" + : /* output */ [es]"=rm"(es)); + return es; +} + +static inline uint16_t get_cs(void) +{ + uint16_t cs; + + __asm__ __volatile__("mov %%cs, %[cs]" + : /* output */ [cs]"=rm"(cs)); + return cs; +} + +static inline uint16_t get_ss(void) +{ + uint16_t ss; + + __asm__ __volatile__("mov %%ss, %[ss]" + : /* output */ [ss]"=rm"(ss)); + return ss; +} + +static inline uint16_t get_ds(void) +{ + uint16_t ds; + + __asm__ __volatile__("mov %%ds, %[ds]" + : /* output */ [ds]"=rm"(ds)); + return ds; +} + +static inline uint16_t get_fs(void) +{ + uint16_t fs; + + __asm__ __volatile__("mov %%fs, %[fs]" + : /* output */ [fs]"=rm"(fs)); + return fs; +} + +static inline uint16_t get_gs(void) +{ + uint16_t gs; + + __asm__ __volatile__("mov %%gs, %[gs]" + : /* output */ [gs]"=rm"(gs)); + return gs; +} + +static inline uint16_t get_tr(void) +{ + uint16_t tr; + + __asm__ __volatile__("str %[tr]" + : /* output */ [tr]"=rm"(tr)); + return tr; +} + +static inline uint64_t get_cr0(void) +{ + uint64_t cr0; + + __asm__ __volatile__("mov %%cr0, %[cr0]" + : /* output */ [cr0]"=r"(cr0)); + return cr0; +} + +static inline uint64_t get_cr3(void) +{ + uint64_t cr3; + + __asm__ __volatile__("mov %%cr3, %[cr3]" + : /* output */ [cr3]"=r"(cr3)); + return cr3; +} + +static inline uint64_t get_cr4(void) +{ + uint64_t cr4; + + __asm__ __volatile__("mov %%cr4, %[cr4]" + : /* output */ [cr4]"=r"(cr4)); + return cr4; +} + +static inline void set_cr4(uint64_t val) +{ + __asm__ __volatile__("mov %0, %%cr4" : : "r" (val) : "memory"); +} + +static inline u64 xgetbv(u32 index) +{ + u32 eax, edx; + + __asm__ __volatile__("xgetbv;" + : "=a" (eax), "=d" (edx) + : "c" (index)); + return eax | ((u64)edx << 32); +} + +static inline void xsetbv(u32 index, u64 value) +{ + u32 eax = value; + u32 edx = value >> 32; + + __asm__ __volatile__("xsetbv" :: "a" (eax), "d" (edx), "c" (index)); +} + +static inline void wrpkru(u32 pkru) +{ + /* Note, ECX and EDX are architecturally required to be '0'. 
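These register accessors are typically combined in guest code for low-level state setup. A small illustrative fragment (assumes guest CPUID advertises XSAVE; this_cpu_has() and the X86_CR4/XFEATURE macros are defined elsewhere in this header):

	GUEST_ASSERT(this_cpu_has(X86_FEATURE_XSAVE));
	set_cr4(get_cr4() | X86_CR4_OSXSAVE);
	xsetbv(0, XFEATURE_MASK_FP | XFEATURE_MASK_SSE);
	GUEST_ASSERT(xgetbv(0) == (XFEATURE_MASK_FP | XFEATURE_MASK_SSE));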
*/ + asm volatile(".byte 0x0f,0x01,0xef\n\t" + : : "a" (pkru), "c"(0), "d"(0)); +} + +static inline struct desc_ptr get_gdt(void) +{ + struct desc_ptr gdt; + __asm__ __volatile__("sgdt %[gdt]" + : /* output */ [gdt]"=m"(gdt)); + return gdt; +} + +static inline struct desc_ptr get_idt(void) +{ + struct desc_ptr idt; + __asm__ __volatile__("sidt %[idt]" + : /* output */ [idt]"=m"(idt)); + return idt; +} + +static inline void outl(uint16_t port, uint32_t value) +{ + __asm__ __volatile__("outl %%eax, %%dx" : : "d"(port), "a"(value)); +} + +static inline void __cpuid(uint32_t function, uint32_t index, + uint32_t *eax, uint32_t *ebx, + uint32_t *ecx, uint32_t *edx) +{ + *eax = function; + *ecx = index; + + asm volatile("cpuid" + : "=a" (*eax), + "=b" (*ebx), + "=c" (*ecx), + "=d" (*edx) + : "0" (*eax), "2" (*ecx) + : "memory"); +} + +static inline void cpuid(uint32_t function, + uint32_t *eax, uint32_t *ebx, + uint32_t *ecx, uint32_t *edx) +{ + return __cpuid(function, 0, eax, ebx, ecx, edx); +} + +static inline uint32_t this_cpu_fms(void) +{ + uint32_t eax, ebx, ecx, edx; + + cpuid(1, &eax, &ebx, &ecx, &edx); + return eax; +} + +static inline uint32_t this_cpu_family(void) +{ + return x86_family(this_cpu_fms()); +} + +static inline uint32_t this_cpu_model(void) +{ + return x86_model(this_cpu_fms()); +} + +static inline bool this_cpu_vendor_string_is(const char *vendor) +{ + const uint32_t *chunk = (const uint32_t *)vendor; + uint32_t eax, ebx, ecx, edx; + + cpuid(0, &eax, &ebx, &ecx, &edx); + return (ebx == chunk[0] && edx == chunk[1] && ecx == chunk[2]); +} + +static inline bool this_cpu_is_intel(void) +{ + return this_cpu_vendor_string_is("GenuineIntel"); +} + +/* + * Exclude early K5 samples with a vendor string of "AMDisbetter!" + */ +static inline bool this_cpu_is_amd(void) +{ + return this_cpu_vendor_string_is("AuthenticAMD"); +} + +static inline uint32_t __this_cpu_has(uint32_t function, uint32_t index, + uint8_t reg, uint8_t lo, uint8_t hi) +{ + uint32_t gprs[4]; + + __cpuid(function, index, + &gprs[KVM_CPUID_EAX], &gprs[KVM_CPUID_EBX], + &gprs[KVM_CPUID_ECX], &gprs[KVM_CPUID_EDX]); + + return (gprs[reg] & GENMASK(hi, lo)) >> lo; +} + +static inline bool this_cpu_has(struct kvm_x86_cpu_feature feature) +{ + return __this_cpu_has(feature.function, feature.index, + feature.reg, feature.bit, feature.bit); +} + +static inline uint32_t this_cpu_property(struct kvm_x86_cpu_property property) +{ + return __this_cpu_has(property.function, property.index, + property.reg, property.lo_bit, property.hi_bit); +} + +static __always_inline bool this_cpu_has_p(struct kvm_x86_cpu_property property) +{ + uint32_t max_leaf; + + switch (property.function & 0xc0000000) { + case 0: + max_leaf = this_cpu_property(X86_PROPERTY_MAX_BASIC_LEAF); + break; + case 0x40000000: + max_leaf = this_cpu_property(X86_PROPERTY_MAX_KVM_LEAF); + break; + case 0x80000000: + max_leaf = this_cpu_property(X86_PROPERTY_MAX_EXT_LEAF); + break; + case 0xc0000000: + max_leaf = this_cpu_property(X86_PROPERTY_MAX_CENTAUR_LEAF); + } + return max_leaf >= property.function; +} + +static inline bool this_pmu_has(struct kvm_x86_pmu_feature feature) +{ + uint32_t nr_bits; + + if (feature.f.reg == KVM_CPUID_EBX) { + nr_bits = this_cpu_property(X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH); + return nr_bits > feature.f.bit && !this_cpu_has(feature.f); + } + + GUEST_ASSERT(feature.f.reg == KVM_CPUID_ECX); + nr_bits = this_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS); + return nr_bits > feature.f.bit || this_cpu_has(feature.f); +} + +static 
__always_inline uint64_t this_cpu_supported_xcr0(void) +{ + if (!this_cpu_has_p(X86_PROPERTY_SUPPORTED_XCR0_LO)) + return 0; + + return this_cpu_property(X86_PROPERTY_SUPPORTED_XCR0_LO) | + ((uint64_t)this_cpu_property(X86_PROPERTY_SUPPORTED_XCR0_HI) << 32); +} + +typedef u32 __attribute__((vector_size(16))) sse128_t; +#define __sse128_u union { sse128_t vec; u64 as_u64[2]; u32 as_u32[4]; } +#define sse128_lo(x) ({ __sse128_u t; t.vec = x; t.as_u64[0]; }) +#define sse128_hi(x) ({ __sse128_u t; t.vec = x; t.as_u64[1]; }) + +static inline void read_sse_reg(int reg, sse128_t *data) +{ + switch (reg) { + case 0: + asm("movdqa %%xmm0, %0" : "=m"(*data)); + break; + case 1: + asm("movdqa %%xmm1, %0" : "=m"(*data)); + break; + case 2: + asm("movdqa %%xmm2, %0" : "=m"(*data)); + break; + case 3: + asm("movdqa %%xmm3, %0" : "=m"(*data)); + break; + case 4: + asm("movdqa %%xmm4, %0" : "=m"(*data)); + break; + case 5: + asm("movdqa %%xmm5, %0" : "=m"(*data)); + break; + case 6: + asm("movdqa %%xmm6, %0" : "=m"(*data)); + break; + case 7: + asm("movdqa %%xmm7, %0" : "=m"(*data)); + break; + default: + BUG(); + } +} + +static inline void write_sse_reg(int reg, const sse128_t *data) +{ + switch (reg) { + case 0: + asm("movdqa %0, %%xmm0" : : "m"(*data)); + break; + case 1: + asm("movdqa %0, %%xmm1" : : "m"(*data)); + break; + case 2: + asm("movdqa %0, %%xmm2" : : "m"(*data)); + break; + case 3: + asm("movdqa %0, %%xmm3" : : "m"(*data)); + break; + case 4: + asm("movdqa %0, %%xmm4" : : "m"(*data)); + break; + case 5: + asm("movdqa %0, %%xmm5" : : "m"(*data)); + break; + case 6: + asm("movdqa %0, %%xmm6" : : "m"(*data)); + break; + case 7: + asm("movdqa %0, %%xmm7" : : "m"(*data)); + break; + default: + BUG(); + } +} + +static inline void cpu_relax(void) +{ + asm volatile("rep; nop" ::: "memory"); +} + +static inline void udelay(unsigned long usec) +{ + uint64_t start, now, cycles; + + GUEST_ASSERT(guest_tsc_khz); + cycles = guest_tsc_khz / 1000 * usec; + + /* + * Deliberately don't PAUSE, a.k.a. cpu_relax(), so that the delay is + * as accurate as possible, e.g. doesn't trigger PAUSE-Loop VM-Exits. + */ + start = rdtsc(); + do { + now = rdtsc(); + } while (now - start < cycles); +} + +#define ud2() \ + __asm__ __volatile__( \ + "ud2\n" \ + ) + +#define hlt() \ + __asm__ __volatile__( \ + "hlt\n" \ + ) + +struct kvm_x86_state *vcpu_save_state(struct kvm_vcpu *vcpu); +void vcpu_load_state(struct kvm_vcpu *vcpu, struct kvm_x86_state *state); +void kvm_x86_state_cleanup(struct kvm_x86_state *state); + +const struct kvm_msr_list *kvm_get_msr_index_list(void); +const struct kvm_msr_list *kvm_get_feature_msr_index_list(void); +bool kvm_msr_is_in_save_restore_list(uint32_t msr_index); +uint64_t kvm_get_feature_msr(uint64_t msr_index); + +static inline void vcpu_msrs_get(struct kvm_vcpu *vcpu, + struct kvm_msrs *msrs) +{ + int r = __vcpu_ioctl(vcpu, KVM_GET_MSRS, msrs); + + TEST_ASSERT(r == msrs->nmsrs, + "KVM_GET_MSRS failed, r: %i (failed on MSR %x)", + r, r < 0 || r >= msrs->nmsrs ? -1 : msrs->entries[r].index); +} +static inline void vcpu_msrs_set(struct kvm_vcpu *vcpu, struct kvm_msrs *msrs) +{ + int r = __vcpu_ioctl(vcpu, KVM_SET_MSRS, msrs); + + TEST_ASSERT(r == msrs->nmsrs, + "KVM_SET_MSRS failed, r: %i (failed on MSR %x)", + r, r < 0 || r >= msrs->nmsrs ? 
-1 : msrs->entries[r].index); +} +static inline void vcpu_debugregs_get(struct kvm_vcpu *vcpu, + struct kvm_debugregs *debugregs) +{ + vcpu_ioctl(vcpu, KVM_GET_DEBUGREGS, debugregs); +} +static inline void vcpu_debugregs_set(struct kvm_vcpu *vcpu, + struct kvm_debugregs *debugregs) +{ + vcpu_ioctl(vcpu, KVM_SET_DEBUGREGS, debugregs); +} +static inline void vcpu_xsave_get(struct kvm_vcpu *vcpu, + struct kvm_xsave *xsave) +{ + vcpu_ioctl(vcpu, KVM_GET_XSAVE, xsave); +} +static inline void vcpu_xsave2_get(struct kvm_vcpu *vcpu, + struct kvm_xsave *xsave) +{ + vcpu_ioctl(vcpu, KVM_GET_XSAVE2, xsave); +} +static inline void vcpu_xsave_set(struct kvm_vcpu *vcpu, + struct kvm_xsave *xsave) +{ + vcpu_ioctl(vcpu, KVM_SET_XSAVE, xsave); +} +static inline void vcpu_xcrs_get(struct kvm_vcpu *vcpu, + struct kvm_xcrs *xcrs) +{ + vcpu_ioctl(vcpu, KVM_GET_XCRS, xcrs); +} +static inline void vcpu_xcrs_set(struct kvm_vcpu *vcpu, struct kvm_xcrs *xcrs) +{ + vcpu_ioctl(vcpu, KVM_SET_XCRS, xcrs); +} + +const struct kvm_cpuid_entry2 *get_cpuid_entry(const struct kvm_cpuid2 *cpuid, + uint32_t function, uint32_t index); +const struct kvm_cpuid2 *kvm_get_supported_cpuid(void); + +static inline uint32_t kvm_cpu_fms(void) +{ + return get_cpuid_entry(kvm_get_supported_cpuid(), 0x1, 0)->eax; +} + +static inline uint32_t kvm_cpu_family(void) +{ + return x86_family(kvm_cpu_fms()); +} + +static inline uint32_t kvm_cpu_model(void) +{ + return x86_model(kvm_cpu_fms()); +} + +bool kvm_cpuid_has(const struct kvm_cpuid2 *cpuid, + struct kvm_x86_cpu_feature feature); + +static inline bool kvm_cpu_has(struct kvm_x86_cpu_feature feature) +{ + return kvm_cpuid_has(kvm_get_supported_cpuid(), feature); +} + +uint32_t kvm_cpuid_property(const struct kvm_cpuid2 *cpuid, + struct kvm_x86_cpu_property property); + +static inline uint32_t kvm_cpu_property(struct kvm_x86_cpu_property property) +{ + return kvm_cpuid_property(kvm_get_supported_cpuid(), property); +} + +static __always_inline bool kvm_cpu_has_p(struct kvm_x86_cpu_property property) +{ + uint32_t max_leaf; + + switch (property.function & 0xc0000000) { + case 0: + max_leaf = kvm_cpu_property(X86_PROPERTY_MAX_BASIC_LEAF); + break; + case 0x40000000: + max_leaf = kvm_cpu_property(X86_PROPERTY_MAX_KVM_LEAF); + break; + case 0x80000000: + max_leaf = kvm_cpu_property(X86_PROPERTY_MAX_EXT_LEAF); + break; + case 0xc0000000: + max_leaf = kvm_cpu_property(X86_PROPERTY_MAX_CENTAUR_LEAF); + } + return max_leaf >= property.function; +} + +static inline bool kvm_pmu_has(struct kvm_x86_pmu_feature feature) +{ + uint32_t nr_bits; + + if (feature.f.reg == KVM_CPUID_EBX) { + nr_bits = kvm_cpu_property(X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH); + return nr_bits > feature.f.bit && !kvm_cpu_has(feature.f); + } + + TEST_ASSERT_EQ(feature.f.reg, KVM_CPUID_ECX); + nr_bits = kvm_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS); + return nr_bits > feature.f.bit || kvm_cpu_has(feature.f); +} + +static __always_inline uint64_t kvm_cpu_supported_xcr0(void) +{ + if (!kvm_cpu_has_p(X86_PROPERTY_SUPPORTED_XCR0_LO)) + return 0; + + return kvm_cpu_property(X86_PROPERTY_SUPPORTED_XCR0_LO) | + ((uint64_t)kvm_cpu_property(X86_PROPERTY_SUPPORTED_XCR0_HI) << 32); +} + +static inline size_t kvm_cpuid2_size(int nr_entries) +{ + return sizeof(struct kvm_cpuid2) + + sizeof(struct kvm_cpuid_entry2) * nr_entries; +} + +/* + * Allocate a "struct kvm_cpuid2* instance, with the 0-length arrary of + * entries sized to hold @nr_entries. The caller is responsible for freeing + * the struct. 
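The kvm_cpu_*() and kvm_pmu_has() variants above answer "what can KVM give a guest" from the host side and are the usual gating mechanism at the top of a test. An illustrative fragment:

	TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XSAVE));
	TEST_REQUIRE(kvm_pmu_has(X86_PMU_FEATURE_BRANCH_INSNS_RETIRED));

	if (kvm_cpu_has_p(X86_PROPERTY_PMU_VERSION))
		pr_info("KVM reports PMU version %u\n",
			kvm_cpu_property(X86_PROPERTY_PMU_VERSION));

	pr_info("KVM-supported XCR0: 0x%lx\n", kvm_cpu_supported_xcr0());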
+ */ +static inline struct kvm_cpuid2 *allocate_kvm_cpuid2(int nr_entries) +{ + struct kvm_cpuid2 *cpuid; + + cpuid = malloc(kvm_cpuid2_size(nr_entries)); + TEST_ASSERT(cpuid, "-ENOMEM when allocating kvm_cpuid2"); + + cpuid->nent = nr_entries; + + return cpuid; +} + +void vcpu_init_cpuid(struct kvm_vcpu *vcpu, const struct kvm_cpuid2 *cpuid); + +static inline struct kvm_cpuid_entry2 *__vcpu_get_cpuid_entry(struct kvm_vcpu *vcpu, + uint32_t function, + uint32_t index) +{ + return (struct kvm_cpuid_entry2 *)get_cpuid_entry(vcpu->cpuid, + function, index); +} + +static inline struct kvm_cpuid_entry2 *vcpu_get_cpuid_entry(struct kvm_vcpu *vcpu, + uint32_t function) +{ + return __vcpu_get_cpuid_entry(vcpu, function, 0); +} + +static inline int __vcpu_set_cpuid(struct kvm_vcpu *vcpu) +{ + int r; + + TEST_ASSERT(vcpu->cpuid, "Must do vcpu_init_cpuid() first"); + r = __vcpu_ioctl(vcpu, KVM_SET_CPUID2, vcpu->cpuid); + if (r) + return r; + + /* On success, refresh the cache to pick up adjustments made by KVM. */ + vcpu_ioctl(vcpu, KVM_GET_CPUID2, vcpu->cpuid); + return 0; +} + +static inline void vcpu_set_cpuid(struct kvm_vcpu *vcpu) +{ + TEST_ASSERT(vcpu->cpuid, "Must do vcpu_init_cpuid() first"); + vcpu_ioctl(vcpu, KVM_SET_CPUID2, vcpu->cpuid); + + /* Refresh the cache to pick up adjustments made by KVM. */ + vcpu_ioctl(vcpu, KVM_GET_CPUID2, vcpu->cpuid); +} + +static inline void vcpu_get_cpuid(struct kvm_vcpu *vcpu) +{ + vcpu_ioctl(vcpu, KVM_GET_CPUID2, vcpu->cpuid); +} + +void vcpu_set_cpuid_property(struct kvm_vcpu *vcpu, + struct kvm_x86_cpu_property property, + uint32_t value); +void vcpu_set_cpuid_maxphyaddr(struct kvm_vcpu *vcpu, uint8_t maxphyaddr); + +void vcpu_clear_cpuid_entry(struct kvm_vcpu *vcpu, uint32_t function); + +static inline bool vcpu_cpuid_has(struct kvm_vcpu *vcpu, + struct kvm_x86_cpu_feature feature) +{ + struct kvm_cpuid_entry2 *entry; + + entry = __vcpu_get_cpuid_entry(vcpu, feature.function, feature.index); + return *((&entry->eax) + feature.reg) & BIT(feature.bit); +} + +void vcpu_set_or_clear_cpuid_feature(struct kvm_vcpu *vcpu, + struct kvm_x86_cpu_feature feature, + bool set); + +static inline void vcpu_set_cpuid_feature(struct kvm_vcpu *vcpu, + struct kvm_x86_cpu_feature feature) +{ + vcpu_set_or_clear_cpuid_feature(vcpu, feature, true); + +} + +static inline void vcpu_clear_cpuid_feature(struct kvm_vcpu *vcpu, + struct kvm_x86_cpu_feature feature) +{ + vcpu_set_or_clear_cpuid_feature(vcpu, feature, false); +} + +uint64_t vcpu_get_msr(struct kvm_vcpu *vcpu, uint64_t msr_index); +int _vcpu_set_msr(struct kvm_vcpu *vcpu, uint64_t msr_index, uint64_t msr_value); + +/* + * Assert on an MSR access(es) and pretty print the MSR name when possible. + * Note, the caller provides the stringified name so that the name of macro is + * printed, not the value the macro resolves to (due to macro expansion). + */ +#define TEST_ASSERT_MSR(cond, fmt, msr, str, args...) \ +do { \ + if (__builtin_constant_p(msr)) { \ + TEST_ASSERT(cond, fmt, str, args); \ + } else if (!(cond)) { \ + char buf[16]; \ + \ + snprintf(buf, sizeof(buf), "MSR 0x%x", msr); \ + TEST_ASSERT(cond, fmt, buf, args); \ + } \ +} while (0) + +/* + * Returns true if KVM should return the last written value when reading an MSR + * from userspace, e.g. the MSR isn't a command MSR, doesn't emulate state that + * is changing, etc. This is NOT an exhaustive list! The intent is to filter + * out MSRs that are not durable _and_ that a selftest wants to write. 
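As a usage illustration (not part of this patch), the write-then-verify behavior motivated above plays out in the vcpu_set_msr() macro that follows: a durable MSR is written, read back, and compared, while an inherently volatile MSR such as IA32_TSC is only written. The vCPU and the MSR/value choices here are placeholders:

#include "kvm_util.h"
#include "processor.h"

static void set_some_msrs(struct kvm_vcpu *vcpu)
{
	/* Durable MSR: the macro writes it, reads it back, and asserts equality. */
	vcpu_set_msr(vcpu, MSR_IA32_MISC_ENABLE, 0);

	/* IA32_TSC is always moving, so the read-back check is skipped. */
	vcpu_set_msr(vcpu, MSR_IA32_TSC, 0);
}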
+ */ +static inline bool is_durable_msr(uint32_t msr) +{ + return msr != MSR_IA32_TSC; +} + +#define vcpu_set_msr(vcpu, msr, val) \ +do { \ + uint64_t r, v = val; \ + \ + TEST_ASSERT_MSR(_vcpu_set_msr(vcpu, msr, v) == 1, \ + "KVM_SET_MSRS failed on %s, value = 0x%lx", msr, #msr, v); \ + if (!is_durable_msr(msr)) \ + break; \ + r = vcpu_get_msr(vcpu, msr); \ + TEST_ASSERT_MSR(r == v, "Set %s to '0x%lx', got back '0x%lx'", msr, #msr, v, r);\ +} while (0) + +void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits); +void kvm_init_vm_address_properties(struct kvm_vm *vm); +bool vm_is_unrestricted_guest(struct kvm_vm *vm); + +struct ex_regs { + uint64_t rax, rcx, rdx, rbx; + uint64_t rbp, rsi, rdi; + uint64_t r8, r9, r10, r11; + uint64_t r12, r13, r14, r15; + uint64_t vector; + uint64_t error_code; + uint64_t rip; + uint64_t cs; + uint64_t rflags; +}; + +struct idt_entry { + uint16_t offset0; + uint16_t selector; + uint16_t ist : 3; + uint16_t : 5; + uint16_t type : 4; + uint16_t : 1; + uint16_t dpl : 2; + uint16_t p : 1; + uint16_t offset1; + uint32_t offset2; uint32_t reserved; +}; + +void vm_install_exception_handler(struct kvm_vm *vm, int vector, + void (*handler)(struct ex_regs *)); + +/* If a toddler were to say "abracadabra". */ +#define KVM_EXCEPTION_MAGIC 0xabacadabaULL + +/* + * KVM selftest exception fixup uses registers to coordinate with the exception + * handler, versus the kernel's in-memory tables and KVM-Unit-Tests's in-memory + * per-CPU data. Using only registers avoids having to map memory into the + * guest, doesn't require a valid, stable GS.base, and reduces the risk of + * for recursive faults when accessing memory in the handler. The downside to + * using registers is that it restricts what registers can be used by the actual + * instruction. But, selftests are 64-bit only, making register* pressure a + * minor concern. Use r9-r11 as they are volatile, i.e. don't need to be saved + * by the callee, and except for r11 are not implicit parameters to any + * instructions. Ideally, fixup would use r8-r10 and thus avoid implicit + * parameters entirely, but Hyper-V's hypercall ABI uses r8 and testing Hyper-V + * is higher priority than testing non-faulting SYSCALL/SYSRET. + * + * Note, the fixup handler deliberately does not handle #DE, i.e. the vector + * is guaranteed to be non-zero on fault. + * + * REGISTER INPUTS: + * r9 = MAGIC + * r10 = RIP + * r11 = new RIP on fault + * + * REGISTER OUTPUTS: + * r9 = exception vector (non-zero) + * r10 = error code + */ +#define __KVM_ASM_SAFE(insn, fep) \ + "mov $" __stringify(KVM_EXCEPTION_MAGIC) ", %%r9\n\t" \ + "lea 1f(%%rip), %%r10\n\t" \ + "lea 2f(%%rip), %%r11\n\t" \ + fep "1: " insn "\n\t" \ + "xor %%r9, %%r9\n\t" \ + "2:\n\t" \ + "mov %%r9b, %[vector]\n\t" \ + "mov %%r10, %[error_code]\n\t" + +#define KVM_ASM_SAFE(insn) __KVM_ASM_SAFE(insn, "") +#define KVM_ASM_SAFE_FEP(insn) __KVM_ASM_SAFE(insn, KVM_FEP) + +#define KVM_ASM_SAFE_OUTPUTS(v, ec) [vector] "=qm"(v), [error_code] "=rm"(ec) +#define KVM_ASM_SAFE_CLOBBERS "r9", "r10", "r11" + +#define kvm_asm_safe(insn, inputs...) \ +({ \ + uint64_t ign_error_code; \ + uint8_t vector; \ + \ + asm volatile(KVM_ASM_SAFE(insn) \ + : KVM_ASM_SAFE_OUTPUTS(vector, ign_error_code) \ + : inputs \ + : KVM_ASM_SAFE_CLOBBERS); \ + vector; \ +}) + +#define kvm_asm_safe_ec(insn, error_code, inputs...) 
\ +({ \ + uint8_t vector; \ + \ + asm volatile(KVM_ASM_SAFE(insn) \ + : KVM_ASM_SAFE_OUTPUTS(vector, error_code) \ + : inputs \ + : KVM_ASM_SAFE_CLOBBERS); \ + vector; \ +}) + +#define kvm_asm_safe_fep(insn, inputs...) \ +({ \ + uint64_t ign_error_code; \ + uint8_t vector; \ + \ + asm volatile(KVM_ASM_SAFE(insn) \ + : KVM_ASM_SAFE_OUTPUTS(vector, ign_error_code) \ + : inputs \ + : KVM_ASM_SAFE_CLOBBERS); \ + vector; \ +}) + +#define kvm_asm_safe_ec_fep(insn, error_code, inputs...) \ +({ \ + uint8_t vector; \ + \ + asm volatile(KVM_ASM_SAFE_FEP(insn) \ + : KVM_ASM_SAFE_OUTPUTS(vector, error_code) \ + : inputs \ + : KVM_ASM_SAFE_CLOBBERS); \ + vector; \ +}) + +#define BUILD_READ_U64_SAFE_HELPER(insn, _fep, _FEP) \ +static inline uint8_t insn##_safe ##_fep(uint32_t idx, uint64_t *val) \ +{ \ + uint64_t error_code; \ + uint8_t vector; \ + uint32_t a, d; \ + \ + asm volatile(KVM_ASM_SAFE##_FEP(#insn) \ + : "=a"(a), "=d"(d), \ + KVM_ASM_SAFE_OUTPUTS(vector, error_code) \ + : "c"(idx) \ + : KVM_ASM_SAFE_CLOBBERS); \ + \ + *val = (uint64_t)a | ((uint64_t)d << 32); \ + return vector; \ +} + +/* + * Generate {insn}_safe() and {insn}_safe_fep() helpers for instructions that + * use ECX as in input index, and EDX:EAX as a 64-bit output. + */ +#define BUILD_READ_U64_SAFE_HELPERS(insn) \ + BUILD_READ_U64_SAFE_HELPER(insn, , ) \ + BUILD_READ_U64_SAFE_HELPER(insn, _fep, _FEP) \ + +BUILD_READ_U64_SAFE_HELPERS(rdmsr) +BUILD_READ_U64_SAFE_HELPERS(rdpmc) +BUILD_READ_U64_SAFE_HELPERS(xgetbv) + +static inline uint8_t wrmsr_safe(uint32_t msr, uint64_t val) +{ + return kvm_asm_safe("wrmsr", "a"(val & -1u), "d"(val >> 32), "c"(msr)); +} + +static inline uint8_t xsetbv_safe(uint32_t index, uint64_t value) +{ + u32 eax = value; + u32 edx = value >> 32; + + return kvm_asm_safe("xsetbv", "a" (eax), "d" (edx), "c" (index)); +} + +bool kvm_is_tdp_enabled(void); + +static inline bool kvm_is_pmu_enabled(void) +{ + return get_kvm_param_bool("enable_pmu"); +} + +static inline bool kvm_is_forced_emulation_enabled(void) +{ + return !!get_kvm_param_integer("force_emulation_prefix"); +} + +uint64_t *__vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr, + int *level); +uint64_t *vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr); + +uint64_t kvm_hypercall(uint64_t nr, uint64_t a0, uint64_t a1, uint64_t a2, + uint64_t a3); +uint64_t __xen_hypercall(uint64_t nr, uint64_t a0, void *a1); +void xen_hypercall(uint64_t nr, uint64_t a0, void *a1); + +static inline uint64_t __kvm_hypercall_map_gpa_range(uint64_t gpa, + uint64_t size, uint64_t flags) +{ + return kvm_hypercall(KVM_HC_MAP_GPA_RANGE, gpa, size >> PAGE_SHIFT, flags, 0); +} + +static inline void kvm_hypercall_map_gpa_range(uint64_t gpa, uint64_t size, + uint64_t flags) +{ + uint64_t ret = __kvm_hypercall_map_gpa_range(gpa, size, flags); + + GUEST_ASSERT(!ret); +} + +void __vm_xsave_require_permission(uint64_t xfeature, const char *name); + +#define vm_xsave_require_permission(xfeature) \ + __vm_xsave_require_permission(xfeature, #xfeature) + +enum pg_level { + PG_LEVEL_NONE, + PG_LEVEL_4K, + PG_LEVEL_2M, + PG_LEVEL_1G, + PG_LEVEL_512G, + PG_LEVEL_NUM +}; + +#define PG_LEVEL_SHIFT(_level) ((_level - 1) * 9 + 12) +#define PG_LEVEL_SIZE(_level) (1ull << PG_LEVEL_SHIFT(_level)) + +#define PG_SIZE_4K PG_LEVEL_SIZE(PG_LEVEL_4K) +#define PG_SIZE_2M PG_LEVEL_SIZE(PG_LEVEL_2M) +#define PG_SIZE_1G PG_LEVEL_SIZE(PG_LEVEL_1G) + +void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, int level); +void virt_map_level(struct kvm_vm *vm, uint64_t vaddr, uint64_t 
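A guest-side sketch of how the *_safe() probes above might be used: because the register-based fixup hands the vector back to the caller instead of letting the fault kill the guest, a test can assert that touching a nonexistent MSR raises #GP. This is illustrative only, not an existing test; the MSR index is deliberately bogus, the assertion macros come from the ucall framework, and the expectation assumes the host is not configured to ignore unknown MSRs:

#include "processor.h"
#include "ucall_common.h"

static void guest_probe_bad_msr(void)
{
	uint64_t val;
	uint8_t vector;

	/* RDMSR of an unimplemented MSR should be reflected back as #GP. */
	vector = rdmsr_safe(0xdeadc0de, &val);
	GUEST_ASSERT_EQ(vector, GP_VECTOR);

	/* Likewise for WRMSR. */
	vector = wrmsr_safe(0xdeadc0de, 0);
	GUEST_ASSERT_EQ(vector, GP_VECTOR);

	GUEST_DONE();
}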
paddr, + uint64_t nr_bytes, int level); + +/* + * Basic CPU control in CR0 + */ +#define X86_CR0_PE (1UL<<0) /* Protection Enable */ +#define X86_CR0_MP (1UL<<1) /* Monitor Coprocessor */ +#define X86_CR0_EM (1UL<<2) /* Emulation */ +#define X86_CR0_TS (1UL<<3) /* Task Switched */ +#define X86_CR0_ET (1UL<<4) /* Extension Type */ +#define X86_CR0_NE (1UL<<5) /* Numeric Error */ +#define X86_CR0_WP (1UL<<16) /* Write Protect */ +#define X86_CR0_AM (1UL<<18) /* Alignment Mask */ +#define X86_CR0_NW (1UL<<29) /* Not Write-through */ +#define X86_CR0_CD (1UL<<30) /* Cache Disable */ +#define X86_CR0_PG (1UL<<31) /* Paging */ + +#define PFERR_PRESENT_BIT 0 +#define PFERR_WRITE_BIT 1 +#define PFERR_USER_BIT 2 +#define PFERR_RSVD_BIT 3 +#define PFERR_FETCH_BIT 4 +#define PFERR_PK_BIT 5 +#define PFERR_SGX_BIT 15 +#define PFERR_GUEST_FINAL_BIT 32 +#define PFERR_GUEST_PAGE_BIT 33 +#define PFERR_IMPLICIT_ACCESS_BIT 48 + +#define PFERR_PRESENT_MASK BIT(PFERR_PRESENT_BIT) +#define PFERR_WRITE_MASK BIT(PFERR_WRITE_BIT) +#define PFERR_USER_MASK BIT(PFERR_USER_BIT) +#define PFERR_RSVD_MASK BIT(PFERR_RSVD_BIT) +#define PFERR_FETCH_MASK BIT(PFERR_FETCH_BIT) +#define PFERR_PK_MASK BIT(PFERR_PK_BIT) +#define PFERR_SGX_MASK BIT(PFERR_SGX_BIT) +#define PFERR_GUEST_FINAL_MASK BIT_ULL(PFERR_GUEST_FINAL_BIT) +#define PFERR_GUEST_PAGE_MASK BIT_ULL(PFERR_GUEST_PAGE_BIT) +#define PFERR_IMPLICIT_ACCESS BIT_ULL(PFERR_IMPLICIT_ACCESS_BIT) + +bool sys_clocksource_is_based_on_tsc(void); + +#endif /* SELFTEST_KVM_PROCESSOR_H */ diff --git a/tools/testing/selftests/kvm/include/x86/sev.h b/tools/testing/selftests/kvm/include/x86/sev.h new file mode 100644 index 000000000000..82c11c81a956 --- /dev/null +++ b/tools/testing/selftests/kvm/include/x86/sev.h @@ -0,0 +1,96 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Helpers used for SEV guests + * + */ +#ifndef SELFTEST_KVM_SEV_H +#define SELFTEST_KVM_SEV_H + +#include +#include + +#include "linux/psp-sev.h" + +#include "kvm_util.h" +#include "svm_util.h" +#include "processor.h" + +enum sev_guest_state { + SEV_GUEST_STATE_UNINITIALIZED = 0, + SEV_GUEST_STATE_LAUNCH_UPDATE, + SEV_GUEST_STATE_LAUNCH_SECRET, + SEV_GUEST_STATE_RUNNING, +}; + +#define SEV_POLICY_NO_DBG (1UL << 0) +#define SEV_POLICY_ES (1UL << 2) + +#define GHCB_MSR_TERM_REQ 0x100 + +void sev_vm_launch(struct kvm_vm *vm, uint32_t policy); +void sev_vm_launch_measure(struct kvm_vm *vm, uint8_t *measurement); +void sev_vm_launch_finish(struct kvm_vm *vm); + +struct kvm_vm *vm_sev_create_with_one_vcpu(uint32_t type, void *guest_code, + struct kvm_vcpu **cpu); +void vm_sev_launch(struct kvm_vm *vm, uint32_t policy, uint8_t *measurement); + +kvm_static_assert(SEV_RET_SUCCESS == 0); + +/* + * The KVM_MEMORY_ENCRYPT_OP uAPI is utter garbage and takes an "unsigned long" + * instead of a proper struct. The size of the parameter is embedded in the + * ioctl number, i.e. is ABI and thus immutable. Hack around the mess by + * creating an overlay to pass in an "unsigned long" without a cast (casting + * will make the compiler unhappy due to dereferencing an aliased pointer). 
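A stand-alone sketch of the overlay idiom described here: a union lets the same storage be initialized as a typed struct yet handed to an interface that insists on an "unsigned long", with no pointer cast to trip aliasing warnings. The struct and the consumer below are invented for illustration; only the idiom matches __vm_sev_ioctl():

#include <stdint.h>

struct cmd_like {
	uint32_t id;
	uint32_t error;
	uint64_t data;
};

/* Stand-in for an ABI whose argument is defined as a pointer to unsigned long. */
static int takes_ulong_ptr(unsigned long *arg)
{
	return *arg ? 0 : -1;
}

int main(void)
{
	union {
		struct cmd_like c;
		unsigned long raw;
	} cmd = { .c = { .id = 1, .data = 0x1000 } };

	/* Pass &cmd.raw; no cast of &cmd.c is needed, the union does the aliasing. */
	return takes_ulong_ptr(&cmd.raw);
}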
+ */ +#define __vm_sev_ioctl(vm, cmd, arg) \ +({ \ + int r; \ + \ + union { \ + struct kvm_sev_cmd c; \ + unsigned long raw; \ + } sev_cmd = { .c = { \ + .id = (cmd), \ + .data = (uint64_t)(arg), \ + .sev_fd = (vm)->arch.sev_fd, \ + } }; \ + \ + r = __vm_ioctl(vm, KVM_MEMORY_ENCRYPT_OP, &sev_cmd.raw); \ + r ?: sev_cmd.c.error; \ +}) + +#define vm_sev_ioctl(vm, cmd, arg) \ +({ \ + int ret = __vm_sev_ioctl(vm, cmd, arg); \ + \ + __TEST_ASSERT_VM_VCPU_IOCTL(!ret, #cmd, ret, vm); \ +}) + +void sev_vm_init(struct kvm_vm *vm); +void sev_es_vm_init(struct kvm_vm *vm); + +static inline void sev_register_encrypted_memory(struct kvm_vm *vm, + struct userspace_mem_region *region) +{ + struct kvm_enc_region range = { + .addr = region->region.userspace_addr, + .size = region->region.memory_size, + }; + + vm_ioctl(vm, KVM_MEMORY_ENCRYPT_REG_REGION, &range); +} + +static inline void sev_launch_update_data(struct kvm_vm *vm, vm_paddr_t gpa, + uint64_t size) +{ + struct kvm_sev_launch_update_data update_data = { + .uaddr = (unsigned long)addr_gpa2hva(vm, gpa), + .len = size, + }; + + vm_sev_ioctl(vm, KVM_SEV_LAUNCH_UPDATE_DATA, &update_data); +} + +#endif /* SELFTEST_KVM_SEV_H */ diff --git a/tools/testing/selftests/kvm/include/x86/svm.h b/tools/testing/selftests/kvm/include/x86/svm.h new file mode 100644 index 000000000000..29cffd0a9181 --- /dev/null +++ b/tools/testing/selftests/kvm/include/x86/svm.h @@ -0,0 +1,320 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef SELFTEST_KVM_SVM_H +#define SELFTEST_KVM_SVM_H + +enum { + INTERCEPT_INTR, + INTERCEPT_NMI, + INTERCEPT_SMI, + INTERCEPT_INIT, + INTERCEPT_VINTR, + INTERCEPT_SELECTIVE_CR0, + INTERCEPT_STORE_IDTR, + INTERCEPT_STORE_GDTR, + INTERCEPT_STORE_LDTR, + INTERCEPT_STORE_TR, + INTERCEPT_LOAD_IDTR, + INTERCEPT_LOAD_GDTR, + INTERCEPT_LOAD_LDTR, + INTERCEPT_LOAD_TR, + INTERCEPT_RDTSC, + INTERCEPT_RDPMC, + INTERCEPT_PUSHF, + INTERCEPT_POPF, + INTERCEPT_CPUID, + INTERCEPT_RSM, + INTERCEPT_IRET, + INTERCEPT_INTn, + INTERCEPT_INVD, + INTERCEPT_PAUSE, + INTERCEPT_HLT, + INTERCEPT_INVLPG, + INTERCEPT_INVLPGA, + INTERCEPT_IOIO_PROT, + INTERCEPT_MSR_PROT, + INTERCEPT_TASK_SWITCH, + INTERCEPT_FERR_FREEZE, + INTERCEPT_SHUTDOWN, + INTERCEPT_VMRUN, + INTERCEPT_VMMCALL, + INTERCEPT_VMLOAD, + INTERCEPT_VMSAVE, + INTERCEPT_STGI, + INTERCEPT_CLGI, + INTERCEPT_SKINIT, + INTERCEPT_RDTSCP, + INTERCEPT_ICEBP, + INTERCEPT_WBINVD, + INTERCEPT_MONITOR, + INTERCEPT_MWAIT, + INTERCEPT_MWAIT_COND, + INTERCEPT_XSETBV, + INTERCEPT_RDPRU, +}; + +struct hv_vmcb_enlightenments { + struct __packed hv_enlightenments_control { + u32 nested_flush_hypercall:1; + u32 msr_bitmap:1; + u32 enlightened_npt_tlb: 1; + u32 reserved:29; + } __packed hv_enlightenments_control; + u32 hv_vp_id; + u64 hv_vm_id; + u64 partition_assist_page; + u64 reserved; +} __packed; + +/* + * Hyper-V uses the software reserved clean bit in VMCB + */ +#define HV_VMCB_NESTED_ENLIGHTENMENTS (1U << 31) + +/* Synthetic VM-Exit */ +#define HV_SVM_EXITCODE_ENL 0xf0000000 +#define HV_SVM_ENL_EXITCODE_TRAP_AFTER_FLUSH (1) + +struct __attribute__ ((__packed__)) vmcb_control_area { + u32 intercept_cr; + u32 intercept_dr; + u32 intercept_exceptions; + u64 intercept; + u8 reserved_1[40]; + u16 pause_filter_thresh; + u16 pause_filter_count; + u64 iopm_base_pa; + u64 msrpm_base_pa; + u64 tsc_offset; + u32 asid; + u8 tlb_ctl; + u8 reserved_2[3]; + u32 int_ctl; + u32 int_vector; + u32 int_state; + u8 reserved_3[4]; + u32 exit_code; + u32 exit_code_hi; + u64 exit_info_1; + u64 exit_info_2; + u32 exit_int_info; + u32 exit_int_info_err; 
+ u64 nested_ctl; + u64 avic_vapic_bar; + u8 reserved_4[8]; + u32 event_inj; + u32 event_inj_err; + u64 nested_cr3; + u64 virt_ext; + u32 clean; + u32 reserved_5; + u64 next_rip; + u8 insn_len; + u8 insn_bytes[15]; + u64 avic_backing_page; /* Offset 0xe0 */ + u8 reserved_6[8]; /* Offset 0xe8 */ + u64 avic_logical_id; /* Offset 0xf0 */ + u64 avic_physical_id; /* Offset 0xf8 */ + u8 reserved_7[8]; + u64 vmsa_pa; /* Used for an SEV-ES guest */ + u8 reserved_8[720]; + /* + * Offset 0x3e0, 32 bytes reserved + * for use by hypervisor/software. + */ + union { + struct hv_vmcb_enlightenments hv_enlightenments; + u8 reserved_sw[32]; + }; +}; + + +#define TLB_CONTROL_DO_NOTHING 0 +#define TLB_CONTROL_FLUSH_ALL_ASID 1 +#define TLB_CONTROL_FLUSH_ASID 3 +#define TLB_CONTROL_FLUSH_ASID_LOCAL 7 + +#define V_TPR_MASK 0x0f + +#define V_IRQ_SHIFT 8 +#define V_IRQ_MASK (1 << V_IRQ_SHIFT) + +#define V_GIF_SHIFT 9 +#define V_GIF_MASK (1 << V_GIF_SHIFT) + +#define V_INTR_PRIO_SHIFT 16 +#define V_INTR_PRIO_MASK (0x0f << V_INTR_PRIO_SHIFT) + +#define V_IGN_TPR_SHIFT 20 +#define V_IGN_TPR_MASK (1 << V_IGN_TPR_SHIFT) + +#define V_INTR_MASKING_SHIFT 24 +#define V_INTR_MASKING_MASK (1 << V_INTR_MASKING_SHIFT) + +#define V_GIF_ENABLE_SHIFT 25 +#define V_GIF_ENABLE_MASK (1 << V_GIF_ENABLE_SHIFT) + +#define AVIC_ENABLE_SHIFT 31 +#define AVIC_ENABLE_MASK (1 << AVIC_ENABLE_SHIFT) + +#define LBR_CTL_ENABLE_MASK BIT_ULL(0) +#define VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK BIT_ULL(1) + +#define SVM_INTERRUPT_SHADOW_MASK 1 + +#define SVM_IOIO_STR_SHIFT 2 +#define SVM_IOIO_REP_SHIFT 3 +#define SVM_IOIO_SIZE_SHIFT 4 +#define SVM_IOIO_ASIZE_SHIFT 7 + +#define SVM_IOIO_TYPE_MASK 1 +#define SVM_IOIO_STR_MASK (1 << SVM_IOIO_STR_SHIFT) +#define SVM_IOIO_REP_MASK (1 << SVM_IOIO_REP_SHIFT) +#define SVM_IOIO_SIZE_MASK (7 << SVM_IOIO_SIZE_SHIFT) +#define SVM_IOIO_ASIZE_MASK (7 << SVM_IOIO_ASIZE_SHIFT) + +#define SVM_VM_CR_VALID_MASK 0x001fULL +#define SVM_VM_CR_SVM_LOCK_MASK 0x0008ULL +#define SVM_VM_CR_SVM_DIS_MASK 0x0010ULL + +#define SVM_NESTED_CTL_NP_ENABLE BIT(0) +#define SVM_NESTED_CTL_SEV_ENABLE BIT(1) + +struct __attribute__ ((__packed__)) vmcb_seg { + u16 selector; + u16 attrib; + u32 limit; + u64 base; +}; + +struct __attribute__ ((__packed__)) vmcb_save_area { + struct vmcb_seg es; + struct vmcb_seg cs; + struct vmcb_seg ss; + struct vmcb_seg ds; + struct vmcb_seg fs; + struct vmcb_seg gs; + struct vmcb_seg gdtr; + struct vmcb_seg ldtr; + struct vmcb_seg idtr; + struct vmcb_seg tr; + u8 reserved_1[43]; + u8 cpl; + u8 reserved_2[4]; + u64 efer; + u8 reserved_3[112]; + u64 cr4; + u64 cr3; + u64 cr0; + u64 dr7; + u64 dr6; + u64 rflags; + u64 rip; + u8 reserved_4[88]; + u64 rsp; + u8 reserved_5[24]; + u64 rax; + u64 star; + u64 lstar; + u64 cstar; + u64 sfmask; + u64 kernel_gs_base; + u64 sysenter_cs; + u64 sysenter_esp; + u64 sysenter_eip; + u64 cr2; + u8 reserved_6[32]; + u64 g_pat; + u64 dbgctl; + u64 br_from; + u64 br_to; + u64 last_excp_from; + u64 last_excp_to; +}; + +struct __attribute__ ((__packed__)) vmcb { + struct vmcb_control_area control; + struct vmcb_save_area save; +}; + +#define SVM_VM_CR_SVM_DISABLE 4 + +#define SVM_SELECTOR_S_SHIFT 4 +#define SVM_SELECTOR_DPL_SHIFT 5 +#define SVM_SELECTOR_P_SHIFT 7 +#define SVM_SELECTOR_AVL_SHIFT 8 +#define SVM_SELECTOR_L_SHIFT 9 +#define SVM_SELECTOR_DB_SHIFT 10 +#define SVM_SELECTOR_G_SHIFT 11 + +#define SVM_SELECTOR_TYPE_MASK (0xf) +#define SVM_SELECTOR_S_MASK (1 << SVM_SELECTOR_S_SHIFT) +#define SVM_SELECTOR_DPL_MASK (3 << SVM_SELECTOR_DPL_SHIFT) +#define SVM_SELECTOR_P_MASK (1 << 
SVM_SELECTOR_P_SHIFT) +#define SVM_SELECTOR_AVL_MASK (1 << SVM_SELECTOR_AVL_SHIFT) +#define SVM_SELECTOR_L_MASK (1 << SVM_SELECTOR_L_SHIFT) +#define SVM_SELECTOR_DB_MASK (1 << SVM_SELECTOR_DB_SHIFT) +#define SVM_SELECTOR_G_MASK (1 << SVM_SELECTOR_G_SHIFT) + +#define SVM_SELECTOR_WRITE_MASK (1 << 1) +#define SVM_SELECTOR_READ_MASK SVM_SELECTOR_WRITE_MASK +#define SVM_SELECTOR_CODE_MASK (1 << 3) + +#define INTERCEPT_CR0_READ 0 +#define INTERCEPT_CR3_READ 3 +#define INTERCEPT_CR4_READ 4 +#define INTERCEPT_CR8_READ 8 +#define INTERCEPT_CR0_WRITE (16 + 0) +#define INTERCEPT_CR3_WRITE (16 + 3) +#define INTERCEPT_CR4_WRITE (16 + 4) +#define INTERCEPT_CR8_WRITE (16 + 8) + +#define INTERCEPT_DR0_READ 0 +#define INTERCEPT_DR1_READ 1 +#define INTERCEPT_DR2_READ 2 +#define INTERCEPT_DR3_READ 3 +#define INTERCEPT_DR4_READ 4 +#define INTERCEPT_DR5_READ 5 +#define INTERCEPT_DR6_READ 6 +#define INTERCEPT_DR7_READ 7 +#define INTERCEPT_DR0_WRITE (16 + 0) +#define INTERCEPT_DR1_WRITE (16 + 1) +#define INTERCEPT_DR2_WRITE (16 + 2) +#define INTERCEPT_DR3_WRITE (16 + 3) +#define INTERCEPT_DR4_WRITE (16 + 4) +#define INTERCEPT_DR5_WRITE (16 + 5) +#define INTERCEPT_DR6_WRITE (16 + 6) +#define INTERCEPT_DR7_WRITE (16 + 7) + +#define SVM_EVTINJ_VEC_MASK 0xff + +#define SVM_EVTINJ_TYPE_SHIFT 8 +#define SVM_EVTINJ_TYPE_MASK (7 << SVM_EVTINJ_TYPE_SHIFT) + +#define SVM_EVTINJ_TYPE_INTR (0 << SVM_EVTINJ_TYPE_SHIFT) +#define SVM_EVTINJ_TYPE_NMI (2 << SVM_EVTINJ_TYPE_SHIFT) +#define SVM_EVTINJ_TYPE_EXEPT (3 << SVM_EVTINJ_TYPE_SHIFT) +#define SVM_EVTINJ_TYPE_SOFT (4 << SVM_EVTINJ_TYPE_SHIFT) + +#define SVM_EVTINJ_VALID (1 << 31) +#define SVM_EVTINJ_VALID_ERR (1 << 11) + +#define SVM_EXITINTINFO_VEC_MASK SVM_EVTINJ_VEC_MASK +#define SVM_EXITINTINFO_TYPE_MASK SVM_EVTINJ_TYPE_MASK + +#define SVM_EXITINTINFO_TYPE_INTR SVM_EVTINJ_TYPE_INTR +#define SVM_EXITINTINFO_TYPE_NMI SVM_EVTINJ_TYPE_NMI +#define SVM_EXITINTINFO_TYPE_EXEPT SVM_EVTINJ_TYPE_EXEPT +#define SVM_EXITINTINFO_TYPE_SOFT SVM_EVTINJ_TYPE_SOFT + +#define SVM_EXITINTINFO_VALID SVM_EVTINJ_VALID +#define SVM_EXITINTINFO_VALID_ERR SVM_EVTINJ_VALID_ERR + +#define SVM_EXITINFOSHIFT_TS_REASON_IRET 36 +#define SVM_EXITINFOSHIFT_TS_REASON_JMP 38 +#define SVM_EXITINFOSHIFT_TS_HAS_ERROR_CODE 44 + +#define SVM_EXITINFO_REG_MASK 0x0F + +#define SVM_CR0_SELECTIVE_MASK (X86_CR0_TS | X86_CR0_MP) + +#endif /* SELFTEST_KVM_SVM_H */ diff --git a/tools/testing/selftests/kvm/include/x86/svm_util.h b/tools/testing/selftests/kvm/include/x86/svm_util.h new file mode 100644 index 000000000000..b74c6dcddcbd --- /dev/null +++ b/tools/testing/selftests/kvm/include/x86/svm_util.h @@ -0,0 +1,62 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2020, Red Hat, Inc. + */ + +#ifndef SELFTEST_KVM_SVM_UTILS_H +#define SELFTEST_KVM_SVM_UTILS_H + +#include + +#include +#include "svm.h" +#include "processor.h" + +struct svm_test_data { + /* VMCB */ + struct vmcb *vmcb; /* gva */ + void *vmcb_hva; + uint64_t vmcb_gpa; + + /* host state-save area */ + struct vmcb_save_area *save_area; /* gva */ + void *save_area_hva; + uint64_t save_area_gpa; + + /* MSR-Bitmap */ + void *msr; /* gva */ + void *msr_hva; + uint64_t msr_gpa; +}; + +static inline void vmmcall(void) +{ + /* + * Stuff RAX and RCX with "safe" values to make sure L0 doesn't handle + * it as a valid hypercall (e.g. Hyper-V L2 TLB flush) as the intended + * use of this function is to exit to L1 from L2. Clobber all other + * GPRs as L1 doesn't correctly preserve them during vmexits. 
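The SVM_EVTINJ_* values a bit further up compose the VMCB EVENTINJ field; a small sketch (not from this patch) of how an L1 guest in a nested test might use them to queue an exception for L2 before VMRUN. The vmcb would come from vcpu_alloc_svm()/generic_svm_setup(), and the choice of #GP with a zero error code is arbitrary:

#include "svm_util.h"

/* Queue a #GP with error code 0 for delivery to L2 on the next VMRUN. */
static void l1_inject_gp(struct vmcb *vmcb)
{
	vmcb->control.event_inj = GP_VECTOR | SVM_EVTINJ_TYPE_EXEPT |
				  SVM_EVTINJ_VALID | SVM_EVTINJ_VALID_ERR;
	vmcb->control.event_inj_err = 0;
}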
+ */ + __asm__ __volatile__("push %%rbp; vmmcall; pop %%rbp" + : : "a"(0xdeadbeef), "c"(0xbeefdead) + : "rbx", "rdx", "rsi", "rdi", "r8", "r9", + "r10", "r11", "r12", "r13", "r14", "r15"); +} + +#define stgi() \ + __asm__ __volatile__( \ + "stgi\n" \ + ) + +#define clgi() \ + __asm__ __volatile__( \ + "clgi\n" \ + ) + +struct svm_test_data *vcpu_alloc_svm(struct kvm_vm *vm, vm_vaddr_t *p_svm_gva); +void generic_svm_setup(struct svm_test_data *svm, void *guest_rip, void *guest_rsp); +void run_guest(struct vmcb *vmcb, uint64_t vmcb_gpa); + +int open_sev_dev_path_or_exit(void); + +#endif /* SELFTEST_KVM_SVM_UTILS_H */ diff --git a/tools/testing/selftests/kvm/include/x86/ucall.h b/tools/testing/selftests/kvm/include/x86/ucall.h new file mode 100644 index 000000000000..d3825dcc3cd9 --- /dev/null +++ b/tools/testing/selftests/kvm/include/x86/ucall.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef SELFTEST_KVM_UCALL_H +#define SELFTEST_KVM_UCALL_H + +#include "kvm_util.h" + +#define UCALL_EXIT_REASON KVM_EXIT_IO + +static inline void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa) +{ +} + +#endif diff --git a/tools/testing/selftests/kvm/include/x86/vmx.h b/tools/testing/selftests/kvm/include/x86/vmx.h new file mode 100644 index 000000000000..edb3c391b982 --- /dev/null +++ b/tools/testing/selftests/kvm/include/x86/vmx.h @@ -0,0 +1,575 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2018, Google LLC. + */ + +#ifndef SELFTEST_KVM_VMX_H +#define SELFTEST_KVM_VMX_H + +#include + +#include +#include "processor.h" +#include "apic.h" + +/* + * Definitions of Primary Processor-Based VM-Execution Controls. + */ +#define CPU_BASED_INTR_WINDOW_EXITING 0x00000004 +#define CPU_BASED_USE_TSC_OFFSETTING 0x00000008 +#define CPU_BASED_HLT_EXITING 0x00000080 +#define CPU_BASED_INVLPG_EXITING 0x00000200 +#define CPU_BASED_MWAIT_EXITING 0x00000400 +#define CPU_BASED_RDPMC_EXITING 0x00000800 +#define CPU_BASED_RDTSC_EXITING 0x00001000 +#define CPU_BASED_CR3_LOAD_EXITING 0x00008000 +#define CPU_BASED_CR3_STORE_EXITING 0x00010000 +#define CPU_BASED_CR8_LOAD_EXITING 0x00080000 +#define CPU_BASED_CR8_STORE_EXITING 0x00100000 +#define CPU_BASED_TPR_SHADOW 0x00200000 +#define CPU_BASED_NMI_WINDOW_EXITING 0x00400000 +#define CPU_BASED_MOV_DR_EXITING 0x00800000 +#define CPU_BASED_UNCOND_IO_EXITING 0x01000000 +#define CPU_BASED_USE_IO_BITMAPS 0x02000000 +#define CPU_BASED_MONITOR_TRAP 0x08000000 +#define CPU_BASED_USE_MSR_BITMAPS 0x10000000 +#define CPU_BASED_MONITOR_EXITING 0x20000000 +#define CPU_BASED_PAUSE_EXITING 0x40000000 +#define CPU_BASED_ACTIVATE_SECONDARY_CONTROLS 0x80000000 + +#define CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR 0x0401e172 + +/* + * Definitions of Secondary Processor-Based VM-Execution Controls. 
+ */ +#define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001 +#define SECONDARY_EXEC_ENABLE_EPT 0x00000002 +#define SECONDARY_EXEC_DESC 0x00000004 +#define SECONDARY_EXEC_ENABLE_RDTSCP 0x00000008 +#define SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE 0x00000010 +#define SECONDARY_EXEC_ENABLE_VPID 0x00000020 +#define SECONDARY_EXEC_WBINVD_EXITING 0x00000040 +#define SECONDARY_EXEC_UNRESTRICTED_GUEST 0x00000080 +#define SECONDARY_EXEC_APIC_REGISTER_VIRT 0x00000100 +#define SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY 0x00000200 +#define SECONDARY_EXEC_PAUSE_LOOP_EXITING 0x00000400 +#define SECONDARY_EXEC_RDRAND_EXITING 0x00000800 +#define SECONDARY_EXEC_ENABLE_INVPCID 0x00001000 +#define SECONDARY_EXEC_ENABLE_VMFUNC 0x00002000 +#define SECONDARY_EXEC_SHADOW_VMCS 0x00004000 +#define SECONDARY_EXEC_RDSEED_EXITING 0x00010000 +#define SECONDARY_EXEC_ENABLE_PML 0x00020000 +#define SECONDARY_EPT_VE 0x00040000 +#define SECONDARY_ENABLE_XSAV_RESTORE 0x00100000 +#define SECONDARY_EXEC_TSC_SCALING 0x02000000 + +#define PIN_BASED_EXT_INTR_MASK 0x00000001 +#define PIN_BASED_NMI_EXITING 0x00000008 +#define PIN_BASED_VIRTUAL_NMIS 0x00000020 +#define PIN_BASED_VMX_PREEMPTION_TIMER 0x00000040 +#define PIN_BASED_POSTED_INTR 0x00000080 + +#define PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR 0x00000016 + +#define VM_EXIT_SAVE_DEBUG_CONTROLS 0x00000004 +#define VM_EXIT_HOST_ADDR_SPACE_SIZE 0x00000200 +#define VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL 0x00001000 +#define VM_EXIT_ACK_INTR_ON_EXIT 0x00008000 +#define VM_EXIT_SAVE_IA32_PAT 0x00040000 +#define VM_EXIT_LOAD_IA32_PAT 0x00080000 +#define VM_EXIT_SAVE_IA32_EFER 0x00100000 +#define VM_EXIT_LOAD_IA32_EFER 0x00200000 +#define VM_EXIT_SAVE_VMX_PREEMPTION_TIMER 0x00400000 + +#define VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR 0x00036dff + +#define VM_ENTRY_LOAD_DEBUG_CONTROLS 0x00000004 +#define VM_ENTRY_IA32E_MODE 0x00000200 +#define VM_ENTRY_SMM 0x00000400 +#define VM_ENTRY_DEACT_DUAL_MONITOR 0x00000800 +#define VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL 0x00002000 +#define VM_ENTRY_LOAD_IA32_PAT 0x00004000 +#define VM_ENTRY_LOAD_IA32_EFER 0x00008000 + +#define VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR 0x000011ff + +#define VMX_MISC_PREEMPTION_TIMER_RATE_MASK 0x0000001f +#define VMX_MISC_SAVE_EFER_LMA 0x00000020 + +#define VMX_EPT_VPID_CAP_1G_PAGES 0x00020000 +#define VMX_EPT_VPID_CAP_AD_BITS 0x00200000 + +#define EXIT_REASON_FAILED_VMENTRY 0x80000000 + +enum vmcs_field { + VIRTUAL_PROCESSOR_ID = 0x00000000, + POSTED_INTR_NV = 0x00000002, + GUEST_ES_SELECTOR = 0x00000800, + GUEST_CS_SELECTOR = 0x00000802, + GUEST_SS_SELECTOR = 0x00000804, + GUEST_DS_SELECTOR = 0x00000806, + GUEST_FS_SELECTOR = 0x00000808, + GUEST_GS_SELECTOR = 0x0000080a, + GUEST_LDTR_SELECTOR = 0x0000080c, + GUEST_TR_SELECTOR = 0x0000080e, + GUEST_INTR_STATUS = 0x00000810, + GUEST_PML_INDEX = 0x00000812, + HOST_ES_SELECTOR = 0x00000c00, + HOST_CS_SELECTOR = 0x00000c02, + HOST_SS_SELECTOR = 0x00000c04, + HOST_DS_SELECTOR = 0x00000c06, + HOST_FS_SELECTOR = 0x00000c08, + HOST_GS_SELECTOR = 0x00000c0a, + HOST_TR_SELECTOR = 0x00000c0c, + IO_BITMAP_A = 0x00002000, + IO_BITMAP_A_HIGH = 0x00002001, + IO_BITMAP_B = 0x00002002, + IO_BITMAP_B_HIGH = 0x00002003, + MSR_BITMAP = 0x00002004, + MSR_BITMAP_HIGH = 0x00002005, + VM_EXIT_MSR_STORE_ADDR = 0x00002006, + VM_EXIT_MSR_STORE_ADDR_HIGH = 0x00002007, + VM_EXIT_MSR_LOAD_ADDR = 0x00002008, + VM_EXIT_MSR_LOAD_ADDR_HIGH = 0x00002009, + VM_ENTRY_MSR_LOAD_ADDR = 0x0000200a, + VM_ENTRY_MSR_LOAD_ADDR_HIGH = 0x0000200b, + PML_ADDRESS = 0x0000200e, + PML_ADDRESS_HIGH = 0x0000200f, + TSC_OFFSET = 0x00002010, + 
TSC_OFFSET_HIGH = 0x00002011, + VIRTUAL_APIC_PAGE_ADDR = 0x00002012, + VIRTUAL_APIC_PAGE_ADDR_HIGH = 0x00002013, + APIC_ACCESS_ADDR = 0x00002014, + APIC_ACCESS_ADDR_HIGH = 0x00002015, + POSTED_INTR_DESC_ADDR = 0x00002016, + POSTED_INTR_DESC_ADDR_HIGH = 0x00002017, + EPT_POINTER = 0x0000201a, + EPT_POINTER_HIGH = 0x0000201b, + EOI_EXIT_BITMAP0 = 0x0000201c, + EOI_EXIT_BITMAP0_HIGH = 0x0000201d, + EOI_EXIT_BITMAP1 = 0x0000201e, + EOI_EXIT_BITMAP1_HIGH = 0x0000201f, + EOI_EXIT_BITMAP2 = 0x00002020, + EOI_EXIT_BITMAP2_HIGH = 0x00002021, + EOI_EXIT_BITMAP3 = 0x00002022, + EOI_EXIT_BITMAP3_HIGH = 0x00002023, + VMREAD_BITMAP = 0x00002026, + VMREAD_BITMAP_HIGH = 0x00002027, + VMWRITE_BITMAP = 0x00002028, + VMWRITE_BITMAP_HIGH = 0x00002029, + XSS_EXIT_BITMAP = 0x0000202C, + XSS_EXIT_BITMAP_HIGH = 0x0000202D, + ENCLS_EXITING_BITMAP = 0x0000202E, + ENCLS_EXITING_BITMAP_HIGH = 0x0000202F, + TSC_MULTIPLIER = 0x00002032, + TSC_MULTIPLIER_HIGH = 0x00002033, + GUEST_PHYSICAL_ADDRESS = 0x00002400, + GUEST_PHYSICAL_ADDRESS_HIGH = 0x00002401, + VMCS_LINK_POINTER = 0x00002800, + VMCS_LINK_POINTER_HIGH = 0x00002801, + GUEST_IA32_DEBUGCTL = 0x00002802, + GUEST_IA32_DEBUGCTL_HIGH = 0x00002803, + GUEST_IA32_PAT = 0x00002804, + GUEST_IA32_PAT_HIGH = 0x00002805, + GUEST_IA32_EFER = 0x00002806, + GUEST_IA32_EFER_HIGH = 0x00002807, + GUEST_IA32_PERF_GLOBAL_CTRL = 0x00002808, + GUEST_IA32_PERF_GLOBAL_CTRL_HIGH= 0x00002809, + GUEST_PDPTR0 = 0x0000280a, + GUEST_PDPTR0_HIGH = 0x0000280b, + GUEST_PDPTR1 = 0x0000280c, + GUEST_PDPTR1_HIGH = 0x0000280d, + GUEST_PDPTR2 = 0x0000280e, + GUEST_PDPTR2_HIGH = 0x0000280f, + GUEST_PDPTR3 = 0x00002810, + GUEST_PDPTR3_HIGH = 0x00002811, + GUEST_BNDCFGS = 0x00002812, + GUEST_BNDCFGS_HIGH = 0x00002813, + HOST_IA32_PAT = 0x00002c00, + HOST_IA32_PAT_HIGH = 0x00002c01, + HOST_IA32_EFER = 0x00002c02, + HOST_IA32_EFER_HIGH = 0x00002c03, + HOST_IA32_PERF_GLOBAL_CTRL = 0x00002c04, + HOST_IA32_PERF_GLOBAL_CTRL_HIGH = 0x00002c05, + PIN_BASED_VM_EXEC_CONTROL = 0x00004000, + CPU_BASED_VM_EXEC_CONTROL = 0x00004002, + EXCEPTION_BITMAP = 0x00004004, + PAGE_FAULT_ERROR_CODE_MASK = 0x00004006, + PAGE_FAULT_ERROR_CODE_MATCH = 0x00004008, + CR3_TARGET_COUNT = 0x0000400a, + VM_EXIT_CONTROLS = 0x0000400c, + VM_EXIT_MSR_STORE_COUNT = 0x0000400e, + VM_EXIT_MSR_LOAD_COUNT = 0x00004010, + VM_ENTRY_CONTROLS = 0x00004012, + VM_ENTRY_MSR_LOAD_COUNT = 0x00004014, + VM_ENTRY_INTR_INFO_FIELD = 0x00004016, + VM_ENTRY_EXCEPTION_ERROR_CODE = 0x00004018, + VM_ENTRY_INSTRUCTION_LEN = 0x0000401a, + TPR_THRESHOLD = 0x0000401c, + SECONDARY_VM_EXEC_CONTROL = 0x0000401e, + PLE_GAP = 0x00004020, + PLE_WINDOW = 0x00004022, + VM_INSTRUCTION_ERROR = 0x00004400, + VM_EXIT_REASON = 0x00004402, + VM_EXIT_INTR_INFO = 0x00004404, + VM_EXIT_INTR_ERROR_CODE = 0x00004406, + IDT_VECTORING_INFO_FIELD = 0x00004408, + IDT_VECTORING_ERROR_CODE = 0x0000440a, + VM_EXIT_INSTRUCTION_LEN = 0x0000440c, + VMX_INSTRUCTION_INFO = 0x0000440e, + GUEST_ES_LIMIT = 0x00004800, + GUEST_CS_LIMIT = 0x00004802, + GUEST_SS_LIMIT = 0x00004804, + GUEST_DS_LIMIT = 0x00004806, + GUEST_FS_LIMIT = 0x00004808, + GUEST_GS_LIMIT = 0x0000480a, + GUEST_LDTR_LIMIT = 0x0000480c, + GUEST_TR_LIMIT = 0x0000480e, + GUEST_GDTR_LIMIT = 0x00004810, + GUEST_IDTR_LIMIT = 0x00004812, + GUEST_ES_AR_BYTES = 0x00004814, + GUEST_CS_AR_BYTES = 0x00004816, + GUEST_SS_AR_BYTES = 0x00004818, + GUEST_DS_AR_BYTES = 0x0000481a, + GUEST_FS_AR_BYTES = 0x0000481c, + GUEST_GS_AR_BYTES = 0x0000481e, + GUEST_LDTR_AR_BYTES = 0x00004820, + GUEST_TR_AR_BYTES = 0x00004822, + GUEST_INTERRUPTIBILITY_INFO = 
0x00004824, + GUEST_ACTIVITY_STATE = 0X00004826, + GUEST_SYSENTER_CS = 0x0000482A, + VMX_PREEMPTION_TIMER_VALUE = 0x0000482E, + HOST_IA32_SYSENTER_CS = 0x00004c00, + CR0_GUEST_HOST_MASK = 0x00006000, + CR4_GUEST_HOST_MASK = 0x00006002, + CR0_READ_SHADOW = 0x00006004, + CR4_READ_SHADOW = 0x00006006, + CR3_TARGET_VALUE0 = 0x00006008, + CR3_TARGET_VALUE1 = 0x0000600a, + CR3_TARGET_VALUE2 = 0x0000600c, + CR3_TARGET_VALUE3 = 0x0000600e, + EXIT_QUALIFICATION = 0x00006400, + GUEST_LINEAR_ADDRESS = 0x0000640a, + GUEST_CR0 = 0x00006800, + GUEST_CR3 = 0x00006802, + GUEST_CR4 = 0x00006804, + GUEST_ES_BASE = 0x00006806, + GUEST_CS_BASE = 0x00006808, + GUEST_SS_BASE = 0x0000680a, + GUEST_DS_BASE = 0x0000680c, + GUEST_FS_BASE = 0x0000680e, + GUEST_GS_BASE = 0x00006810, + GUEST_LDTR_BASE = 0x00006812, + GUEST_TR_BASE = 0x00006814, + GUEST_GDTR_BASE = 0x00006816, + GUEST_IDTR_BASE = 0x00006818, + GUEST_DR7 = 0x0000681a, + GUEST_RSP = 0x0000681c, + GUEST_RIP = 0x0000681e, + GUEST_RFLAGS = 0x00006820, + GUEST_PENDING_DBG_EXCEPTIONS = 0x00006822, + GUEST_SYSENTER_ESP = 0x00006824, + GUEST_SYSENTER_EIP = 0x00006826, + HOST_CR0 = 0x00006c00, + HOST_CR3 = 0x00006c02, + HOST_CR4 = 0x00006c04, + HOST_FS_BASE = 0x00006c06, + HOST_GS_BASE = 0x00006c08, + HOST_TR_BASE = 0x00006c0a, + HOST_GDTR_BASE = 0x00006c0c, + HOST_IDTR_BASE = 0x00006c0e, + HOST_IA32_SYSENTER_ESP = 0x00006c10, + HOST_IA32_SYSENTER_EIP = 0x00006c12, + HOST_RSP = 0x00006c14, + HOST_RIP = 0x00006c16, +}; + +struct vmx_msr_entry { + uint32_t index; + uint32_t reserved; + uint64_t value; +} __attribute__ ((aligned(16))); + +#include "evmcs.h" + +static inline int vmxon(uint64_t phys) +{ + uint8_t ret; + + __asm__ __volatile__ ("vmxon %[pa]; setna %[ret]" + : [ret]"=rm"(ret) + : [pa]"m"(phys) + : "cc", "memory"); + + return ret; +} + +static inline void vmxoff(void) +{ + __asm__ __volatile__("vmxoff"); +} + +static inline int vmclear(uint64_t vmcs_pa) +{ + uint8_t ret; + + __asm__ __volatile__ ("vmclear %[pa]; setna %[ret]" + : [ret]"=rm"(ret) + : [pa]"m"(vmcs_pa) + : "cc", "memory"); + + return ret; +} + +static inline int vmptrld(uint64_t vmcs_pa) +{ + uint8_t ret; + + if (enable_evmcs) + return -1; + + __asm__ __volatile__ ("vmptrld %[pa]; setna %[ret]" + : [ret]"=rm"(ret) + : [pa]"m"(vmcs_pa) + : "cc", "memory"); + + return ret; +} + +static inline int vmptrst(uint64_t *value) +{ + uint64_t tmp; + uint8_t ret; + + if (enable_evmcs) + return evmcs_vmptrst(value); + + __asm__ __volatile__("vmptrst %[value]; setna %[ret]" + : [value]"=m"(tmp), [ret]"=rm"(ret) + : : "cc", "memory"); + + *value = tmp; + return ret; +} + +/* + * A wrapper around vmptrst that ignores errors and returns zero if the + * vmptrst instruction fails. + */ +static inline uint64_t vmptrstz(void) +{ + uint64_t value = 0; + vmptrst(&value); + return value; +} + +/* + * No guest state (e.g. GPRs) is established by this vmlaunch. + */ +static inline int vmlaunch(void) +{ + int ret; + + if (enable_evmcs) + return evmcs_vmlaunch(); + + __asm__ __volatile__("push %%rbp;" + "push %%rcx;" + "push %%rdx;" + "push %%rsi;" + "push %%rdi;" + "push $0;" + "vmwrite %%rsp, %[host_rsp];" + "lea 1f(%%rip), %%rax;" + "vmwrite %%rax, %[host_rip];" + "vmlaunch;" + "incq (%%rsp);" + "1: pop %%rax;" + "pop %%rdi;" + "pop %%rsi;" + "pop %%rdx;" + "pop %%rcx;" + "pop %%rbp;" + : [ret]"=&a"(ret) + : [host_rsp]"r"((uint64_t)HOST_RSP), + [host_rip]"r"((uint64_t)HOST_RIP) + : "memory", "cc", "rbx", "r8", "r9", "r10", + "r11", "r12", "r13", "r14", "r15"); + return ret; +} + +/* + * No guest state (e.g. 
GPRs) is established by this vmresume. + */ +static inline int vmresume(void) +{ + int ret; + + if (enable_evmcs) + return evmcs_vmresume(); + + __asm__ __volatile__("push %%rbp;" + "push %%rcx;" + "push %%rdx;" + "push %%rsi;" + "push %%rdi;" + "push $0;" + "vmwrite %%rsp, %[host_rsp];" + "lea 1f(%%rip), %%rax;" + "vmwrite %%rax, %[host_rip];" + "vmresume;" + "incq (%%rsp);" + "1: pop %%rax;" + "pop %%rdi;" + "pop %%rsi;" + "pop %%rdx;" + "pop %%rcx;" + "pop %%rbp;" + : [ret]"=&a"(ret) + : [host_rsp]"r"((uint64_t)HOST_RSP), + [host_rip]"r"((uint64_t)HOST_RIP) + : "memory", "cc", "rbx", "r8", "r9", "r10", + "r11", "r12", "r13", "r14", "r15"); + return ret; +} + +static inline void vmcall(void) +{ + /* + * Stuff RAX and RCX with "safe" values to make sure L0 doesn't handle + * it as a valid hypercall (e.g. Hyper-V L2 TLB flush) as the intended + * use of this function is to exit to L1 from L2. Clobber all other + * GPRs as L1 doesn't correctly preserve them during vmexits. + */ + __asm__ __volatile__("push %%rbp; vmcall; pop %%rbp" + : : "a"(0xdeadbeef), "c"(0xbeefdead) + : "rbx", "rdx", "rsi", "rdi", "r8", "r9", + "r10", "r11", "r12", "r13", "r14", "r15"); +} + +static inline int vmread(uint64_t encoding, uint64_t *value) +{ + uint64_t tmp; + uint8_t ret; + + if (enable_evmcs) + return evmcs_vmread(encoding, value); + + __asm__ __volatile__("vmread %[encoding], %[value]; setna %[ret]" + : [value]"=rm"(tmp), [ret]"=rm"(ret) + : [encoding]"r"(encoding) + : "cc", "memory"); + + *value = tmp; + return ret; +} + +/* + * A wrapper around vmread that ignores errors and returns zero if the + * vmread instruction fails. + */ +static inline uint64_t vmreadz(uint64_t encoding) +{ + uint64_t value = 0; + vmread(encoding, &value); + return value; +} + +static inline int vmwrite(uint64_t encoding, uint64_t value) +{ + uint8_t ret; + + if (enable_evmcs) + return evmcs_vmwrite(encoding, value); + + __asm__ __volatile__ ("vmwrite %[value], %[encoding]; setna %[ret]" + : [ret]"=rm"(ret) + : [value]"rm"(value), [encoding]"r"(encoding) + : "cc", "memory"); + + return ret; +} + +static inline uint32_t vmcs_revision(void) +{ + return rdmsr(MSR_IA32_VMX_BASIC); +} + +struct vmx_pages { + void *vmxon_hva; + uint64_t vmxon_gpa; + void *vmxon; + + void *vmcs_hva; + uint64_t vmcs_gpa; + void *vmcs; + + void *msr_hva; + uint64_t msr_gpa; + void *msr; + + void *shadow_vmcs_hva; + uint64_t shadow_vmcs_gpa; + void *shadow_vmcs; + + void *vmread_hva; + uint64_t vmread_gpa; + void *vmread; + + void *vmwrite_hva; + uint64_t vmwrite_gpa; + void *vmwrite; + + void *eptp_hva; + uint64_t eptp_gpa; + void *eptp; + + void *apic_access_hva; + uint64_t apic_access_gpa; + void *apic_access; +}; + +union vmx_basic { + u64 val; + struct { + u32 revision; + u32 size:13, + reserved1:3, + width:1, + dual:1, + type:4, + insouts:1, + ctrl:1, + vm_entry_exception_ctrl:1, + reserved2:7; + }; +}; + +union vmx_ctrl_msr { + u64 val; + struct { + u32 set, clr; + }; +}; + +struct vmx_pages *vcpu_alloc_vmx(struct kvm_vm *vm, vm_vaddr_t *p_vmx_gva); +bool prepare_for_vmx_operation(struct vmx_pages *vmx); +void prepare_vmcs(struct vmx_pages *vmx, void *guest_rip, void *guest_rsp); +bool load_vmcs(struct vmx_pages *vmx); + +bool ept_1g_pages_supported(void); + +void nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm, + uint64_t nested_paddr, uint64_t paddr); +void nested_map(struct vmx_pages *vmx, struct kvm_vm *vm, + uint64_t nested_paddr, uint64_t paddr, uint64_t size); +void nested_map_memslot(struct vmx_pages *vmx, struct kvm_vm *vm, + 
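Putting the helpers above together, an L1 guest typically drives a nested run by entering VMX operation, loading and populating a VMCS, launching, and then inspecting the exit with vmreadz(). The outline below is a hedged sketch rather than an actual test: the L2 body and stack sizing are placeholders, EXIT_REASON_VMCALL (architecturally basic exit reason 18) is assumed to be visible via the included headers, and error handling is reduced to GUEST_ASSERT():

#include "vmx.h"
#include "ucall_common.h"

static void l2_guest_code(void)
{
	vmcall();	/* bounce straight back to L1 */
}

/* Runs in L1; @vmx is the vmx_pages allocated by vcpu_alloc_vmx() in the host. */
static void l1_guest_code(struct vmx_pages *vmx)
{
	unsigned long l2_stack[128];

	GUEST_ASSERT(prepare_for_vmx_operation(vmx));
	GUEST_ASSERT(load_vmcs(vmx));

	prepare_vmcs(vmx, l2_guest_code, &l2_stack[128]);

	GUEST_ASSERT(!vmlaunch());
	GUEST_ASSERT_EQ(vmreadz(VM_EXIT_REASON), EXIT_REASON_VMCALL);
	GUEST_DONE();
}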
uint32_t memslot); +void nested_identity_map_1g(struct vmx_pages *vmx, struct kvm_vm *vm, + uint64_t addr, uint64_t size); +bool kvm_cpu_has_ept(void); +void prepare_eptp(struct vmx_pages *vmx, struct kvm_vm *vm, + uint32_t eptp_memslot); +void prepare_virtualize_apic_accesses(struct vmx_pages *vmx, struct kvm_vm *vm); + +#endif /* SELFTEST_KVM_VMX_H */ diff --git a/tools/testing/selftests/kvm/include/x86_64/apic.h b/tools/testing/selftests/kvm/include/x86_64/apic.h deleted file mode 100644 index 51990094effd..000000000000 --- a/tools/testing/selftests/kvm/include/x86_64/apic.h +++ /dev/null @@ -1,120 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * tools/testing/selftests/kvm/include/x86_64/apic.h - * - * Copyright (C) 2021, Google LLC. - */ - -#ifndef SELFTEST_KVM_APIC_H -#define SELFTEST_KVM_APIC_H - -#include - -#include "processor.h" -#include "ucall_common.h" - -#define APIC_DEFAULT_GPA 0xfee00000ULL - -/* APIC base address MSR and fields */ -#define MSR_IA32_APICBASE 0x0000001b -#define MSR_IA32_APICBASE_BSP (1<<8) -#define MSR_IA32_APICBASE_EXTD (1<<10) -#define MSR_IA32_APICBASE_ENABLE (1<<11) -#define MSR_IA32_APICBASE_BASE (0xfffff<<12) -#define GET_APIC_BASE(x) (((x) >> 12) << 12) - -#define APIC_BASE_MSR 0x800 -#define X2APIC_ENABLE (1UL << 10) -#define APIC_ID 0x20 -#define APIC_LVR 0x30 -#define GET_APIC_ID_FIELD(x) (((x) >> 24) & 0xFF) -#define APIC_TASKPRI 0x80 -#define APIC_PROCPRI 0xA0 -#define APIC_EOI 0xB0 -#define APIC_SPIV 0xF0 -#define APIC_SPIV_FOCUS_DISABLED (1 << 9) -#define APIC_SPIV_APIC_ENABLED (1 << 8) -#define APIC_IRR 0x200 -#define APIC_ICR 0x300 -#define APIC_LVTCMCI 0x2f0 -#define APIC_DEST_SELF 0x40000 -#define APIC_DEST_ALLINC 0x80000 -#define APIC_DEST_ALLBUT 0xC0000 -#define APIC_ICR_RR_MASK 0x30000 -#define APIC_ICR_RR_INVALID 0x00000 -#define APIC_ICR_RR_INPROG 0x10000 -#define APIC_ICR_RR_VALID 0x20000 -#define APIC_INT_LEVELTRIG 0x08000 -#define APIC_INT_ASSERT 0x04000 -#define APIC_ICR_BUSY 0x01000 -#define APIC_DEST_LOGICAL 0x00800 -#define APIC_DEST_PHYSICAL 0x00000 -#define APIC_DM_FIXED 0x00000 -#define APIC_DM_FIXED_MASK 0x00700 -#define APIC_DM_LOWEST 0x00100 -#define APIC_DM_SMI 0x00200 -#define APIC_DM_REMRD 0x00300 -#define APIC_DM_NMI 0x00400 -#define APIC_DM_INIT 0x00500 -#define APIC_DM_STARTUP 0x00600 -#define APIC_DM_EXTINT 0x00700 -#define APIC_VECTOR_MASK 0x000FF -#define APIC_ICR2 0x310 -#define SET_APIC_DEST_FIELD(x) ((x) << 24) -#define APIC_LVTT 0x320 -#define APIC_LVT_TIMER_ONESHOT (0 << 17) -#define APIC_LVT_TIMER_PERIODIC (1 << 17) -#define APIC_LVT_TIMER_TSCDEADLINE (2 << 17) -#define APIC_LVT_MASKED (1 << 16) -#define APIC_TMICT 0x380 -#define APIC_TMCCT 0x390 -#define APIC_TDCR 0x3E0 - -void apic_disable(void); -void xapic_enable(void); -void x2apic_enable(void); - -static inline uint32_t get_bsp_flag(void) -{ - return rdmsr(MSR_IA32_APICBASE) & MSR_IA32_APICBASE_BSP; -} - -static inline uint32_t xapic_read_reg(unsigned int reg) -{ - return ((volatile uint32_t *)APIC_DEFAULT_GPA)[reg >> 2]; -} - -static inline void xapic_write_reg(unsigned int reg, uint32_t val) -{ - ((volatile uint32_t *)APIC_DEFAULT_GPA)[reg >> 2] = val; -} - -static inline uint64_t x2apic_read_reg(unsigned int reg) -{ - return rdmsr(APIC_BASE_MSR + (reg >> 4)); -} - -static inline uint8_t x2apic_write_reg_safe(unsigned int reg, uint64_t value) -{ - return wrmsr_safe(APIC_BASE_MSR + (reg >> 4), value); -} - -static inline void x2apic_write_reg(unsigned int reg, uint64_t value) -{ - uint8_t fault = x2apic_write_reg_safe(reg, value); - - 
__GUEST_ASSERT(!fault, "Unexpected fault 0x%x on WRMSR(%x) = %lx\n", - fault, APIC_BASE_MSR + (reg >> 4), value); -} - -static inline void x2apic_write_reg_fault(unsigned int reg, uint64_t value) -{ - uint8_t fault = x2apic_write_reg_safe(reg, value); - - __GUEST_ASSERT(fault == GP_VECTOR, - "Wanted #GP on WRMSR(%x) = %lx, got 0x%x\n", - APIC_BASE_MSR + (reg >> 4), value, fault); -} - - -#endif /* SELFTEST_KVM_APIC_H */ diff --git a/tools/testing/selftests/kvm/include/x86_64/evmcs.h b/tools/testing/selftests/kvm/include/x86_64/evmcs.h deleted file mode 100644 index 901caf0e0939..000000000000 --- a/tools/testing/selftests/kvm/include/x86_64/evmcs.h +++ /dev/null @@ -1,1279 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * tools/testing/selftests/kvm/include/x86_64/evmcs.h - * - * Copyright (C) 2018, Red Hat, Inc. - * - */ - -#ifndef SELFTEST_KVM_EVMCS_H -#define SELFTEST_KVM_EVMCS_H - -#include -#include "hyperv.h" -#include "vmx.h" - -#define u16 uint16_t -#define u32 uint32_t -#define u64 uint64_t - -#define EVMCS_VERSION 1 - -extern bool enable_evmcs; - -struct hv_enlightened_vmcs { - u32 revision_id; - u32 abort; - - u16 host_es_selector; - u16 host_cs_selector; - u16 host_ss_selector; - u16 host_ds_selector; - u16 host_fs_selector; - u16 host_gs_selector; - u16 host_tr_selector; - - u16 padding16_1; - - u64 host_ia32_pat; - u64 host_ia32_efer; - - u64 host_cr0; - u64 host_cr3; - u64 host_cr4; - - u64 host_ia32_sysenter_esp; - u64 host_ia32_sysenter_eip; - u64 host_rip; - u32 host_ia32_sysenter_cs; - - u32 pin_based_vm_exec_control; - u32 vm_exit_controls; - u32 secondary_vm_exec_control; - - u64 io_bitmap_a; - u64 io_bitmap_b; - u64 msr_bitmap; - - u16 guest_es_selector; - u16 guest_cs_selector; - u16 guest_ss_selector; - u16 guest_ds_selector; - u16 guest_fs_selector; - u16 guest_gs_selector; - u16 guest_ldtr_selector; - u16 guest_tr_selector; - - u32 guest_es_limit; - u32 guest_cs_limit; - u32 guest_ss_limit; - u32 guest_ds_limit; - u32 guest_fs_limit; - u32 guest_gs_limit; - u32 guest_ldtr_limit; - u32 guest_tr_limit; - u32 guest_gdtr_limit; - u32 guest_idtr_limit; - - u32 guest_es_ar_bytes; - u32 guest_cs_ar_bytes; - u32 guest_ss_ar_bytes; - u32 guest_ds_ar_bytes; - u32 guest_fs_ar_bytes; - u32 guest_gs_ar_bytes; - u32 guest_ldtr_ar_bytes; - u32 guest_tr_ar_bytes; - - u64 guest_es_base; - u64 guest_cs_base; - u64 guest_ss_base; - u64 guest_ds_base; - u64 guest_fs_base; - u64 guest_gs_base; - u64 guest_ldtr_base; - u64 guest_tr_base; - u64 guest_gdtr_base; - u64 guest_idtr_base; - - u64 padding64_1[3]; - - u64 vm_exit_msr_store_addr; - u64 vm_exit_msr_load_addr; - u64 vm_entry_msr_load_addr; - - u64 cr3_target_value0; - u64 cr3_target_value1; - u64 cr3_target_value2; - u64 cr3_target_value3; - - u32 page_fault_error_code_mask; - u32 page_fault_error_code_match; - - u32 cr3_target_count; - u32 vm_exit_msr_store_count; - u32 vm_exit_msr_load_count; - u32 vm_entry_msr_load_count; - - u64 tsc_offset; - u64 virtual_apic_page_addr; - u64 vmcs_link_pointer; - - u64 guest_ia32_debugctl; - u64 guest_ia32_pat; - u64 guest_ia32_efer; - - u64 guest_pdptr0; - u64 guest_pdptr1; - u64 guest_pdptr2; - u64 guest_pdptr3; - - u64 guest_pending_dbg_exceptions; - u64 guest_sysenter_esp; - u64 guest_sysenter_eip; - - u32 guest_activity_state; - u32 guest_sysenter_cs; - - u64 cr0_guest_host_mask; - u64 cr4_guest_host_mask; - u64 cr0_read_shadow; - u64 cr4_read_shadow; - u64 guest_cr0; - u64 guest_cr3; - u64 guest_cr4; - u64 guest_dr7; - - u64 host_fs_base; - u64 host_gs_base; - u64 host_tr_base; - 
u64 host_gdtr_base; - u64 host_idtr_base; - u64 host_rsp; - - u64 ept_pointer; - - u16 virtual_processor_id; - u16 padding16_2[3]; - - u64 padding64_2[5]; - u64 guest_physical_address; - - u32 vm_instruction_error; - u32 vm_exit_reason; - u32 vm_exit_intr_info; - u32 vm_exit_intr_error_code; - u32 idt_vectoring_info_field; - u32 idt_vectoring_error_code; - u32 vm_exit_instruction_len; - u32 vmx_instruction_info; - - u64 exit_qualification; - u64 exit_io_instruction_ecx; - u64 exit_io_instruction_esi; - u64 exit_io_instruction_edi; - u64 exit_io_instruction_eip; - - u64 guest_linear_address; - u64 guest_rsp; - u64 guest_rflags; - - u32 guest_interruptibility_info; - u32 cpu_based_vm_exec_control; - u32 exception_bitmap; - u32 vm_entry_controls; - u32 vm_entry_intr_info_field; - u32 vm_entry_exception_error_code; - u32 vm_entry_instruction_len; - u32 tpr_threshold; - - u64 guest_rip; - - u32 hv_clean_fields; - u32 padding32_1; - u32 hv_synthetic_controls; - struct { - u32 nested_flush_hypercall:1; - u32 msr_bitmap:1; - u32 reserved:30; - } __packed hv_enlightenments_control; - u32 hv_vp_id; - u32 padding32_2; - u64 hv_vm_id; - u64 partition_assist_page; - u64 padding64_4[4]; - u64 guest_bndcfgs; - u64 guest_ia32_perf_global_ctrl; - u64 guest_ia32_s_cet; - u64 guest_ssp; - u64 guest_ia32_int_ssp_table_addr; - u64 guest_ia32_lbr_ctl; - u64 padding64_5[2]; - u64 xss_exit_bitmap; - u64 encls_exiting_bitmap; - u64 host_ia32_perf_global_ctrl; - u64 tsc_multiplier; - u64 host_ia32_s_cet; - u64 host_ssp; - u64 host_ia32_int_ssp_table_addr; - u64 padding64_6; -} __packed; - -#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE 0 -#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_IO_BITMAP BIT(0) -#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP BIT(1) -#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP2 BIT(2) -#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP1 BIT(3) -#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_PROC BIT(4) -#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EVENT BIT(5) -#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_ENTRY BIT(6) -#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EXCPN BIT(7) -#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR BIT(8) -#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_XLAT BIT(9) -#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_BASIC BIT(10) -#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1 BIT(11) -#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2 BIT(12) -#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER BIT(13) -#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1 BIT(14) -#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_ENLIGHTENMENTSCONTROL BIT(15) -#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL 0xFFFF - -#define HV_VMX_SYNTHETIC_EXIT_REASON_TRAP_AFTER_FLUSH 0x10000031 - -extern struct hv_enlightened_vmcs *current_evmcs; - -int vcpu_enable_evmcs(struct kvm_vcpu *vcpu); - -static inline void evmcs_enable(void) -{ - enable_evmcs = true; -} - -static inline int evmcs_vmptrld(uint64_t vmcs_pa, void *vmcs) -{ - current_vp_assist->current_nested_vmcs = vmcs_pa; - current_vp_assist->enlighten_vmentry = 1; - - current_evmcs = vmcs; - - return 0; -} - -static inline bool load_evmcs(struct hyperv_test_pages *hv) -{ - if (evmcs_vmptrld(hv->enlightened_vmcs_gpa, hv->enlightened_vmcs)) - return false; - - current_evmcs->revision_id = EVMCS_VERSION; - - return true; -} - -static inline int evmcs_vmptrst(uint64_t *value) -{ - *value = current_vp_assist->current_nested_vmcs & - ~HV_X64_MSR_VP_ASSIST_PAGE_ENABLE; - - return 0; -} - -static inline int evmcs_vmread(uint64_t encoding, uint64_t *value) -{ - switch 
(encoding) { - case GUEST_RIP: - *value = current_evmcs->guest_rip; - break; - case GUEST_RSP: - *value = current_evmcs->guest_rsp; - break; - case GUEST_RFLAGS: - *value = current_evmcs->guest_rflags; - break; - case HOST_IA32_PAT: - *value = current_evmcs->host_ia32_pat; - break; - case HOST_IA32_EFER: - *value = current_evmcs->host_ia32_efer; - break; - case HOST_CR0: - *value = current_evmcs->host_cr0; - break; - case HOST_CR3: - *value = current_evmcs->host_cr3; - break; - case HOST_CR4: - *value = current_evmcs->host_cr4; - break; - case HOST_IA32_SYSENTER_ESP: - *value = current_evmcs->host_ia32_sysenter_esp; - break; - case HOST_IA32_SYSENTER_EIP: - *value = current_evmcs->host_ia32_sysenter_eip; - break; - case HOST_RIP: - *value = current_evmcs->host_rip; - break; - case IO_BITMAP_A: - *value = current_evmcs->io_bitmap_a; - break; - case IO_BITMAP_B: - *value = current_evmcs->io_bitmap_b; - break; - case MSR_BITMAP: - *value = current_evmcs->msr_bitmap; - break; - case GUEST_ES_BASE: - *value = current_evmcs->guest_es_base; - break; - case GUEST_CS_BASE: - *value = current_evmcs->guest_cs_base; - break; - case GUEST_SS_BASE: - *value = current_evmcs->guest_ss_base; - break; - case GUEST_DS_BASE: - *value = current_evmcs->guest_ds_base; - break; - case GUEST_FS_BASE: - *value = current_evmcs->guest_fs_base; - break; - case GUEST_GS_BASE: - *value = current_evmcs->guest_gs_base; - break; - case GUEST_LDTR_BASE: - *value = current_evmcs->guest_ldtr_base; - break; - case GUEST_TR_BASE: - *value = current_evmcs->guest_tr_base; - break; - case GUEST_GDTR_BASE: - *value = current_evmcs->guest_gdtr_base; - break; - case GUEST_IDTR_BASE: - *value = current_evmcs->guest_idtr_base; - break; - case TSC_OFFSET: - *value = current_evmcs->tsc_offset; - break; - case VIRTUAL_APIC_PAGE_ADDR: - *value = current_evmcs->virtual_apic_page_addr; - break; - case VMCS_LINK_POINTER: - *value = current_evmcs->vmcs_link_pointer; - break; - case GUEST_IA32_DEBUGCTL: - *value = current_evmcs->guest_ia32_debugctl; - break; - case GUEST_IA32_PAT: - *value = current_evmcs->guest_ia32_pat; - break; - case GUEST_IA32_EFER: - *value = current_evmcs->guest_ia32_efer; - break; - case GUEST_PDPTR0: - *value = current_evmcs->guest_pdptr0; - break; - case GUEST_PDPTR1: - *value = current_evmcs->guest_pdptr1; - break; - case GUEST_PDPTR2: - *value = current_evmcs->guest_pdptr2; - break; - case GUEST_PDPTR3: - *value = current_evmcs->guest_pdptr3; - break; - case GUEST_PENDING_DBG_EXCEPTIONS: - *value = current_evmcs->guest_pending_dbg_exceptions; - break; - case GUEST_SYSENTER_ESP: - *value = current_evmcs->guest_sysenter_esp; - break; - case GUEST_SYSENTER_EIP: - *value = current_evmcs->guest_sysenter_eip; - break; - case CR0_GUEST_HOST_MASK: - *value = current_evmcs->cr0_guest_host_mask; - break; - case CR4_GUEST_HOST_MASK: - *value = current_evmcs->cr4_guest_host_mask; - break; - case CR0_READ_SHADOW: - *value = current_evmcs->cr0_read_shadow; - break; - case CR4_READ_SHADOW: - *value = current_evmcs->cr4_read_shadow; - break; - case GUEST_CR0: - *value = current_evmcs->guest_cr0; - break; - case GUEST_CR3: - *value = current_evmcs->guest_cr3; - break; - case GUEST_CR4: - *value = current_evmcs->guest_cr4; - break; - case GUEST_DR7: - *value = current_evmcs->guest_dr7; - break; - case HOST_FS_BASE: - *value = current_evmcs->host_fs_base; - break; - case HOST_GS_BASE: - *value = current_evmcs->host_gs_base; - break; - case HOST_TR_BASE: - *value = current_evmcs->host_tr_base; - break; - case HOST_GDTR_BASE: - *value = 
current_evmcs->host_gdtr_base; - break; - case HOST_IDTR_BASE: - *value = current_evmcs->host_idtr_base; - break; - case HOST_RSP: - *value = current_evmcs->host_rsp; - break; - case EPT_POINTER: - *value = current_evmcs->ept_pointer; - break; - case GUEST_BNDCFGS: - *value = current_evmcs->guest_bndcfgs; - break; - case XSS_EXIT_BITMAP: - *value = current_evmcs->xss_exit_bitmap; - break; - case GUEST_PHYSICAL_ADDRESS: - *value = current_evmcs->guest_physical_address; - break; - case EXIT_QUALIFICATION: - *value = current_evmcs->exit_qualification; - break; - case GUEST_LINEAR_ADDRESS: - *value = current_evmcs->guest_linear_address; - break; - case VM_EXIT_MSR_STORE_ADDR: - *value = current_evmcs->vm_exit_msr_store_addr; - break; - case VM_EXIT_MSR_LOAD_ADDR: - *value = current_evmcs->vm_exit_msr_load_addr; - break; - case VM_ENTRY_MSR_LOAD_ADDR: - *value = current_evmcs->vm_entry_msr_load_addr; - break; - case CR3_TARGET_VALUE0: - *value = current_evmcs->cr3_target_value0; - break; - case CR3_TARGET_VALUE1: - *value = current_evmcs->cr3_target_value1; - break; - case CR3_TARGET_VALUE2: - *value = current_evmcs->cr3_target_value2; - break; - case CR3_TARGET_VALUE3: - *value = current_evmcs->cr3_target_value3; - break; - case TPR_THRESHOLD: - *value = current_evmcs->tpr_threshold; - break; - case GUEST_INTERRUPTIBILITY_INFO: - *value = current_evmcs->guest_interruptibility_info; - break; - case CPU_BASED_VM_EXEC_CONTROL: - *value = current_evmcs->cpu_based_vm_exec_control; - break; - case EXCEPTION_BITMAP: - *value = current_evmcs->exception_bitmap; - break; - case VM_ENTRY_CONTROLS: - *value = current_evmcs->vm_entry_controls; - break; - case VM_ENTRY_INTR_INFO_FIELD: - *value = current_evmcs->vm_entry_intr_info_field; - break; - case VM_ENTRY_EXCEPTION_ERROR_CODE: - *value = current_evmcs->vm_entry_exception_error_code; - break; - case VM_ENTRY_INSTRUCTION_LEN: - *value = current_evmcs->vm_entry_instruction_len; - break; - case HOST_IA32_SYSENTER_CS: - *value = current_evmcs->host_ia32_sysenter_cs; - break; - case PIN_BASED_VM_EXEC_CONTROL: - *value = current_evmcs->pin_based_vm_exec_control; - break; - case VM_EXIT_CONTROLS: - *value = current_evmcs->vm_exit_controls; - break; - case SECONDARY_VM_EXEC_CONTROL: - *value = current_evmcs->secondary_vm_exec_control; - break; - case GUEST_ES_LIMIT: - *value = current_evmcs->guest_es_limit; - break; - case GUEST_CS_LIMIT: - *value = current_evmcs->guest_cs_limit; - break; - case GUEST_SS_LIMIT: - *value = current_evmcs->guest_ss_limit; - break; - case GUEST_DS_LIMIT: - *value = current_evmcs->guest_ds_limit; - break; - case GUEST_FS_LIMIT: - *value = current_evmcs->guest_fs_limit; - break; - case GUEST_GS_LIMIT: - *value = current_evmcs->guest_gs_limit; - break; - case GUEST_LDTR_LIMIT: - *value = current_evmcs->guest_ldtr_limit; - break; - case GUEST_TR_LIMIT: - *value = current_evmcs->guest_tr_limit; - break; - case GUEST_GDTR_LIMIT: - *value = current_evmcs->guest_gdtr_limit; - break; - case GUEST_IDTR_LIMIT: - *value = current_evmcs->guest_idtr_limit; - break; - case GUEST_ES_AR_BYTES: - *value = current_evmcs->guest_es_ar_bytes; - break; - case GUEST_CS_AR_BYTES: - *value = current_evmcs->guest_cs_ar_bytes; - break; - case GUEST_SS_AR_BYTES: - *value = current_evmcs->guest_ss_ar_bytes; - break; - case GUEST_DS_AR_BYTES: - *value = current_evmcs->guest_ds_ar_bytes; - break; - case GUEST_FS_AR_BYTES: - *value = current_evmcs->guest_fs_ar_bytes; - break; - case GUEST_GS_AR_BYTES: - *value = current_evmcs->guest_gs_ar_bytes; - break; - case 
GUEST_LDTR_AR_BYTES: - *value = current_evmcs->guest_ldtr_ar_bytes; - break; - case GUEST_TR_AR_BYTES: - *value = current_evmcs->guest_tr_ar_bytes; - break; - case GUEST_ACTIVITY_STATE: - *value = current_evmcs->guest_activity_state; - break; - case GUEST_SYSENTER_CS: - *value = current_evmcs->guest_sysenter_cs; - break; - case VM_INSTRUCTION_ERROR: - *value = current_evmcs->vm_instruction_error; - break; - case VM_EXIT_REASON: - *value = current_evmcs->vm_exit_reason; - break; - case VM_EXIT_INTR_INFO: - *value = current_evmcs->vm_exit_intr_info; - break; - case VM_EXIT_INTR_ERROR_CODE: - *value = current_evmcs->vm_exit_intr_error_code; - break; - case IDT_VECTORING_INFO_FIELD: - *value = current_evmcs->idt_vectoring_info_field; - break; - case IDT_VECTORING_ERROR_CODE: - *value = current_evmcs->idt_vectoring_error_code; - break; - case VM_EXIT_INSTRUCTION_LEN: - *value = current_evmcs->vm_exit_instruction_len; - break; - case VMX_INSTRUCTION_INFO: - *value = current_evmcs->vmx_instruction_info; - break; - case PAGE_FAULT_ERROR_CODE_MASK: - *value = current_evmcs->page_fault_error_code_mask; - break; - case PAGE_FAULT_ERROR_CODE_MATCH: - *value = current_evmcs->page_fault_error_code_match; - break; - case CR3_TARGET_COUNT: - *value = current_evmcs->cr3_target_count; - break; - case VM_EXIT_MSR_STORE_COUNT: - *value = current_evmcs->vm_exit_msr_store_count; - break; - case VM_EXIT_MSR_LOAD_COUNT: - *value = current_evmcs->vm_exit_msr_load_count; - break; - case VM_ENTRY_MSR_LOAD_COUNT: - *value = current_evmcs->vm_entry_msr_load_count; - break; - case HOST_ES_SELECTOR: - *value = current_evmcs->host_es_selector; - break; - case HOST_CS_SELECTOR: - *value = current_evmcs->host_cs_selector; - break; - case HOST_SS_SELECTOR: - *value = current_evmcs->host_ss_selector; - break; - case HOST_DS_SELECTOR: - *value = current_evmcs->host_ds_selector; - break; - case HOST_FS_SELECTOR: - *value = current_evmcs->host_fs_selector; - break; - case HOST_GS_SELECTOR: - *value = current_evmcs->host_gs_selector; - break; - case HOST_TR_SELECTOR: - *value = current_evmcs->host_tr_selector; - break; - case GUEST_ES_SELECTOR: - *value = current_evmcs->guest_es_selector; - break; - case GUEST_CS_SELECTOR: - *value = current_evmcs->guest_cs_selector; - break; - case GUEST_SS_SELECTOR: - *value = current_evmcs->guest_ss_selector; - break; - case GUEST_DS_SELECTOR: - *value = current_evmcs->guest_ds_selector; - break; - case GUEST_FS_SELECTOR: - *value = current_evmcs->guest_fs_selector; - break; - case GUEST_GS_SELECTOR: - *value = current_evmcs->guest_gs_selector; - break; - case GUEST_LDTR_SELECTOR: - *value = current_evmcs->guest_ldtr_selector; - break; - case GUEST_TR_SELECTOR: - *value = current_evmcs->guest_tr_selector; - break; - case VIRTUAL_PROCESSOR_ID: - *value = current_evmcs->virtual_processor_id; - break; - case HOST_IA32_PERF_GLOBAL_CTRL: - *value = current_evmcs->host_ia32_perf_global_ctrl; - break; - case GUEST_IA32_PERF_GLOBAL_CTRL: - *value = current_evmcs->guest_ia32_perf_global_ctrl; - break; - case ENCLS_EXITING_BITMAP: - *value = current_evmcs->encls_exiting_bitmap; - break; - case TSC_MULTIPLIER: - *value = current_evmcs->tsc_multiplier; - break; - default: return 1; - } - - return 0; -} - -static inline int evmcs_vmwrite(uint64_t encoding, uint64_t value) -{ - switch (encoding) { - case GUEST_RIP: - current_evmcs->guest_rip = value; - current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE; - break; - case GUEST_RSP: - current_evmcs->guest_rsp = value; - 
current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_BASIC; - break; - case GUEST_RFLAGS: - current_evmcs->guest_rflags = value; - current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_BASIC; - break; - case HOST_IA32_PAT: - current_evmcs->host_ia32_pat = value; - current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1; - break; - case HOST_IA32_EFER: - current_evmcs->host_ia32_efer = value; - current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1; - break; - case HOST_CR0: - current_evmcs->host_cr0 = value; - current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1; - break; - case HOST_CR3: - current_evmcs->host_cr3 = value; - current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1; - break; - case HOST_CR4: - current_evmcs->host_cr4 = value; - current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1; - break; - case HOST_IA32_SYSENTER_ESP: - current_evmcs->host_ia32_sysenter_esp = value; - current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1; - break; - case HOST_IA32_SYSENTER_EIP: - current_evmcs->host_ia32_sysenter_eip = value; - current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1; - break; - case HOST_RIP: - current_evmcs->host_rip = value; - current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1; - break; - case IO_BITMAP_A: - current_evmcs->io_bitmap_a = value; - current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_IO_BITMAP; - break; - case IO_BITMAP_B: - current_evmcs->io_bitmap_b = value; - current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_IO_BITMAP; - break; - case MSR_BITMAP: - current_evmcs->msr_bitmap = value; - current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP; - break; - case GUEST_ES_BASE: - current_evmcs->guest_es_base = value; - current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2; - break; - case GUEST_CS_BASE: - current_evmcs->guest_cs_base = value; - current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2; - break; - case GUEST_SS_BASE: - current_evmcs->guest_ss_base = value; - current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2; - break; - case GUEST_DS_BASE: - current_evmcs->guest_ds_base = value; - current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2; - break; - case GUEST_FS_BASE: - current_evmcs->guest_fs_base = value; - current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2; - break; - case GUEST_GS_BASE: - current_evmcs->guest_gs_base = value; - current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2; - break; - case GUEST_LDTR_BASE: - current_evmcs->guest_ldtr_base = value; - current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2; - break; - case GUEST_TR_BASE: - current_evmcs->guest_tr_base = value; - current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2; - break; - case GUEST_GDTR_BASE: - current_evmcs->guest_gdtr_base = value; - current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2; - break; - case GUEST_IDTR_BASE: - current_evmcs->guest_idtr_base = value; - current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2; - break; - case TSC_OFFSET: - current_evmcs->tsc_offset = value; - current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP2; - break; - case 
VIRTUAL_APIC_PAGE_ADDR: - current_evmcs->virtual_apic_page_addr = value; - current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP2; - break; - case VMCS_LINK_POINTER: - current_evmcs->vmcs_link_pointer = value; - current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1; - break; - case GUEST_IA32_DEBUGCTL: - current_evmcs->guest_ia32_debugctl = value; - current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1; - break; - case GUEST_IA32_PAT: - current_evmcs->guest_ia32_pat = value; - current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1; - break; - case GUEST_IA32_EFER: - current_evmcs->guest_ia32_efer = value; - current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1; - break; - case GUEST_PDPTR0: - current_evmcs->guest_pdptr0 = value; - current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1; - break; - case GUEST_PDPTR1: - current_evmcs->guest_pdptr1 = value; - current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1; - break; - case GUEST_PDPTR2: - current_evmcs->guest_pdptr2 = value; - current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1; - break; - case GUEST_PDPTR3: - current_evmcs->guest_pdptr3 = value; - current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1; - break; - case GUEST_PENDING_DBG_EXCEPTIONS: - current_evmcs->guest_pending_dbg_exceptions = value; - current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1; - break; - case GUEST_SYSENTER_ESP: - current_evmcs->guest_sysenter_esp = value; - current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1; - break; - case GUEST_SYSENTER_EIP: - current_evmcs->guest_sysenter_eip = value; - current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1; - break; - case CR0_GUEST_HOST_MASK: - current_evmcs->cr0_guest_host_mask = value; - current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR; - break; - case CR4_GUEST_HOST_MASK: - current_evmcs->cr4_guest_host_mask = value; - current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR; - break; - case CR0_READ_SHADOW: - current_evmcs->cr0_read_shadow = value; - current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR; - break; - case CR4_READ_SHADOW: - current_evmcs->cr4_read_shadow = value; - current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR; - break; - case GUEST_CR0: - current_evmcs->guest_cr0 = value; - current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR; - break; - case GUEST_CR3: - current_evmcs->guest_cr3 = value; - current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR; - break; - case GUEST_CR4: - current_evmcs->guest_cr4 = value; - current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR; - break; - case GUEST_DR7: - current_evmcs->guest_dr7 = value; - current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR; - break; - case HOST_FS_BASE: - current_evmcs->host_fs_base = value; - current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER; - break; - case HOST_GS_BASE: - current_evmcs->host_gs_base = value; - current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER; - break; - case HOST_TR_BASE: - current_evmcs->host_tr_base = value; - current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER; - break; - case HOST_GDTR_BASE: - 
current_evmcs->host_gdtr_base = value; - current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER; - break; - case HOST_IDTR_BASE: - current_evmcs->host_idtr_base = value; - current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER; - break; - case HOST_RSP: - current_evmcs->host_rsp = value; - current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER; - break; - case EPT_POINTER: - current_evmcs->ept_pointer = value; - current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_XLAT; - break; - case GUEST_BNDCFGS: - current_evmcs->guest_bndcfgs = value; - current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1; - break; - case XSS_EXIT_BITMAP: - current_evmcs->xss_exit_bitmap = value; - current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP2; - break; - case GUEST_PHYSICAL_ADDRESS: - current_evmcs->guest_physical_address = value; - current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE; - break; - case EXIT_QUALIFICATION: - current_evmcs->exit_qualification = value; - current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE; - break; - case GUEST_LINEAR_ADDRESS: - current_evmcs->guest_linear_address = value; - current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE; - break; - case VM_EXIT_MSR_STORE_ADDR: - current_evmcs->vm_exit_msr_store_addr = value; - current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL; - break; - case VM_EXIT_MSR_LOAD_ADDR: - current_evmcs->vm_exit_msr_load_addr = value; - current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL; - break; - case VM_ENTRY_MSR_LOAD_ADDR: - current_evmcs->vm_entry_msr_load_addr = value; - current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL; - break; - case CR3_TARGET_VALUE0: - current_evmcs->cr3_target_value0 = value; - current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL; - break; - case CR3_TARGET_VALUE1: - current_evmcs->cr3_target_value1 = value; - current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL; - break; - case CR3_TARGET_VALUE2: - current_evmcs->cr3_target_value2 = value; - current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL; - break; - case CR3_TARGET_VALUE3: - current_evmcs->cr3_target_value3 = value; - current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL; - break; - case TPR_THRESHOLD: - current_evmcs->tpr_threshold = value; - current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE; - break; - case GUEST_INTERRUPTIBILITY_INFO: - current_evmcs->guest_interruptibility_info = value; - current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_BASIC; - break; - case CPU_BASED_VM_EXEC_CONTROL: - current_evmcs->cpu_based_vm_exec_control = value; - current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_PROC; - break; - case EXCEPTION_BITMAP: - current_evmcs->exception_bitmap = value; - current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EXCPN; - break; - case VM_ENTRY_CONTROLS: - current_evmcs->vm_entry_controls = value; - current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_ENTRY; - break; - case VM_ENTRY_INTR_INFO_FIELD: - current_evmcs->vm_entry_intr_info_field = value; - current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EVENT; - break; - case VM_ENTRY_EXCEPTION_ERROR_CODE: - current_evmcs->vm_entry_exception_error_code = value; - 
current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EVENT; - break; - case VM_ENTRY_INSTRUCTION_LEN: - current_evmcs->vm_entry_instruction_len = value; - current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EVENT; - break; - case HOST_IA32_SYSENTER_CS: - current_evmcs->host_ia32_sysenter_cs = value; - current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1; - break; - case PIN_BASED_VM_EXEC_CONTROL: - current_evmcs->pin_based_vm_exec_control = value; - current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP1; - break; - case VM_EXIT_CONTROLS: - current_evmcs->vm_exit_controls = value; - current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP1; - break; - case SECONDARY_VM_EXEC_CONTROL: - current_evmcs->secondary_vm_exec_control = value; - current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP1; - break; - case GUEST_ES_LIMIT: - current_evmcs->guest_es_limit = value; - current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2; - break; - case GUEST_CS_LIMIT: - current_evmcs->guest_cs_limit = value; - current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2; - break; - case GUEST_SS_LIMIT: - current_evmcs->guest_ss_limit = value; - current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2; - break; - case GUEST_DS_LIMIT: - current_evmcs->guest_ds_limit = value; - current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2; - break; - case GUEST_FS_LIMIT: - current_evmcs->guest_fs_limit = value; - current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2; - break; - case GUEST_GS_LIMIT: - current_evmcs->guest_gs_limit = value; - current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2; - break; - case GUEST_LDTR_LIMIT: - current_evmcs->guest_ldtr_limit = value; - current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2; - break; - case GUEST_TR_LIMIT: - current_evmcs->guest_tr_limit = value; - current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2; - break; - case GUEST_GDTR_LIMIT: - current_evmcs->guest_gdtr_limit = value; - current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2; - break; - case GUEST_IDTR_LIMIT: - current_evmcs->guest_idtr_limit = value; - current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2; - break; - case GUEST_ES_AR_BYTES: - current_evmcs->guest_es_ar_bytes = value; - current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2; - break; - case GUEST_CS_AR_BYTES: - current_evmcs->guest_cs_ar_bytes = value; - current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2; - break; - case GUEST_SS_AR_BYTES: - current_evmcs->guest_ss_ar_bytes = value; - current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2; - break; - case GUEST_DS_AR_BYTES: - current_evmcs->guest_ds_ar_bytes = value; - current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2; - break; - case GUEST_FS_AR_BYTES: - current_evmcs->guest_fs_ar_bytes = value; - current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2; - break; - case GUEST_GS_AR_BYTES: - current_evmcs->guest_gs_ar_bytes = value; - current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2; - break; - case GUEST_LDTR_AR_BYTES: - current_evmcs->guest_ldtr_ar_bytes = value; - current_evmcs->hv_clean_fields &= 
~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2; - break; - case GUEST_TR_AR_BYTES: - current_evmcs->guest_tr_ar_bytes = value; - current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2; - break; - case GUEST_ACTIVITY_STATE: - current_evmcs->guest_activity_state = value; - current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1; - break; - case GUEST_SYSENTER_CS: - current_evmcs->guest_sysenter_cs = value; - current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1; - break; - case VM_INSTRUCTION_ERROR: - current_evmcs->vm_instruction_error = value; - current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE; - break; - case VM_EXIT_REASON: - current_evmcs->vm_exit_reason = value; - current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE; - break; - case VM_EXIT_INTR_INFO: - current_evmcs->vm_exit_intr_info = value; - current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE; - break; - case VM_EXIT_INTR_ERROR_CODE: - current_evmcs->vm_exit_intr_error_code = value; - current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE; - break; - case IDT_VECTORING_INFO_FIELD: - current_evmcs->idt_vectoring_info_field = value; - current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE; - break; - case IDT_VECTORING_ERROR_CODE: - current_evmcs->idt_vectoring_error_code = value; - current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE; - break; - case VM_EXIT_INSTRUCTION_LEN: - current_evmcs->vm_exit_instruction_len = value; - current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE; - break; - case VMX_INSTRUCTION_INFO: - current_evmcs->vmx_instruction_info = value; - current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE; - break; - case PAGE_FAULT_ERROR_CODE_MASK: - current_evmcs->page_fault_error_code_mask = value; - current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL; - break; - case PAGE_FAULT_ERROR_CODE_MATCH: - current_evmcs->page_fault_error_code_match = value; - current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL; - break; - case CR3_TARGET_COUNT: - current_evmcs->cr3_target_count = value; - current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL; - break; - case VM_EXIT_MSR_STORE_COUNT: - current_evmcs->vm_exit_msr_store_count = value; - current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL; - break; - case VM_EXIT_MSR_LOAD_COUNT: - current_evmcs->vm_exit_msr_load_count = value; - current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL; - break; - case VM_ENTRY_MSR_LOAD_COUNT: - current_evmcs->vm_entry_msr_load_count = value; - current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL; - break; - case HOST_ES_SELECTOR: - current_evmcs->host_es_selector = value; - current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1; - break; - case HOST_CS_SELECTOR: - current_evmcs->host_cs_selector = value; - current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1; - break; - case HOST_SS_SELECTOR: - current_evmcs->host_ss_selector = value; - current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1; - break; - case HOST_DS_SELECTOR: - current_evmcs->host_ds_selector = value; - current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1; - break; - case HOST_FS_SELECTOR: - current_evmcs->host_fs_selector = value; - current_evmcs->hv_clean_fields &= 
~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1; - break; - case HOST_GS_SELECTOR: - current_evmcs->host_gs_selector = value; - current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1; - break; - case HOST_TR_SELECTOR: - current_evmcs->host_tr_selector = value; - current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1; - break; - case GUEST_ES_SELECTOR: - current_evmcs->guest_es_selector = value; - current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2; - break; - case GUEST_CS_SELECTOR: - current_evmcs->guest_cs_selector = value; - current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2; - break; - case GUEST_SS_SELECTOR: - current_evmcs->guest_ss_selector = value; - current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2; - break; - case GUEST_DS_SELECTOR: - current_evmcs->guest_ds_selector = value; - current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2; - break; - case GUEST_FS_SELECTOR: - current_evmcs->guest_fs_selector = value; - current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2; - break; - case GUEST_GS_SELECTOR: - current_evmcs->guest_gs_selector = value; - current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2; - break; - case GUEST_LDTR_SELECTOR: - current_evmcs->guest_ldtr_selector = value; - current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2; - break; - case GUEST_TR_SELECTOR: - current_evmcs->guest_tr_selector = value; - current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2; - break; - case VIRTUAL_PROCESSOR_ID: - current_evmcs->virtual_processor_id = value; - current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_XLAT; - break; - case HOST_IA32_PERF_GLOBAL_CTRL: - current_evmcs->host_ia32_perf_global_ctrl = value; - current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1; - break; - case GUEST_IA32_PERF_GLOBAL_CTRL: - current_evmcs->guest_ia32_perf_global_ctrl = value; - current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1; - break; - case ENCLS_EXITING_BITMAP: - current_evmcs->encls_exiting_bitmap = value; - current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP2; - break; - case TSC_MULTIPLIER: - current_evmcs->tsc_multiplier = value; - current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP2; - break; - default: return 1; - } - - return 0; -} - -static inline int evmcs_vmlaunch(void) -{ - int ret; - - current_evmcs->hv_clean_fields = 0; - - __asm__ __volatile__("push %%rbp;" - "push %%rcx;" - "push %%rdx;" - "push %%rsi;" - "push %%rdi;" - "push $0;" - "mov %%rsp, (%[host_rsp]);" - "lea 1f(%%rip), %%rax;" - "mov %%rax, (%[host_rip]);" - "vmlaunch;" - "incq (%%rsp);" - "1: pop %%rax;" - "pop %%rdi;" - "pop %%rsi;" - "pop %%rdx;" - "pop %%rcx;" - "pop %%rbp;" - : [ret]"=&a"(ret) - : [host_rsp]"r" - ((uint64_t)¤t_evmcs->host_rsp), - [host_rip]"r" - ((uint64_t)¤t_evmcs->host_rip) - : "memory", "cc", "rbx", "r8", "r9", "r10", - "r11", "r12", "r13", "r14", "r15"); - return ret; -} - -/* - * No guest state (e.g. GPRs) is established by this vmresume. 
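
Every evmcs_vmwrite() case above follows the same two-step pattern: store the new value into the enlightened VMCS in memory, then clear the clean-field bit covering that field group so the hypervisor reloads it on the next entry (evmcs_vmlaunch() simply zeroes hv_clean_fields to force a full reload). A minimal sketch of that pattern, using a cut-down struct and made-up group masks rather than the real eVMCS layout:

#include <stdint.h>

/* Illustrative only: reduced stand-ins for the real enlightened-VMCS layout
 * and the HV_VMX_ENLIGHTENED_CLEAN_FIELD_* masks used above. */
#define MINI_CLEAN_FIELD_GUEST_BASIC	(1u << 2)
#define MINI_CLEAN_FIELD_HOST_GRP1	(1u << 8)

struct mini_evmcs {
	uint64_t guest_rsp;
	uint64_t host_rip;
	uint32_t hv_clean_fields;	/* set bit == hypervisor may reuse its cached copy */
};

static inline void mini_write_guest_rsp(struct mini_evmcs *evmcs, uint64_t val)
{
	evmcs->guest_rsp = val;
	/* Mark the guest-basic group dirty so the next entry reloads it. */
	evmcs->hv_clean_fields &= ~MINI_CLEAN_FIELD_GUEST_BASIC;
}

static inline void mini_write_host_rip(struct mini_evmcs *evmcs, uint64_t val)
{
	evmcs->host_rip = val;
	evmcs->hv_clean_fields &= ~MINI_CLEAN_FIELD_HOST_GRP1;
}
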
- */ -static inline int evmcs_vmresume(void) -{ - int ret; - - /* HOST_RIP */ - current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1; - /* HOST_RSP */ - current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER; - - __asm__ __volatile__("push %%rbp;" - "push %%rcx;" - "push %%rdx;" - "push %%rsi;" - "push %%rdi;" - "push $0;" - "mov %%rsp, (%[host_rsp]);" - "lea 1f(%%rip), %%rax;" - "mov %%rax, (%[host_rip]);" - "vmresume;" - "incq (%%rsp);" - "1: pop %%rax;" - "pop %%rdi;" - "pop %%rsi;" - "pop %%rdx;" - "pop %%rcx;" - "pop %%rbp;" - : [ret]"=&a"(ret) - : [host_rsp]"r" - ((uint64_t)¤t_evmcs->host_rsp), - [host_rip]"r" - ((uint64_t)¤t_evmcs->host_rip) - : "memory", "cc", "rbx", "r8", "r9", "r10", - "r11", "r12", "r13", "r14", "r15"); - return ret; -} - -#endif /* !SELFTEST_KVM_EVMCS_H */ diff --git a/tools/testing/selftests/kvm/include/x86_64/hyperv.h b/tools/testing/selftests/kvm/include/x86_64/hyperv.h deleted file mode 100644 index 6849e2552f1b..000000000000 --- a/tools/testing/selftests/kvm/include/x86_64/hyperv.h +++ /dev/null @@ -1,364 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * tools/testing/selftests/kvm/include/x86_64/hyperv.h - * - * Copyright (C) 2021, Red Hat, Inc. - * - */ - -#ifndef SELFTEST_KVM_HYPERV_H -#define SELFTEST_KVM_HYPERV_H - -#include "processor.h" - -#define HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS 0x40000000 -#define HYPERV_CPUID_INTERFACE 0x40000001 -#define HYPERV_CPUID_VERSION 0x40000002 -#define HYPERV_CPUID_FEATURES 0x40000003 -#define HYPERV_CPUID_ENLIGHTMENT_INFO 0x40000004 -#define HYPERV_CPUID_IMPLEMENT_LIMITS 0x40000005 -#define HYPERV_CPUID_CPU_MANAGEMENT_FEATURES 0x40000007 -#define HYPERV_CPUID_NESTED_FEATURES 0x4000000A -#define HYPERV_CPUID_SYNDBG_VENDOR_AND_MAX_FUNCTIONS 0x40000080 -#define HYPERV_CPUID_SYNDBG_INTERFACE 0x40000081 -#define HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES 0x40000082 - -#define HV_X64_MSR_GUEST_OS_ID 0x40000000 -#define HV_X64_MSR_HYPERCALL 0x40000001 -#define HV_X64_MSR_VP_INDEX 0x40000002 -#define HV_X64_MSR_RESET 0x40000003 -#define HV_X64_MSR_VP_RUNTIME 0x40000010 -#define HV_X64_MSR_TIME_REF_COUNT 0x40000020 -#define HV_X64_MSR_REFERENCE_TSC 0x40000021 -#define HV_X64_MSR_TSC_FREQUENCY 0x40000022 -#define HV_X64_MSR_APIC_FREQUENCY 0x40000023 -#define HV_X64_MSR_EOI 0x40000070 -#define HV_X64_MSR_ICR 0x40000071 -#define HV_X64_MSR_TPR 0x40000072 -#define HV_X64_MSR_VP_ASSIST_PAGE 0x40000073 -#define HV_X64_MSR_SCONTROL 0x40000080 -#define HV_X64_MSR_SVERSION 0x40000081 -#define HV_X64_MSR_SIEFP 0x40000082 -#define HV_X64_MSR_SIMP 0x40000083 -#define HV_X64_MSR_EOM 0x40000084 -#define HV_X64_MSR_SINT0 0x40000090 -#define HV_X64_MSR_SINT1 0x40000091 -#define HV_X64_MSR_SINT2 0x40000092 -#define HV_X64_MSR_SINT3 0x40000093 -#define HV_X64_MSR_SINT4 0x40000094 -#define HV_X64_MSR_SINT5 0x40000095 -#define HV_X64_MSR_SINT6 0x40000096 -#define HV_X64_MSR_SINT7 0x40000097 -#define HV_X64_MSR_SINT8 0x40000098 -#define HV_X64_MSR_SINT9 0x40000099 -#define HV_X64_MSR_SINT10 0x4000009A -#define HV_X64_MSR_SINT11 0x4000009B -#define HV_X64_MSR_SINT12 0x4000009C -#define HV_X64_MSR_SINT13 0x4000009D -#define HV_X64_MSR_SINT14 0x4000009E -#define HV_X64_MSR_SINT15 0x4000009F -#define HV_X64_MSR_STIMER0_CONFIG 0x400000B0 -#define HV_X64_MSR_STIMER0_COUNT 0x400000B1 -#define HV_X64_MSR_STIMER1_CONFIG 0x400000B2 -#define HV_X64_MSR_STIMER1_COUNT 0x400000B3 -#define HV_X64_MSR_STIMER2_CONFIG 0x400000B4 -#define HV_X64_MSR_STIMER2_COUNT 0x400000B5 -#define HV_X64_MSR_STIMER3_CONFIG 
0x400000B6 -#define HV_X64_MSR_STIMER3_COUNT 0x400000B7 -#define HV_X64_MSR_GUEST_IDLE 0x400000F0 -#define HV_X64_MSR_CRASH_P0 0x40000100 -#define HV_X64_MSR_CRASH_P1 0x40000101 -#define HV_X64_MSR_CRASH_P2 0x40000102 -#define HV_X64_MSR_CRASH_P3 0x40000103 -#define HV_X64_MSR_CRASH_P4 0x40000104 -#define HV_X64_MSR_CRASH_CTL 0x40000105 -#define HV_X64_MSR_REENLIGHTENMENT_CONTROL 0x40000106 -#define HV_X64_MSR_TSC_EMULATION_CONTROL 0x40000107 -#define HV_X64_MSR_TSC_EMULATION_STATUS 0x40000108 -#define HV_X64_MSR_TSC_INVARIANT_CONTROL 0x40000118 - -#define HV_X64_MSR_SYNDBG_CONTROL 0x400000F1 -#define HV_X64_MSR_SYNDBG_STATUS 0x400000F2 -#define HV_X64_MSR_SYNDBG_SEND_BUFFER 0x400000F3 -#define HV_X64_MSR_SYNDBG_RECV_BUFFER 0x400000F4 -#define HV_X64_MSR_SYNDBG_PENDING_BUFFER 0x400000F5 -#define HV_X64_MSR_SYNDBG_OPTIONS 0x400000FF - -/* HYPERV_CPUID_FEATURES.EAX */ -#define HV_MSR_VP_RUNTIME_AVAILABLE \ - KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 0) -#define HV_MSR_TIME_REF_COUNT_AVAILABLE \ - KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 1) -#define HV_MSR_SYNIC_AVAILABLE \ - KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 2) -#define HV_MSR_SYNTIMER_AVAILABLE \ - KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 3) -#define HV_MSR_APIC_ACCESS_AVAILABLE \ - KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 4) -#define HV_MSR_HYPERCALL_AVAILABLE \ - KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 5) -#define HV_MSR_VP_INDEX_AVAILABLE \ - KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 6) -#define HV_MSR_RESET_AVAILABLE \ - KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 7) -#define HV_MSR_STAT_PAGES_AVAILABLE \ - KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 8) -#define HV_MSR_REFERENCE_TSC_AVAILABLE \ - KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 9) -#define HV_MSR_GUEST_IDLE_AVAILABLE \ - KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 10) -#define HV_ACCESS_FREQUENCY_MSRS \ - KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 11) -#define HV_ACCESS_REENLIGHTENMENT \ - KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 13) -#define HV_ACCESS_TSC_INVARIANT \ - KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 15) - -/* HYPERV_CPUID_FEATURES.EBX */ -#define HV_CREATE_PARTITIONS \ - KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 0) -#define HV_ACCESS_PARTITION_ID \ - KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 1) -#define HV_ACCESS_MEMORY_POOL \ - KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 2) -#define HV_ADJUST_MESSAGE_BUFFERS \ - KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 3) -#define HV_POST_MESSAGES \ - KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 4) -#define HV_SIGNAL_EVENTS \ - KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 5) -#define HV_CREATE_PORT \ - KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 6) -#define HV_CONNECT_PORT \ - KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 7) -#define HV_ACCESS_STATS \ - KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 8) -#define HV_DEBUGGING \ - KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 11) -#define HV_CPU_MANAGEMENT \ - KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 12) -#define HV_ENABLE_EXTENDED_HYPERCALLS \ - KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 20) -#define HV_ISOLATION \ - KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 22) - -/* HYPERV_CPUID_FEATURES.EDX */ -#define HV_X64_MWAIT_AVAILABLE \ - KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EDX, 0) -#define HV_X64_GUEST_DEBUGGING_AVAILABLE \ - 
KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EDX, 1) -#define HV_X64_PERF_MONITOR_AVAILABLE \ - KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EDX, 2) -#define HV_X64_CPU_DYNAMIC_PARTITIONING_AVAILABLE \ - KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EDX, 3) -#define HV_X64_HYPERCALL_XMM_INPUT_AVAILABLE \ - KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EDX, 4) -#define HV_X64_GUEST_IDLE_STATE_AVAILABLE \ - KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EDX, 5) -#define HV_FEATURE_FREQUENCY_MSRS_AVAILABLE \ - KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EDX, 8) -#define HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE \ - KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EDX, 10) -#define HV_FEATURE_DEBUG_MSRS_AVAILABLE \ - KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EDX, 11) -#define HV_STIMER_DIRECT_MODE_AVAILABLE \ - KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EDX, 19) - -/* HYPERV_CPUID_ENLIGHTMENT_INFO.EAX */ -#define HV_X64_AS_SWITCH_RECOMMENDED \ - KVM_X86_CPU_FEATURE(HYPERV_CPUID_ENLIGHTMENT_INFO, 0, EAX, 0) -#define HV_X64_LOCAL_TLB_FLUSH_RECOMMENDED \ - KVM_X86_CPU_FEATURE(HYPERV_CPUID_ENLIGHTMENT_INFO, 0, EAX, 1) -#define HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED \ - KVM_X86_CPU_FEATURE(HYPERV_CPUID_ENLIGHTMENT_INFO, 0, EAX, 2) -#define HV_X64_APIC_ACCESS_RECOMMENDED \ - KVM_X86_CPU_FEATURE(HYPERV_CPUID_ENLIGHTMENT_INFO, 0, EAX, 3) -#define HV_X64_SYSTEM_RESET_RECOMMENDED \ - KVM_X86_CPU_FEATURE(HYPERV_CPUID_ENLIGHTMENT_INFO, 0, EAX, 4) -#define HV_X64_RELAXED_TIMING_RECOMMENDED \ - KVM_X86_CPU_FEATURE(HYPERV_CPUID_ENLIGHTMENT_INFO, 0, EAX, 5) -#define HV_DEPRECATING_AEOI_RECOMMENDED \ - KVM_X86_CPU_FEATURE(HYPERV_CPUID_ENLIGHTMENT_INFO, 0, EAX, 9) -#define HV_X64_CLUSTER_IPI_RECOMMENDED \ - KVM_X86_CPU_FEATURE(HYPERV_CPUID_ENLIGHTMENT_INFO, 0, EAX, 10) -#define HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED \ - KVM_X86_CPU_FEATURE(HYPERV_CPUID_ENLIGHTMENT_INFO, 0, EAX, 11) -#define HV_X64_ENLIGHTENED_VMCS_RECOMMENDED \ - KVM_X86_CPU_FEATURE(HYPERV_CPUID_ENLIGHTMENT_INFO, 0, EAX, 14) - -/* HYPERV_CPUID_NESTED_FEATURES.EAX */ -#define HV_X64_NESTED_DIRECT_FLUSH \ - KVM_X86_CPU_FEATURE(HYPERV_CPUID_NESTED_FEATURES, 0, EAX, 17) -#define HV_X64_NESTED_GUEST_MAPPING_FLUSH \ - KVM_X86_CPU_FEATURE(HYPERV_CPUID_NESTED_FEATURES, 0, EAX, 18) -#define HV_X64_NESTED_MSR_BITMAP \ - KVM_X86_CPU_FEATURE(HYPERV_CPUID_NESTED_FEATURES, 0, EAX, 19) - -/* HYPERV_CPUID_NESTED_FEATURES.EBX */ -#define HV_X64_NESTED_EVMCS1_PERF_GLOBAL_CTRL \ - KVM_X86_CPU_FEATURE(HYPERV_CPUID_NESTED_FEATURES, 0, EBX, 0) - -/* HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES.EAX */ -#define HV_X64_SYNDBG_CAP_ALLOW_KERNEL_DEBUGGING \ - KVM_X86_CPU_FEATURE(HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES, 0, EAX, 1) - -/* Hypercalls */ -#define HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE 0x0002 -#define HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST 0x0003 -#define HVCALL_NOTIFY_LONG_SPIN_WAIT 0x0008 -#define HVCALL_SEND_IPI 0x000b -#define HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX 0x0013 -#define HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX 0x0014 -#define HVCALL_SEND_IPI_EX 0x0015 -#define HVCALL_GET_PARTITION_ID 0x0046 -#define HVCALL_DEPOSIT_MEMORY 0x0048 -#define HVCALL_CREATE_VP 0x004e -#define HVCALL_GET_VP_REGISTERS 0x0050 -#define HVCALL_SET_VP_REGISTERS 0x0051 -#define HVCALL_POST_MESSAGE 0x005c -#define HVCALL_SIGNAL_EVENT 0x005d -#define HVCALL_POST_DEBUG_DATA 0x0069 -#define HVCALL_RETRIEVE_DEBUG_DATA 0x006a -#define HVCALL_RESET_DEBUG_SESSION 0x006b -#define HVCALL_ADD_LOGICAL_PROCESSOR 0x0076 -#define HVCALL_MAP_DEVICE_INTERRUPT 0x007c -#define HVCALL_UNMAP_DEVICE_INTERRUPT 
0x007d -#define HVCALL_RETARGET_INTERRUPT 0x007e -#define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_SPACE 0x00af -#define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_LIST 0x00b0 - -/* Extended hypercalls */ -#define HV_EXT_CALL_QUERY_CAPABILITIES 0x8001 - -#define HV_FLUSH_ALL_PROCESSORS BIT(0) -#define HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES BIT(1) -#define HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY BIT(2) -#define HV_FLUSH_USE_EXTENDED_RANGE_FORMAT BIT(3) - -/* hypercall status code */ -#define HV_STATUS_SUCCESS 0 -#define HV_STATUS_INVALID_HYPERCALL_CODE 2 -#define HV_STATUS_INVALID_HYPERCALL_INPUT 3 -#define HV_STATUS_INVALID_ALIGNMENT 4 -#define HV_STATUS_INVALID_PARAMETER 5 -#define HV_STATUS_ACCESS_DENIED 6 -#define HV_STATUS_OPERATION_DENIED 8 -#define HV_STATUS_INSUFFICIENT_MEMORY 11 -#define HV_STATUS_INVALID_PORT_ID 17 -#define HV_STATUS_INVALID_CONNECTION_ID 18 -#define HV_STATUS_INSUFFICIENT_BUFFERS 19 - -/* hypercall options */ -#define HV_HYPERCALL_FAST_BIT BIT(16) -#define HV_HYPERCALL_VARHEAD_OFFSET 17 -#define HV_HYPERCALL_REP_COMP_OFFSET 32 - -/* - * Issue a Hyper-V hypercall. Returns exception vector raised or 0, 'hv_status' - * is set to the hypercall status (if no exception occurred). - */ -static inline uint8_t __hyperv_hypercall(u64 control, vm_vaddr_t input_address, - vm_vaddr_t output_address, - uint64_t *hv_status) -{ - uint64_t error_code; - uint8_t vector; - - /* Note both the hypercall and the "asm safe" clobber r9-r11. */ - asm volatile("mov %[output_address], %%r8\n\t" - KVM_ASM_SAFE("vmcall") - : "=a" (*hv_status), - "+c" (control), "+d" (input_address), - KVM_ASM_SAFE_OUTPUTS(vector, error_code) - : [output_address] "r"(output_address), - "a" (-EFAULT) - : "cc", "memory", "r8", KVM_ASM_SAFE_CLOBBERS); - return vector; -} - -/* Issue a Hyper-V hypercall and assert that it succeeded. */ -static inline void hyperv_hypercall(u64 control, vm_vaddr_t input_address, - vm_vaddr_t output_address) -{ - uint64_t hv_status; - uint8_t vector; - - vector = __hyperv_hypercall(control, input_address, output_address, &hv_status); - - GUEST_ASSERT(!vector); - GUEST_ASSERT((hv_status & 0xffff) == 0); -} - -/* Write 'Fast' hypercall input 'data' to the first 'n_sse_regs' SSE regs */ -static inline void hyperv_write_xmm_input(void *data, int n_sse_regs) -{ - int i; - - for (i = 0; i < n_sse_regs; i++) - write_sse_reg(i, (sse128_t *)(data + sizeof(sse128_t) * i)); -} - -/* Proper HV_X64_MSR_GUEST_OS_ID value */ -#define HYPERV_LINUX_OS_ID ((u64)0x8100 << 48) - -#define HV_X64_MSR_VP_ASSIST_PAGE 0x40000073 -#define HV_X64_MSR_VP_ASSIST_PAGE_ENABLE 0x00000001 -#define HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT 12 -#define HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_MASK \ - (~((1ull << HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT) - 1)) - -struct hv_nested_enlightenments_control { - struct { - __u32 directhypercall:1; - __u32 reserved:31; - } features; - struct { - __u32 reserved; - } hypercallControls; -} __packed; - -/* Define virtual processor assist page structure. 
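
The hypercall helpers removed above hard-code the Hyper-V TLFS calling convention: the control word goes in RCX, the input and output GPAs in RDX and R8, and the status comes back in RAX. The HV_HYPERCALL_* defines give the bit positions inside the control word. A small sketch, with a made-up rep count and placeholder GPAs, of how a caller would typically compose that control word before handing it to hyperv_hypercall():

#include <stdint.h>

/* Same values as the defines above; repeated here so the sketch stands alone. */
#define HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST	0x0003
#define HV_HYPERCALL_REP_COMP_OFFSET		32

static inline uint64_t hv_build_control(uint16_t call_code, uint16_t rep_count)
{
	return (uint64_t)call_code |
	       ((uint64_t)rep_count << HV_HYPERCALL_REP_COMP_OFFSET);
}

/* In guest code, with input_gpa/output_gpa pointing at prepared pages:
 *
 *	uint64_t control = hv_build_control(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST, 1);
 *	hyperv_hypercall(control, input_gpa, output_gpa);
 */
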
*/ -struct hv_vp_assist_page { - __u32 apic_assist; - __u32 reserved1; - __u64 vtl_control[3]; - struct hv_nested_enlightenments_control nested_control; - __u8 enlighten_vmentry; - __u8 reserved2[7]; - __u64 current_nested_vmcs; -} __packed; - -extern struct hv_vp_assist_page *current_vp_assist; - -int enable_vp_assist(uint64_t vp_assist_pa, void *vp_assist); - -struct hyperv_test_pages { - /* VP assist page */ - void *vp_assist_hva; - uint64_t vp_assist_gpa; - void *vp_assist; - - /* Partition assist page */ - void *partition_assist_hva; - uint64_t partition_assist_gpa; - void *partition_assist; - - /* Enlightened VMCS */ - void *enlightened_vmcs_hva; - uint64_t enlightened_vmcs_gpa; - void *enlightened_vmcs; -}; - -struct hyperv_test_pages *vcpu_alloc_hyperv_test_pages(struct kvm_vm *vm, - vm_vaddr_t *p_hv_pages_gva); - -/* HV_X64_MSR_TSC_INVARIANT_CONTROL bits */ -#define HV_INVARIANT_TSC_EXPOSED BIT_ULL(0) - -const struct kvm_cpuid2 *kvm_get_supported_hv_cpuid(void); -const struct kvm_cpuid2 *vcpu_get_supported_hv_cpuid(struct kvm_vcpu *vcpu); -void vcpu_set_hv_cpuid(struct kvm_vcpu *vcpu); - -bool kvm_hv_cpu_has(struct kvm_x86_cpu_feature feature); - -#endif /* !SELFTEST_KVM_HYPERV_H */ diff --git a/tools/testing/selftests/kvm/include/x86_64/kvm_util_arch.h b/tools/testing/selftests/kvm/include/x86_64/kvm_util_arch.h deleted file mode 100644 index 972bb1c4ab4c..000000000000 --- a/tools/testing/selftests/kvm/include/x86_64/kvm_util_arch.h +++ /dev/null @@ -1,51 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -#ifndef SELFTEST_KVM_UTIL_ARCH_H -#define SELFTEST_KVM_UTIL_ARCH_H - -#include -#include - -#include "kvm_util_types.h" -#include "test_util.h" - -extern bool is_forced_emulation_enabled; - -struct kvm_vm_arch { - vm_vaddr_t gdt; - vm_vaddr_t tss; - vm_vaddr_t idt; - - uint64_t c_bit; - uint64_t s_bit; - int sev_fd; - bool is_pt_protected; -}; - -static inline bool __vm_arch_has_protected_memory(struct kvm_vm_arch *arch) -{ - return arch->c_bit || arch->s_bit; -} - -#define vm_arch_has_protected_memory(vm) \ - __vm_arch_has_protected_memory(&(vm)->arch) - -#define vcpu_arch_put_guest(mem, __val) \ -do { \ - const typeof(mem) val = (__val); \ - \ - if (!is_forced_emulation_enabled || guest_random_bool(&guest_rng)) { \ - (mem) = val; \ - } else if (guest_random_bool(&guest_rng)) { \ - __asm__ __volatile__(KVM_FEP "mov %1, %0" \ - : "+m" (mem) \ - : "r" (val) : "memory"); \ - } else { \ - uint64_t __old = READ_ONCE(mem); \ - \ - __asm__ __volatile__(KVM_FEP LOCK_PREFIX "cmpxchg %[new], %[ptr]" \ - : [ptr] "+m" (mem), [old] "+a" (__old) \ - : [new]"r" (val) : "memory", "cc"); \ - } \ -} while (0) - -#endif // SELFTEST_KVM_UTIL_ARCH_H diff --git a/tools/testing/selftests/kvm/include/x86_64/mce.h b/tools/testing/selftests/kvm/include/x86_64/mce.h deleted file mode 100644 index 6119321f3f5d..000000000000 --- a/tools/testing/selftests/kvm/include/x86_64/mce.h +++ /dev/null @@ -1,25 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * tools/testing/selftests/kvm/include/x86_64/mce.h - * - * Copyright (C) 2022, Google LLC. 
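
The vcpu_arch_put_guest() macro removed just above randomizes how each guest store is performed when forced emulation is available: a plain mov, a KVM_FEP-prefixed mov, or a KVM_FEP-prefixed locked cmpxchg, so the same test data exercises both the fast path and KVM's instruction emulator. A plain-C sketch of that selection logic, with stand-ins for the framework's RNG and the emulated-store asm:

#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>

/* Stand-ins, purely illustrative: the selftests use guest_random_bool() and
 * inline asm carrying the forced-emulation prefix instead. */
static bool coin_flip(void) { return rand() & 1; }
static void emulated_mov(uint64_t *mem, uint64_t val) { *mem = val; }
static void emulated_cmpxchg(uint64_t *mem, uint64_t val)
{
	__sync_val_compare_and_swap(mem, *mem, val);
}

static void put_guest_u64(uint64_t *mem, uint64_t val, bool fep_enabled)
{
	if (!fep_enabled || coin_flip())
		*mem = val;			/* ordinary store */
	else if (coin_flip())
		emulated_mov(mem, val);		/* would be the KVM_FEP "mov" */
	else
		emulated_cmpxchg(mem, val);	/* would be the KVM_FEP lock cmpxchg */
}
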
- */ - -#ifndef SELFTEST_KVM_MCE_H -#define SELFTEST_KVM_MCE_H - -#define MCG_CTL_P BIT_ULL(8) /* MCG_CTL register available */ -#define MCG_SER_P BIT_ULL(24) /* MCA recovery/new status bits */ -#define MCG_LMCE_P BIT_ULL(27) /* Local machine check supported */ -#define MCG_CMCI_P BIT_ULL(10) /* CMCI supported */ -#define KVM_MAX_MCE_BANKS 32 -#define MCG_CAP_BANKS_MASK 0xff /* Bit 0-7 of the MCG_CAP register are #banks */ -#define MCI_STATUS_VAL (1ULL << 63) /* valid error */ -#define MCI_STATUS_UC (1ULL << 61) /* uncorrected error */ -#define MCI_STATUS_EN (1ULL << 60) /* error enabled */ -#define MCI_STATUS_MISCV (1ULL << 59) /* misc error reg. valid */ -#define MCI_STATUS_ADDRV (1ULL << 58) /* addr reg. valid */ -#define MCM_ADDR_PHYS 2 /* physical address */ -#define MCI_CTL2_CMCI_EN BIT_ULL(30) - -#endif /* SELFTEST_KVM_MCE_H */ diff --git a/tools/testing/selftests/kvm/include/x86_64/pmu.h b/tools/testing/selftests/kvm/include/x86_64/pmu.h deleted file mode 100644 index 3c10c4dc0ae8..000000000000 --- a/tools/testing/selftests/kvm/include/x86_64/pmu.h +++ /dev/null @@ -1,97 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2023, Tencent, Inc. - */ -#ifndef SELFTEST_KVM_PMU_H -#define SELFTEST_KVM_PMU_H - -#include - -#define KVM_PMU_EVENT_FILTER_MAX_EVENTS 300 - -/* - * Encode an eventsel+umask pair into event-select MSR format. Note, this is - * technically AMD's format, as Intel's format only supports 8 bits for the - * event selector, i.e. doesn't use bits 24:16 for the selector. But, OR-ing - * in '0' is a nop and won't clobber the CMASK. - */ -#define RAW_EVENT(eventsel, umask) (((eventsel & 0xf00UL) << 24) | \ - ((eventsel) & 0xff) | \ - ((umask) & 0xff) << 8) - -/* - * These are technically Intel's definitions, but except for CMASK (see above), - * AMD's layout is compatible with Intel's. - */ -#define ARCH_PERFMON_EVENTSEL_EVENT GENMASK_ULL(7, 0) -#define ARCH_PERFMON_EVENTSEL_UMASK GENMASK_ULL(15, 8) -#define ARCH_PERFMON_EVENTSEL_USR BIT_ULL(16) -#define ARCH_PERFMON_EVENTSEL_OS BIT_ULL(17) -#define ARCH_PERFMON_EVENTSEL_EDGE BIT_ULL(18) -#define ARCH_PERFMON_EVENTSEL_PIN_CONTROL BIT_ULL(19) -#define ARCH_PERFMON_EVENTSEL_INT BIT_ULL(20) -#define ARCH_PERFMON_EVENTSEL_ANY BIT_ULL(21) -#define ARCH_PERFMON_EVENTSEL_ENABLE BIT_ULL(22) -#define ARCH_PERFMON_EVENTSEL_INV BIT_ULL(23) -#define ARCH_PERFMON_EVENTSEL_CMASK GENMASK_ULL(31, 24) - -/* RDPMC control flags, Intel only. */ -#define INTEL_RDPMC_METRICS BIT_ULL(29) -#define INTEL_RDPMC_FIXED BIT_ULL(30) -#define INTEL_RDPMC_FAST BIT_ULL(31) - -/* Fixed PMC controls, Intel only. 
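
The RAW_EVENT() macro above packs an event selector and unit mask into the event-select MSR layout: selector bits 7:0 stay in bits 7:0, the unit mask lands in bits 15:8, and selector bits 11:8 (AMD's extended selector field) are shifted up to bits 35:32. A tiny self-contained check, with the expected constants worked out by hand from the macro as shown:

#include <assert.h>

#define RAW_EVENT(eventsel, umask)	((((eventsel) & 0xf00UL) << 24) |	\
					 ((eventsel) & 0xff) |			\
					 (((umask) & 0xff) << 8))

int main(void)
{
	assert(RAW_EVENT(0x3c, 0x00) == 0x3cUL);	  /* CPU cycles */
	assert(RAW_EVENT(0x2e, 0x4f) == 0x4f2eUL);	  /* LLC references */
	assert(RAW_EVENT(0x1c0, 0x00) == 0x1000000c0UL);  /* bits 11:8 -> 35:32 */
	return 0;
}
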
*/ -#define FIXED_PMC_GLOBAL_CTRL_ENABLE(_idx) BIT_ULL((32 + (_idx))) - -#define FIXED_PMC_KERNEL BIT_ULL(0) -#define FIXED_PMC_USER BIT_ULL(1) -#define FIXED_PMC_ANYTHREAD BIT_ULL(2) -#define FIXED_PMC_ENABLE_PMI BIT_ULL(3) -#define FIXED_PMC_NR_BITS 4 -#define FIXED_PMC_CTRL(_idx, _val) ((_val) << ((_idx) * FIXED_PMC_NR_BITS)) - -#define PMU_CAP_FW_WRITES BIT_ULL(13) -#define PMU_CAP_LBR_FMT 0x3f - -#define INTEL_ARCH_CPU_CYCLES RAW_EVENT(0x3c, 0x00) -#define INTEL_ARCH_INSTRUCTIONS_RETIRED RAW_EVENT(0xc0, 0x00) -#define INTEL_ARCH_REFERENCE_CYCLES RAW_EVENT(0x3c, 0x01) -#define INTEL_ARCH_LLC_REFERENCES RAW_EVENT(0x2e, 0x4f) -#define INTEL_ARCH_LLC_MISSES RAW_EVENT(0x2e, 0x41) -#define INTEL_ARCH_BRANCHES_RETIRED RAW_EVENT(0xc4, 0x00) -#define INTEL_ARCH_BRANCHES_MISPREDICTED RAW_EVENT(0xc5, 0x00) -#define INTEL_ARCH_TOPDOWN_SLOTS RAW_EVENT(0xa4, 0x01) - -#define AMD_ZEN_CORE_CYCLES RAW_EVENT(0x76, 0x00) -#define AMD_ZEN_INSTRUCTIONS_RETIRED RAW_EVENT(0xc0, 0x00) -#define AMD_ZEN_BRANCHES_RETIRED RAW_EVENT(0xc2, 0x00) -#define AMD_ZEN_BRANCHES_MISPREDICTED RAW_EVENT(0xc3, 0x00) - -/* - * Note! The order and thus the index of the architectural events matters as - * support for each event is enumerated via CPUID using the index of the event. - */ -enum intel_pmu_architectural_events { - INTEL_ARCH_CPU_CYCLES_INDEX, - INTEL_ARCH_INSTRUCTIONS_RETIRED_INDEX, - INTEL_ARCH_REFERENCE_CYCLES_INDEX, - INTEL_ARCH_LLC_REFERENCES_INDEX, - INTEL_ARCH_LLC_MISSES_INDEX, - INTEL_ARCH_BRANCHES_RETIRED_INDEX, - INTEL_ARCH_BRANCHES_MISPREDICTED_INDEX, - INTEL_ARCH_TOPDOWN_SLOTS_INDEX, - NR_INTEL_ARCH_EVENTS, -}; - -enum amd_pmu_zen_events { - AMD_ZEN_CORE_CYCLES_INDEX, - AMD_ZEN_INSTRUCTIONS_INDEX, - AMD_ZEN_BRANCHES_INDEX, - AMD_ZEN_BRANCH_MISSES_INDEX, - NR_AMD_ZEN_EVENTS, -}; - -extern const uint64_t intel_pmu_arch_events[]; -extern const uint64_t amd_pmu_zen_events[]; - -#endif /* SELFTEST_KVM_PMU_H */ diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h deleted file mode 100644 index 645200e95f89..000000000000 --- a/tools/testing/selftests/kvm/include/x86_64/processor.h +++ /dev/null @@ -1,1397 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * tools/testing/selftests/kvm/include/x86_64/processor.h - * - * Copyright (C) 2018, Google LLC. - */ - -#ifndef SELFTEST_KVM_PROCESSOR_H -#define SELFTEST_KVM_PROCESSOR_H - -#include -#include -#include - -#include -#include - -#include -#include - -#include "kvm_util.h" -#include "ucall_common.h" - -extern bool host_cpu_is_intel; -extern bool host_cpu_is_amd; -extern uint64_t guest_tsc_khz; - -#ifndef MAX_NR_CPUID_ENTRIES -#define MAX_NR_CPUID_ENTRIES 100 -#endif - -/* Forced emulation prefix, used to invoke the emulator unconditionally. 
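
The FIXED_PMC_* helpers above encode Intel's fixed-counter controls: each counter gets a 4-bit field in IA32_FIXED_CTR_CTRL, and a matching enable bit at position 32+idx in IA32_PERF_GLOBAL_CTRL. A hedged sketch of enabling fixed counter 0 (instructions retired) for ring 0 and ring 3 from guest code; the MSR indices and the wrmsr() helper are the usual SDM/selftest ones, included here only to keep the example self-contained:

#include <stdint.h>

#define MSR_CORE_PERF_FIXED_CTR_CTRL	0x38d
#define MSR_CORE_PERF_GLOBAL_CTRL	0x38f

static inline void wrmsr(uint32_t msr, uint64_t val)
{
	asm volatile("wrmsr" :: "c"(msr), "a"((uint32_t)val),
				"d"((uint32_t)(val >> 32)) : "memory");
}

static void enable_fixed_ctr0(void)
{
	/* FIXED_PMC_CTRL(0, FIXED_PMC_KERNEL | FIXED_PMC_USER) == 0x3 */
	wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, 0x3);
	/* FIXED_PMC_GLOBAL_CTRL_ENABLE(0) == BIT_ULL(32) */
	wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 1ULL << 32);
}
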
*/ -#define KVM_FEP "ud2; .byte 'k', 'v', 'm';" - -#define NMI_VECTOR 0x02 - -#define X86_EFLAGS_FIXED (1u << 1) - -#define X86_CR4_VME (1ul << 0) -#define X86_CR4_PVI (1ul << 1) -#define X86_CR4_TSD (1ul << 2) -#define X86_CR4_DE (1ul << 3) -#define X86_CR4_PSE (1ul << 4) -#define X86_CR4_PAE (1ul << 5) -#define X86_CR4_MCE (1ul << 6) -#define X86_CR4_PGE (1ul << 7) -#define X86_CR4_PCE (1ul << 8) -#define X86_CR4_OSFXSR (1ul << 9) -#define X86_CR4_OSXMMEXCPT (1ul << 10) -#define X86_CR4_UMIP (1ul << 11) -#define X86_CR4_LA57 (1ul << 12) -#define X86_CR4_VMXE (1ul << 13) -#define X86_CR4_SMXE (1ul << 14) -#define X86_CR4_FSGSBASE (1ul << 16) -#define X86_CR4_PCIDE (1ul << 17) -#define X86_CR4_OSXSAVE (1ul << 18) -#define X86_CR4_SMEP (1ul << 20) -#define X86_CR4_SMAP (1ul << 21) -#define X86_CR4_PKE (1ul << 22) - -struct xstate_header { - u64 xstate_bv; - u64 xcomp_bv; - u64 reserved[6]; -} __attribute__((packed)); - -struct xstate { - u8 i387[512]; - struct xstate_header header; - u8 extended_state_area[0]; -} __attribute__ ((packed, aligned (64))); - -#define XFEATURE_MASK_FP BIT_ULL(0) -#define XFEATURE_MASK_SSE BIT_ULL(1) -#define XFEATURE_MASK_YMM BIT_ULL(2) -#define XFEATURE_MASK_BNDREGS BIT_ULL(3) -#define XFEATURE_MASK_BNDCSR BIT_ULL(4) -#define XFEATURE_MASK_OPMASK BIT_ULL(5) -#define XFEATURE_MASK_ZMM_Hi256 BIT_ULL(6) -#define XFEATURE_MASK_Hi16_ZMM BIT_ULL(7) -#define XFEATURE_MASK_PT BIT_ULL(8) -#define XFEATURE_MASK_PKRU BIT_ULL(9) -#define XFEATURE_MASK_PASID BIT_ULL(10) -#define XFEATURE_MASK_CET_USER BIT_ULL(11) -#define XFEATURE_MASK_CET_KERNEL BIT_ULL(12) -#define XFEATURE_MASK_LBR BIT_ULL(15) -#define XFEATURE_MASK_XTILE_CFG BIT_ULL(17) -#define XFEATURE_MASK_XTILE_DATA BIT_ULL(18) - -#define XFEATURE_MASK_AVX512 (XFEATURE_MASK_OPMASK | \ - XFEATURE_MASK_ZMM_Hi256 | \ - XFEATURE_MASK_Hi16_ZMM) -#define XFEATURE_MASK_XTILE (XFEATURE_MASK_XTILE_DATA | \ - XFEATURE_MASK_XTILE_CFG) - -/* Note, these are ordered alphabetically to match kvm_cpuid_entry2. Eww. */ -enum cpuid_output_regs { - KVM_CPUID_EAX, - KVM_CPUID_EBX, - KVM_CPUID_ECX, - KVM_CPUID_EDX -}; - -/* - * Pack the information into a 64-bit value so that each X86_FEATURE_XXX can be - * passed by value with no overhead. - */ -struct kvm_x86_cpu_feature { - u32 function; - u16 index; - u8 reg; - u8 bit; -}; -#define KVM_X86_CPU_FEATURE(fn, idx, gpr, __bit) \ -({ \ - struct kvm_x86_cpu_feature feature = { \ - .function = fn, \ - .index = idx, \ - .reg = KVM_CPUID_##gpr, \ - .bit = __bit, \ - }; \ - \ - kvm_static_assert((fn & 0xc0000000) == 0 || \ - (fn & 0xc0000000) == 0x40000000 || \ - (fn & 0xc0000000) == 0x80000000 || \ - (fn & 0xc0000000) == 0xc0000000); \ - kvm_static_assert(idx < BIT(sizeof(feature.index) * BITS_PER_BYTE)); \ - feature; \ -}) - -/* - * Basic Leafs, a.k.a. 
Intel defined - */ -#define X86_FEATURE_MWAIT KVM_X86_CPU_FEATURE(0x1, 0, ECX, 3) -#define X86_FEATURE_VMX KVM_X86_CPU_FEATURE(0x1, 0, ECX, 5) -#define X86_FEATURE_SMX KVM_X86_CPU_FEATURE(0x1, 0, ECX, 6) -#define X86_FEATURE_PDCM KVM_X86_CPU_FEATURE(0x1, 0, ECX, 15) -#define X86_FEATURE_PCID KVM_X86_CPU_FEATURE(0x1, 0, ECX, 17) -#define X86_FEATURE_X2APIC KVM_X86_CPU_FEATURE(0x1, 0, ECX, 21) -#define X86_FEATURE_MOVBE KVM_X86_CPU_FEATURE(0x1, 0, ECX, 22) -#define X86_FEATURE_TSC_DEADLINE_TIMER KVM_X86_CPU_FEATURE(0x1, 0, ECX, 24) -#define X86_FEATURE_XSAVE KVM_X86_CPU_FEATURE(0x1, 0, ECX, 26) -#define X86_FEATURE_OSXSAVE KVM_X86_CPU_FEATURE(0x1, 0, ECX, 27) -#define X86_FEATURE_RDRAND KVM_X86_CPU_FEATURE(0x1, 0, ECX, 30) -#define X86_FEATURE_HYPERVISOR KVM_X86_CPU_FEATURE(0x1, 0, ECX, 31) -#define X86_FEATURE_PAE KVM_X86_CPU_FEATURE(0x1, 0, EDX, 6) -#define X86_FEATURE_MCE KVM_X86_CPU_FEATURE(0x1, 0, EDX, 7) -#define X86_FEATURE_APIC KVM_X86_CPU_FEATURE(0x1, 0, EDX, 9) -#define X86_FEATURE_CLFLUSH KVM_X86_CPU_FEATURE(0x1, 0, EDX, 19) -#define X86_FEATURE_XMM KVM_X86_CPU_FEATURE(0x1, 0, EDX, 25) -#define X86_FEATURE_XMM2 KVM_X86_CPU_FEATURE(0x1, 0, EDX, 26) -#define X86_FEATURE_FSGSBASE KVM_X86_CPU_FEATURE(0x7, 0, EBX, 0) -#define X86_FEATURE_TSC_ADJUST KVM_X86_CPU_FEATURE(0x7, 0, EBX, 1) -#define X86_FEATURE_SGX KVM_X86_CPU_FEATURE(0x7, 0, EBX, 2) -#define X86_FEATURE_HLE KVM_X86_CPU_FEATURE(0x7, 0, EBX, 4) -#define X86_FEATURE_SMEP KVM_X86_CPU_FEATURE(0x7, 0, EBX, 7) -#define X86_FEATURE_INVPCID KVM_X86_CPU_FEATURE(0x7, 0, EBX, 10) -#define X86_FEATURE_RTM KVM_X86_CPU_FEATURE(0x7, 0, EBX, 11) -#define X86_FEATURE_MPX KVM_X86_CPU_FEATURE(0x7, 0, EBX, 14) -#define X86_FEATURE_SMAP KVM_X86_CPU_FEATURE(0x7, 0, EBX, 20) -#define X86_FEATURE_PCOMMIT KVM_X86_CPU_FEATURE(0x7, 0, EBX, 22) -#define X86_FEATURE_CLFLUSHOPT KVM_X86_CPU_FEATURE(0x7, 0, EBX, 23) -#define X86_FEATURE_CLWB KVM_X86_CPU_FEATURE(0x7, 0, EBX, 24) -#define X86_FEATURE_UMIP KVM_X86_CPU_FEATURE(0x7, 0, ECX, 2) -#define X86_FEATURE_PKU KVM_X86_CPU_FEATURE(0x7, 0, ECX, 3) -#define X86_FEATURE_OSPKE KVM_X86_CPU_FEATURE(0x7, 0, ECX, 4) -#define X86_FEATURE_LA57 KVM_X86_CPU_FEATURE(0x7, 0, ECX, 16) -#define X86_FEATURE_RDPID KVM_X86_CPU_FEATURE(0x7, 0, ECX, 22) -#define X86_FEATURE_SGX_LC KVM_X86_CPU_FEATURE(0x7, 0, ECX, 30) -#define X86_FEATURE_SHSTK KVM_X86_CPU_FEATURE(0x7, 0, ECX, 7) -#define X86_FEATURE_IBT KVM_X86_CPU_FEATURE(0x7, 0, EDX, 20) -#define X86_FEATURE_AMX_TILE KVM_X86_CPU_FEATURE(0x7, 0, EDX, 24) -#define X86_FEATURE_SPEC_CTRL KVM_X86_CPU_FEATURE(0x7, 0, EDX, 26) -#define X86_FEATURE_ARCH_CAPABILITIES KVM_X86_CPU_FEATURE(0x7, 0, EDX, 29) -#define X86_FEATURE_PKS KVM_X86_CPU_FEATURE(0x7, 0, ECX, 31) -#define X86_FEATURE_XTILECFG KVM_X86_CPU_FEATURE(0xD, 0, EAX, 17) -#define X86_FEATURE_XTILEDATA KVM_X86_CPU_FEATURE(0xD, 0, EAX, 18) -#define X86_FEATURE_XSAVES KVM_X86_CPU_FEATURE(0xD, 1, EAX, 3) -#define X86_FEATURE_XFD KVM_X86_CPU_FEATURE(0xD, 1, EAX, 4) -#define X86_FEATURE_XTILEDATA_XFD KVM_X86_CPU_FEATURE(0xD, 18, ECX, 2) - -/* - * Extended Leafs, a.k.a. 
AMD defined - */ -#define X86_FEATURE_SVM KVM_X86_CPU_FEATURE(0x80000001, 0, ECX, 2) -#define X86_FEATURE_NX KVM_X86_CPU_FEATURE(0x80000001, 0, EDX, 20) -#define X86_FEATURE_GBPAGES KVM_X86_CPU_FEATURE(0x80000001, 0, EDX, 26) -#define X86_FEATURE_RDTSCP KVM_X86_CPU_FEATURE(0x80000001, 0, EDX, 27) -#define X86_FEATURE_LM KVM_X86_CPU_FEATURE(0x80000001, 0, EDX, 29) -#define X86_FEATURE_INVTSC KVM_X86_CPU_FEATURE(0x80000007, 0, EDX, 8) -#define X86_FEATURE_RDPRU KVM_X86_CPU_FEATURE(0x80000008, 0, EBX, 4) -#define X86_FEATURE_AMD_IBPB KVM_X86_CPU_FEATURE(0x80000008, 0, EBX, 12) -#define X86_FEATURE_NPT KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 0) -#define X86_FEATURE_LBRV KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 1) -#define X86_FEATURE_NRIPS KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 3) -#define X86_FEATURE_TSCRATEMSR KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 4) -#define X86_FEATURE_PAUSEFILTER KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 10) -#define X86_FEATURE_PFTHRESHOLD KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 12) -#define X86_FEATURE_VGIF KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 16) -#define X86_FEATURE_SEV KVM_X86_CPU_FEATURE(0x8000001F, 0, EAX, 1) -#define X86_FEATURE_SEV_ES KVM_X86_CPU_FEATURE(0x8000001F, 0, EAX, 3) - -/* - * KVM defined paravirt features. - */ -#define X86_FEATURE_KVM_CLOCKSOURCE KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 0) -#define X86_FEATURE_KVM_NOP_IO_DELAY KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 1) -#define X86_FEATURE_KVM_MMU_OP KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 2) -#define X86_FEATURE_KVM_CLOCKSOURCE2 KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 3) -#define X86_FEATURE_KVM_ASYNC_PF KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 4) -#define X86_FEATURE_KVM_STEAL_TIME KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 5) -#define X86_FEATURE_KVM_PV_EOI KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 6) -#define X86_FEATURE_KVM_PV_UNHALT KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 7) -/* Bit 8 apparently isn't used?!?! */ -#define X86_FEATURE_KVM_PV_TLB_FLUSH KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 9) -#define X86_FEATURE_KVM_ASYNC_PF_VMEXIT KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 10) -#define X86_FEATURE_KVM_PV_SEND_IPI KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 11) -#define X86_FEATURE_KVM_POLL_CONTROL KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 12) -#define X86_FEATURE_KVM_PV_SCHED_YIELD KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 13) -#define X86_FEATURE_KVM_ASYNC_PF_INT KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 14) -#define X86_FEATURE_KVM_MSI_EXT_DEST_ID KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 15) -#define X86_FEATURE_KVM_HC_MAP_GPA_RANGE KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 16) -#define X86_FEATURE_KVM_MIGRATION_CONTROL KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 17) - -/* - * Same idea as X86_FEATURE_XXX, but X86_PROPERTY_XXX retrieves a multi-bit - * value/property as opposed to a single-bit feature. Again, pack the info - * into a 64-bit value to pass by value with no overhead. 
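
As with the feature macros, each X86_PROPERTY_* below names a CPUID leaf, an index, a register, and an inclusive lo_bit..hi_bit range; consumers read the leaf and mask out that bit range. A small illustrative extractor (not the selftests' exact helper) showing how such a packed descriptor would be consumed:

#include <stdint.h>

struct cpu_property {
	uint32_t function;
	uint8_t index, reg, lo_bit, hi_bit;
};

static inline uint32_t property_extract(uint32_t cpuid_reg, const struct cpu_property *p)
{
	uint32_t width = p->hi_bit - p->lo_bit + 1;
	uint32_t mask = width == 32 ? ~0u : (1u << width) - 1;

	return (cpuid_reg >> p->lo_bit) & mask;
}

/* e.g. X86_PROPERTY_MAX_PHY_ADDR is CPUID.0x80000008.EAX[7:0], so feeding
 * that leaf's EAX value here yields the physical address width. */
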
- */ -struct kvm_x86_cpu_property { - u32 function; - u8 index; - u8 reg; - u8 lo_bit; - u8 hi_bit; -}; -#define KVM_X86_CPU_PROPERTY(fn, idx, gpr, low_bit, high_bit) \ -({ \ - struct kvm_x86_cpu_property property = { \ - .function = fn, \ - .index = idx, \ - .reg = KVM_CPUID_##gpr, \ - .lo_bit = low_bit, \ - .hi_bit = high_bit, \ - }; \ - \ - kvm_static_assert(low_bit < high_bit); \ - kvm_static_assert((fn & 0xc0000000) == 0 || \ - (fn & 0xc0000000) == 0x40000000 || \ - (fn & 0xc0000000) == 0x80000000 || \ - (fn & 0xc0000000) == 0xc0000000); \ - kvm_static_assert(idx < BIT(sizeof(property.index) * BITS_PER_BYTE)); \ - property; \ -}) - -#define X86_PROPERTY_MAX_BASIC_LEAF KVM_X86_CPU_PROPERTY(0, 0, EAX, 0, 31) -#define X86_PROPERTY_PMU_VERSION KVM_X86_CPU_PROPERTY(0xa, 0, EAX, 0, 7) -#define X86_PROPERTY_PMU_NR_GP_COUNTERS KVM_X86_CPU_PROPERTY(0xa, 0, EAX, 8, 15) -#define X86_PROPERTY_PMU_GP_COUNTERS_BIT_WIDTH KVM_X86_CPU_PROPERTY(0xa, 0, EAX, 16, 23) -#define X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH KVM_X86_CPU_PROPERTY(0xa, 0, EAX, 24, 31) -#define X86_PROPERTY_PMU_EVENTS_MASK KVM_X86_CPU_PROPERTY(0xa, 0, EBX, 0, 7) -#define X86_PROPERTY_PMU_FIXED_COUNTERS_BITMASK KVM_X86_CPU_PROPERTY(0xa, 0, ECX, 0, 31) -#define X86_PROPERTY_PMU_NR_FIXED_COUNTERS KVM_X86_CPU_PROPERTY(0xa, 0, EDX, 0, 4) -#define X86_PROPERTY_PMU_FIXED_COUNTERS_BIT_WIDTH KVM_X86_CPU_PROPERTY(0xa, 0, EDX, 5, 12) - -#define X86_PROPERTY_SUPPORTED_XCR0_LO KVM_X86_CPU_PROPERTY(0xd, 0, EAX, 0, 31) -#define X86_PROPERTY_XSTATE_MAX_SIZE_XCR0 KVM_X86_CPU_PROPERTY(0xd, 0, EBX, 0, 31) -#define X86_PROPERTY_XSTATE_MAX_SIZE KVM_X86_CPU_PROPERTY(0xd, 0, ECX, 0, 31) -#define X86_PROPERTY_SUPPORTED_XCR0_HI KVM_X86_CPU_PROPERTY(0xd, 0, EDX, 0, 31) - -#define X86_PROPERTY_XSTATE_TILE_SIZE KVM_X86_CPU_PROPERTY(0xd, 18, EAX, 0, 31) -#define X86_PROPERTY_XSTATE_TILE_OFFSET KVM_X86_CPU_PROPERTY(0xd, 18, EBX, 0, 31) -#define X86_PROPERTY_AMX_MAX_PALETTE_TABLES KVM_X86_CPU_PROPERTY(0x1d, 0, EAX, 0, 31) -#define X86_PROPERTY_AMX_TOTAL_TILE_BYTES KVM_X86_CPU_PROPERTY(0x1d, 1, EAX, 0, 15) -#define X86_PROPERTY_AMX_BYTES_PER_TILE KVM_X86_CPU_PROPERTY(0x1d, 1, EAX, 16, 31) -#define X86_PROPERTY_AMX_BYTES_PER_ROW KVM_X86_CPU_PROPERTY(0x1d, 1, EBX, 0, 15) -#define X86_PROPERTY_AMX_NR_TILE_REGS KVM_X86_CPU_PROPERTY(0x1d, 1, EBX, 16, 31) -#define X86_PROPERTY_AMX_MAX_ROWS KVM_X86_CPU_PROPERTY(0x1d, 1, ECX, 0, 15) - -#define X86_PROPERTY_MAX_KVM_LEAF KVM_X86_CPU_PROPERTY(0x40000000, 0, EAX, 0, 31) - -#define X86_PROPERTY_MAX_EXT_LEAF KVM_X86_CPU_PROPERTY(0x80000000, 0, EAX, 0, 31) -#define X86_PROPERTY_MAX_PHY_ADDR KVM_X86_CPU_PROPERTY(0x80000008, 0, EAX, 0, 7) -#define X86_PROPERTY_MAX_VIRT_ADDR KVM_X86_CPU_PROPERTY(0x80000008, 0, EAX, 8, 15) -#define X86_PROPERTY_GUEST_MAX_PHY_ADDR KVM_X86_CPU_PROPERTY(0x80000008, 0, EAX, 16, 23) -#define X86_PROPERTY_SEV_C_BIT KVM_X86_CPU_PROPERTY(0x8000001F, 0, EBX, 0, 5) -#define X86_PROPERTY_PHYS_ADDR_REDUCTION KVM_X86_CPU_PROPERTY(0x8000001F, 0, EBX, 6, 11) - -#define X86_PROPERTY_MAX_CENTAUR_LEAF KVM_X86_CPU_PROPERTY(0xC0000000, 0, EAX, 0, 31) - -/* - * Intel's architectural PMU events are bizarre. They have a "feature" bit - * that indicates the feature is _not_ supported, and a property that states - * the length of the bit mask of unsupported features. A feature is supported - * if the size of the bit mask is larger than the "unavailable" bit, and said - * bit is not set. Fixed counters also bizarre enumeration, but inverted from - * arch events for general purpose counters. 
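
For the general purpose counters, the rule described above boils down to a two-part test: the enumerated bit-vector must be long enough to cover the event, and the event's "unavailable" bit must be clear. A sketch of that check with illustrative parameters (the selftests obtain both values from CPUID.0xA via the property and feature helpers in this header):

#include <stdbool.h>
#include <stdint.h>

static inline bool arch_event_supported(uint32_t unavailable_mask,
					uint32_t bit_vector_length,
					uint32_t event_bit)
{
	return bit_vector_length > event_bit &&
	       !(unavailable_mask & (1u << event_bit));
}
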
Fixed counters are supported if a - * feature flag is set **OR** the total number of fixed counters is greater - * than index of the counter. - * - * Wrap the events for general purpose and fixed counters to simplify checking - * whether or not a given architectural event is supported. - */ -struct kvm_x86_pmu_feature { - struct kvm_x86_cpu_feature f; -}; -#define KVM_X86_PMU_FEATURE(__reg, __bit) \ -({ \ - struct kvm_x86_pmu_feature feature = { \ - .f = KVM_X86_CPU_FEATURE(0xa, 0, __reg, __bit), \ - }; \ - \ - kvm_static_assert(KVM_CPUID_##__reg == KVM_CPUID_EBX || \ - KVM_CPUID_##__reg == KVM_CPUID_ECX); \ - feature; \ -}) - -#define X86_PMU_FEATURE_CPU_CYCLES KVM_X86_PMU_FEATURE(EBX, 0) -#define X86_PMU_FEATURE_INSNS_RETIRED KVM_X86_PMU_FEATURE(EBX, 1) -#define X86_PMU_FEATURE_REFERENCE_CYCLES KVM_X86_PMU_FEATURE(EBX, 2) -#define X86_PMU_FEATURE_LLC_REFERENCES KVM_X86_PMU_FEATURE(EBX, 3) -#define X86_PMU_FEATURE_LLC_MISSES KVM_X86_PMU_FEATURE(EBX, 4) -#define X86_PMU_FEATURE_BRANCH_INSNS_RETIRED KVM_X86_PMU_FEATURE(EBX, 5) -#define X86_PMU_FEATURE_BRANCHES_MISPREDICTED KVM_X86_PMU_FEATURE(EBX, 6) -#define X86_PMU_FEATURE_TOPDOWN_SLOTS KVM_X86_PMU_FEATURE(EBX, 7) - -#define X86_PMU_FEATURE_INSNS_RETIRED_FIXED KVM_X86_PMU_FEATURE(ECX, 0) -#define X86_PMU_FEATURE_CPU_CYCLES_FIXED KVM_X86_PMU_FEATURE(ECX, 1) -#define X86_PMU_FEATURE_REFERENCE_TSC_CYCLES_FIXED KVM_X86_PMU_FEATURE(ECX, 2) -#define X86_PMU_FEATURE_TOPDOWN_SLOTS_FIXED KVM_X86_PMU_FEATURE(ECX, 3) - -static inline unsigned int x86_family(unsigned int eax) -{ - unsigned int x86; - - x86 = (eax >> 8) & 0xf; - - if (x86 == 0xf) - x86 += (eax >> 20) & 0xff; - - return x86; -} - -static inline unsigned int x86_model(unsigned int eax) -{ - return ((eax >> 12) & 0xf0) | ((eax >> 4) & 0x0f); -} - -/* Page table bitfield declarations */ -#define PTE_PRESENT_MASK BIT_ULL(0) -#define PTE_WRITABLE_MASK BIT_ULL(1) -#define PTE_USER_MASK BIT_ULL(2) -#define PTE_ACCESSED_MASK BIT_ULL(5) -#define PTE_DIRTY_MASK BIT_ULL(6) -#define PTE_LARGE_MASK BIT_ULL(7) -#define PTE_GLOBAL_MASK BIT_ULL(8) -#define PTE_NX_MASK BIT_ULL(63) - -#define PHYSICAL_PAGE_MASK GENMASK_ULL(51, 12) - -#define PAGE_SHIFT 12 -#define PAGE_SIZE (1ULL << PAGE_SHIFT) -#define PAGE_MASK (~(PAGE_SIZE-1) & PHYSICAL_PAGE_MASK) - -#define HUGEPAGE_SHIFT(x) (PAGE_SHIFT + (((x) - 1) * 9)) -#define HUGEPAGE_SIZE(x) (1UL << HUGEPAGE_SHIFT(x)) -#define HUGEPAGE_MASK(x) (~(HUGEPAGE_SIZE(x) - 1) & PHYSICAL_PAGE_MASK) - -#define PTE_GET_PA(pte) ((pte) & PHYSICAL_PAGE_MASK) -#define PTE_GET_PFN(pte) (PTE_GET_PA(pte) >> PAGE_SHIFT) - -/* General Registers in 64-Bit Mode */ -struct gpr64_regs { - u64 rax; - u64 rcx; - u64 rdx; - u64 rbx; - u64 rsp; - u64 rbp; - u64 rsi; - u64 rdi; - u64 r8; - u64 r9; - u64 r10; - u64 r11; - u64 r12; - u64 r13; - u64 r14; - u64 r15; -}; - -struct desc64 { - uint16_t limit0; - uint16_t base0; - unsigned base1:8, type:4, s:1, dpl:2, p:1; - unsigned limit1:4, avl:1, l:1, db:1, g:1, base2:8; - uint32_t base3; - uint32_t zero1; -} __attribute__((packed)); - -struct desc_ptr { - uint16_t size; - uint64_t address; -} __attribute__((packed)); - -struct kvm_x86_state { - struct kvm_xsave *xsave; - struct kvm_vcpu_events events; - struct kvm_mp_state mp_state; - struct kvm_regs regs; - struct kvm_xcrs xcrs; - struct kvm_sregs sregs; - struct kvm_debugregs debugregs; - union { - struct kvm_nested_state nested; - char nested_[16384]; - }; - struct kvm_msrs msrs; -}; - -static inline uint64_t get_desc64_base(const struct desc64 *desc) -{ - return ((uint64_t)desc->base3 << 
32) | - (desc->base0 | ((desc->base1) << 16) | ((desc->base2) << 24)); -} - -static inline uint64_t rdtsc(void) -{ - uint32_t eax, edx; - uint64_t tsc_val; - /* - * The lfence is to wait (on Intel CPUs) until all previous - * instructions have been executed. If software requires RDTSC to be - * executed prior to execution of any subsequent instruction, it can - * execute LFENCE immediately after RDTSC - */ - __asm__ __volatile__("lfence; rdtsc; lfence" : "=a"(eax), "=d"(edx)); - tsc_val = ((uint64_t)edx) << 32 | eax; - return tsc_val; -} - -static inline uint64_t rdtscp(uint32_t *aux) -{ - uint32_t eax, edx; - - __asm__ __volatile__("rdtscp" : "=a"(eax), "=d"(edx), "=c"(*aux)); - return ((uint64_t)edx) << 32 | eax; -} - -static inline uint64_t rdmsr(uint32_t msr) -{ - uint32_t a, d; - - __asm__ __volatile__("rdmsr" : "=a"(a), "=d"(d) : "c"(msr) : "memory"); - - return a | ((uint64_t) d << 32); -} - -static inline void wrmsr(uint32_t msr, uint64_t value) -{ - uint32_t a = value; - uint32_t d = value >> 32; - - __asm__ __volatile__("wrmsr" :: "a"(a), "d"(d), "c"(msr) : "memory"); -} - - -static inline uint16_t inw(uint16_t port) -{ - uint16_t tmp; - - __asm__ __volatile__("in %%dx, %%ax" - : /* output */ "=a" (tmp) - : /* input */ "d" (port)); - - return tmp; -} - -static inline uint16_t get_es(void) -{ - uint16_t es; - - __asm__ __volatile__("mov %%es, %[es]" - : /* output */ [es]"=rm"(es)); - return es; -} - -static inline uint16_t get_cs(void) -{ - uint16_t cs; - - __asm__ __volatile__("mov %%cs, %[cs]" - : /* output */ [cs]"=rm"(cs)); - return cs; -} - -static inline uint16_t get_ss(void) -{ - uint16_t ss; - - __asm__ __volatile__("mov %%ss, %[ss]" - : /* output */ [ss]"=rm"(ss)); - return ss; -} - -static inline uint16_t get_ds(void) -{ - uint16_t ds; - - __asm__ __volatile__("mov %%ds, %[ds]" - : /* output */ [ds]"=rm"(ds)); - return ds; -} - -static inline uint16_t get_fs(void) -{ - uint16_t fs; - - __asm__ __volatile__("mov %%fs, %[fs]" - : /* output */ [fs]"=rm"(fs)); - return fs; -} - -static inline uint16_t get_gs(void) -{ - uint16_t gs; - - __asm__ __volatile__("mov %%gs, %[gs]" - : /* output */ [gs]"=rm"(gs)); - return gs; -} - -static inline uint16_t get_tr(void) -{ - uint16_t tr; - - __asm__ __volatile__("str %[tr]" - : /* output */ [tr]"=rm"(tr)); - return tr; -} - -static inline uint64_t get_cr0(void) -{ - uint64_t cr0; - - __asm__ __volatile__("mov %%cr0, %[cr0]" - : /* output */ [cr0]"=r"(cr0)); - return cr0; -} - -static inline uint64_t get_cr3(void) -{ - uint64_t cr3; - - __asm__ __volatile__("mov %%cr3, %[cr3]" - : /* output */ [cr3]"=r"(cr3)); - return cr3; -} - -static inline uint64_t get_cr4(void) -{ - uint64_t cr4; - - __asm__ __volatile__("mov %%cr4, %[cr4]" - : /* output */ [cr4]"=r"(cr4)); - return cr4; -} - -static inline void set_cr4(uint64_t val) -{ - __asm__ __volatile__("mov %0, %%cr4" : : "r" (val) : "memory"); -} - -static inline u64 xgetbv(u32 index) -{ - u32 eax, edx; - - __asm__ __volatile__("xgetbv;" - : "=a" (eax), "=d" (edx) - : "c" (index)); - return eax | ((u64)edx << 32); -} - -static inline void xsetbv(u32 index, u64 value) -{ - u32 eax = value; - u32 edx = value >> 32; - - __asm__ __volatile__("xsetbv" :: "a" (eax), "d" (edx), "c" (index)); -} - -static inline void wrpkru(u32 pkru) -{ - /* Note, ECX and EDX are architecturally required to be '0'. 
*/ - asm volatile(".byte 0x0f,0x01,0xef\n\t" - : : "a" (pkru), "c"(0), "d"(0)); -} - -static inline struct desc_ptr get_gdt(void) -{ - struct desc_ptr gdt; - __asm__ __volatile__("sgdt %[gdt]" - : /* output */ [gdt]"=m"(gdt)); - return gdt; -} - -static inline struct desc_ptr get_idt(void) -{ - struct desc_ptr idt; - __asm__ __volatile__("sidt %[idt]" - : /* output */ [idt]"=m"(idt)); - return idt; -} - -static inline void outl(uint16_t port, uint32_t value) -{ - __asm__ __volatile__("outl %%eax, %%dx" : : "d"(port), "a"(value)); -} - -static inline void __cpuid(uint32_t function, uint32_t index, - uint32_t *eax, uint32_t *ebx, - uint32_t *ecx, uint32_t *edx) -{ - *eax = function; - *ecx = index; - - asm volatile("cpuid" - : "=a" (*eax), - "=b" (*ebx), - "=c" (*ecx), - "=d" (*edx) - : "0" (*eax), "2" (*ecx) - : "memory"); -} - -static inline void cpuid(uint32_t function, - uint32_t *eax, uint32_t *ebx, - uint32_t *ecx, uint32_t *edx) -{ - return __cpuid(function, 0, eax, ebx, ecx, edx); -} - -static inline uint32_t this_cpu_fms(void) -{ - uint32_t eax, ebx, ecx, edx; - - cpuid(1, &eax, &ebx, &ecx, &edx); - return eax; -} - -static inline uint32_t this_cpu_family(void) -{ - return x86_family(this_cpu_fms()); -} - -static inline uint32_t this_cpu_model(void) -{ - return x86_model(this_cpu_fms()); -} - -static inline bool this_cpu_vendor_string_is(const char *vendor) -{ - const uint32_t *chunk = (const uint32_t *)vendor; - uint32_t eax, ebx, ecx, edx; - - cpuid(0, &eax, &ebx, &ecx, &edx); - return (ebx == chunk[0] && edx == chunk[1] && ecx == chunk[2]); -} - -static inline bool this_cpu_is_intel(void) -{ - return this_cpu_vendor_string_is("GenuineIntel"); -} - -/* - * Exclude early K5 samples with a vendor string of "AMDisbetter!" - */ -static inline bool this_cpu_is_amd(void) -{ - return this_cpu_vendor_string_is("AuthenticAMD"); -} - -static inline uint32_t __this_cpu_has(uint32_t function, uint32_t index, - uint8_t reg, uint8_t lo, uint8_t hi) -{ - uint32_t gprs[4]; - - __cpuid(function, index, - &gprs[KVM_CPUID_EAX], &gprs[KVM_CPUID_EBX], - &gprs[KVM_CPUID_ECX], &gprs[KVM_CPUID_EDX]); - - return (gprs[reg] & GENMASK(hi, lo)) >> lo; -} - -static inline bool this_cpu_has(struct kvm_x86_cpu_feature feature) -{ - return __this_cpu_has(feature.function, feature.index, - feature.reg, feature.bit, feature.bit); -} - -static inline uint32_t this_cpu_property(struct kvm_x86_cpu_property property) -{ - return __this_cpu_has(property.function, property.index, - property.reg, property.lo_bit, property.hi_bit); -} - -static __always_inline bool this_cpu_has_p(struct kvm_x86_cpu_property property) -{ - uint32_t max_leaf; - - switch (property.function & 0xc0000000) { - case 0: - max_leaf = this_cpu_property(X86_PROPERTY_MAX_BASIC_LEAF); - break; - case 0x40000000: - max_leaf = this_cpu_property(X86_PROPERTY_MAX_KVM_LEAF); - break; - case 0x80000000: - max_leaf = this_cpu_property(X86_PROPERTY_MAX_EXT_LEAF); - break; - case 0xc0000000: - max_leaf = this_cpu_property(X86_PROPERTY_MAX_CENTAUR_LEAF); - } - return max_leaf >= property.function; -} - -static inline bool this_pmu_has(struct kvm_x86_pmu_feature feature) -{ - uint32_t nr_bits; - - if (feature.f.reg == KVM_CPUID_EBX) { - nr_bits = this_cpu_property(X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH); - return nr_bits > feature.f.bit && !this_cpu_has(feature.f); - } - - GUEST_ASSERT(feature.f.reg == KVM_CPUID_ECX); - nr_bits = this_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS); - return nr_bits > feature.f.bit || this_cpu_has(feature.f); -} - -static 
__always_inline uint64_t this_cpu_supported_xcr0(void) -{ - if (!this_cpu_has_p(X86_PROPERTY_SUPPORTED_XCR0_LO)) - return 0; - - return this_cpu_property(X86_PROPERTY_SUPPORTED_XCR0_LO) | - ((uint64_t)this_cpu_property(X86_PROPERTY_SUPPORTED_XCR0_HI) << 32); -} - -typedef u32 __attribute__((vector_size(16))) sse128_t; -#define __sse128_u union { sse128_t vec; u64 as_u64[2]; u32 as_u32[4]; } -#define sse128_lo(x) ({ __sse128_u t; t.vec = x; t.as_u64[0]; }) -#define sse128_hi(x) ({ __sse128_u t; t.vec = x; t.as_u64[1]; }) - -static inline void read_sse_reg(int reg, sse128_t *data) -{ - switch (reg) { - case 0: - asm("movdqa %%xmm0, %0" : "=m"(*data)); - break; - case 1: - asm("movdqa %%xmm1, %0" : "=m"(*data)); - break; - case 2: - asm("movdqa %%xmm2, %0" : "=m"(*data)); - break; - case 3: - asm("movdqa %%xmm3, %0" : "=m"(*data)); - break; - case 4: - asm("movdqa %%xmm4, %0" : "=m"(*data)); - break; - case 5: - asm("movdqa %%xmm5, %0" : "=m"(*data)); - break; - case 6: - asm("movdqa %%xmm6, %0" : "=m"(*data)); - break; - case 7: - asm("movdqa %%xmm7, %0" : "=m"(*data)); - break; - default: - BUG(); - } -} - -static inline void write_sse_reg(int reg, const sse128_t *data) -{ - switch (reg) { - case 0: - asm("movdqa %0, %%xmm0" : : "m"(*data)); - break; - case 1: - asm("movdqa %0, %%xmm1" : : "m"(*data)); - break; - case 2: - asm("movdqa %0, %%xmm2" : : "m"(*data)); - break; - case 3: - asm("movdqa %0, %%xmm3" : : "m"(*data)); - break; - case 4: - asm("movdqa %0, %%xmm4" : : "m"(*data)); - break; - case 5: - asm("movdqa %0, %%xmm5" : : "m"(*data)); - break; - case 6: - asm("movdqa %0, %%xmm6" : : "m"(*data)); - break; - case 7: - asm("movdqa %0, %%xmm7" : : "m"(*data)); - break; - default: - BUG(); - } -} - -static inline void cpu_relax(void) -{ - asm volatile("rep; nop" ::: "memory"); -} - -static inline void udelay(unsigned long usec) -{ - uint64_t start, now, cycles; - - GUEST_ASSERT(guest_tsc_khz); - cycles = guest_tsc_khz / 1000 * usec; - - /* - * Deliberately don't PAUSE, a.k.a. cpu_relax(), so that the delay is - * as accurate as possible, e.g. doesn't trigger PAUSE-Loop VM-Exits. - */ - start = rdtsc(); - do { - now = rdtsc(); - } while (now - start < cycles); -} - -#define ud2() \ - __asm__ __volatile__( \ - "ud2\n" \ - ) - -#define hlt() \ - __asm__ __volatile__( \ - "hlt\n" \ - ) - -struct kvm_x86_state *vcpu_save_state(struct kvm_vcpu *vcpu); -void vcpu_load_state(struct kvm_vcpu *vcpu, struct kvm_x86_state *state); -void kvm_x86_state_cleanup(struct kvm_x86_state *state); - -const struct kvm_msr_list *kvm_get_msr_index_list(void); -const struct kvm_msr_list *kvm_get_feature_msr_index_list(void); -bool kvm_msr_is_in_save_restore_list(uint32_t msr_index); -uint64_t kvm_get_feature_msr(uint64_t msr_index); - -static inline void vcpu_msrs_get(struct kvm_vcpu *vcpu, - struct kvm_msrs *msrs) -{ - int r = __vcpu_ioctl(vcpu, KVM_GET_MSRS, msrs); - - TEST_ASSERT(r == msrs->nmsrs, - "KVM_GET_MSRS failed, r: %i (failed on MSR %x)", - r, r < 0 || r >= msrs->nmsrs ? -1 : msrs->entries[r].index); -} -static inline void vcpu_msrs_set(struct kvm_vcpu *vcpu, struct kvm_msrs *msrs) -{ - int r = __vcpu_ioctl(vcpu, KVM_SET_MSRS, msrs); - - TEST_ASSERT(r == msrs->nmsrs, - "KVM_SET_MSRS failed, r: %i (failed on MSR %x)", - r, r < 0 || r >= msrs->nmsrs ? 
-1 : msrs->entries[r].index); -} -static inline void vcpu_debugregs_get(struct kvm_vcpu *vcpu, - struct kvm_debugregs *debugregs) -{ - vcpu_ioctl(vcpu, KVM_GET_DEBUGREGS, debugregs); -} -static inline void vcpu_debugregs_set(struct kvm_vcpu *vcpu, - struct kvm_debugregs *debugregs) -{ - vcpu_ioctl(vcpu, KVM_SET_DEBUGREGS, debugregs); -} -static inline void vcpu_xsave_get(struct kvm_vcpu *vcpu, - struct kvm_xsave *xsave) -{ - vcpu_ioctl(vcpu, KVM_GET_XSAVE, xsave); -} -static inline void vcpu_xsave2_get(struct kvm_vcpu *vcpu, - struct kvm_xsave *xsave) -{ - vcpu_ioctl(vcpu, KVM_GET_XSAVE2, xsave); -} -static inline void vcpu_xsave_set(struct kvm_vcpu *vcpu, - struct kvm_xsave *xsave) -{ - vcpu_ioctl(vcpu, KVM_SET_XSAVE, xsave); -} -static inline void vcpu_xcrs_get(struct kvm_vcpu *vcpu, - struct kvm_xcrs *xcrs) -{ - vcpu_ioctl(vcpu, KVM_GET_XCRS, xcrs); -} -static inline void vcpu_xcrs_set(struct kvm_vcpu *vcpu, struct kvm_xcrs *xcrs) -{ - vcpu_ioctl(vcpu, KVM_SET_XCRS, xcrs); -} - -const struct kvm_cpuid_entry2 *get_cpuid_entry(const struct kvm_cpuid2 *cpuid, - uint32_t function, uint32_t index); -const struct kvm_cpuid2 *kvm_get_supported_cpuid(void); - -static inline uint32_t kvm_cpu_fms(void) -{ - return get_cpuid_entry(kvm_get_supported_cpuid(), 0x1, 0)->eax; -} - -static inline uint32_t kvm_cpu_family(void) -{ - return x86_family(kvm_cpu_fms()); -} - -static inline uint32_t kvm_cpu_model(void) -{ - return x86_model(kvm_cpu_fms()); -} - -bool kvm_cpuid_has(const struct kvm_cpuid2 *cpuid, - struct kvm_x86_cpu_feature feature); - -static inline bool kvm_cpu_has(struct kvm_x86_cpu_feature feature) -{ - return kvm_cpuid_has(kvm_get_supported_cpuid(), feature); -} - -uint32_t kvm_cpuid_property(const struct kvm_cpuid2 *cpuid, - struct kvm_x86_cpu_property property); - -static inline uint32_t kvm_cpu_property(struct kvm_x86_cpu_property property) -{ - return kvm_cpuid_property(kvm_get_supported_cpuid(), property); -} - -static __always_inline bool kvm_cpu_has_p(struct kvm_x86_cpu_property property) -{ - uint32_t max_leaf; - - switch (property.function & 0xc0000000) { - case 0: - max_leaf = kvm_cpu_property(X86_PROPERTY_MAX_BASIC_LEAF); - break; - case 0x40000000: - max_leaf = kvm_cpu_property(X86_PROPERTY_MAX_KVM_LEAF); - break; - case 0x80000000: - max_leaf = kvm_cpu_property(X86_PROPERTY_MAX_EXT_LEAF); - break; - case 0xc0000000: - max_leaf = kvm_cpu_property(X86_PROPERTY_MAX_CENTAUR_LEAF); - } - return max_leaf >= property.function; -} - -static inline bool kvm_pmu_has(struct kvm_x86_pmu_feature feature) -{ - uint32_t nr_bits; - - if (feature.f.reg == KVM_CPUID_EBX) { - nr_bits = kvm_cpu_property(X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH); - return nr_bits > feature.f.bit && !kvm_cpu_has(feature.f); - } - - TEST_ASSERT_EQ(feature.f.reg, KVM_CPUID_ECX); - nr_bits = kvm_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS); - return nr_bits > feature.f.bit || kvm_cpu_has(feature.f); -} - -static __always_inline uint64_t kvm_cpu_supported_xcr0(void) -{ - if (!kvm_cpu_has_p(X86_PROPERTY_SUPPORTED_XCR0_LO)) - return 0; - - return kvm_cpu_property(X86_PROPERTY_SUPPORTED_XCR0_LO) | - ((uint64_t)kvm_cpu_property(X86_PROPERTY_SUPPORTED_XCR0_HI) << 32); -} - -static inline size_t kvm_cpuid2_size(int nr_entries) -{ - return sizeof(struct kvm_cpuid2) + - sizeof(struct kvm_cpuid_entry2) * nr_entries; -} - -/* - * Allocate a "struct kvm_cpuid2* instance, with the 0-length arrary of - * entries sized to hold @nr_entries. The caller is responsible for freeing - * the struct. 
- */ -static inline struct kvm_cpuid2 *allocate_kvm_cpuid2(int nr_entries) -{ - struct kvm_cpuid2 *cpuid; - - cpuid = malloc(kvm_cpuid2_size(nr_entries)); - TEST_ASSERT(cpuid, "-ENOMEM when allocating kvm_cpuid2"); - - cpuid->nent = nr_entries; - - return cpuid; -} - -void vcpu_init_cpuid(struct kvm_vcpu *vcpu, const struct kvm_cpuid2 *cpuid); - -static inline struct kvm_cpuid_entry2 *__vcpu_get_cpuid_entry(struct kvm_vcpu *vcpu, - uint32_t function, - uint32_t index) -{ - return (struct kvm_cpuid_entry2 *)get_cpuid_entry(vcpu->cpuid, - function, index); -} - -static inline struct kvm_cpuid_entry2 *vcpu_get_cpuid_entry(struct kvm_vcpu *vcpu, - uint32_t function) -{ - return __vcpu_get_cpuid_entry(vcpu, function, 0); -} - -static inline int __vcpu_set_cpuid(struct kvm_vcpu *vcpu) -{ - int r; - - TEST_ASSERT(vcpu->cpuid, "Must do vcpu_init_cpuid() first"); - r = __vcpu_ioctl(vcpu, KVM_SET_CPUID2, vcpu->cpuid); - if (r) - return r; - - /* On success, refresh the cache to pick up adjustments made by KVM. */ - vcpu_ioctl(vcpu, KVM_GET_CPUID2, vcpu->cpuid); - return 0; -} - -static inline void vcpu_set_cpuid(struct kvm_vcpu *vcpu) -{ - TEST_ASSERT(vcpu->cpuid, "Must do vcpu_init_cpuid() first"); - vcpu_ioctl(vcpu, KVM_SET_CPUID2, vcpu->cpuid); - - /* Refresh the cache to pick up adjustments made by KVM. */ - vcpu_ioctl(vcpu, KVM_GET_CPUID2, vcpu->cpuid); -} - -static inline void vcpu_get_cpuid(struct kvm_vcpu *vcpu) -{ - vcpu_ioctl(vcpu, KVM_GET_CPUID2, vcpu->cpuid); -} - -void vcpu_set_cpuid_property(struct kvm_vcpu *vcpu, - struct kvm_x86_cpu_property property, - uint32_t value); -void vcpu_set_cpuid_maxphyaddr(struct kvm_vcpu *vcpu, uint8_t maxphyaddr); - -void vcpu_clear_cpuid_entry(struct kvm_vcpu *vcpu, uint32_t function); - -static inline bool vcpu_cpuid_has(struct kvm_vcpu *vcpu, - struct kvm_x86_cpu_feature feature) -{ - struct kvm_cpuid_entry2 *entry; - - entry = __vcpu_get_cpuid_entry(vcpu, feature.function, feature.index); - return *((&entry->eax) + feature.reg) & BIT(feature.bit); -} - -void vcpu_set_or_clear_cpuid_feature(struct kvm_vcpu *vcpu, - struct kvm_x86_cpu_feature feature, - bool set); - -static inline void vcpu_set_cpuid_feature(struct kvm_vcpu *vcpu, - struct kvm_x86_cpu_feature feature) -{ - vcpu_set_or_clear_cpuid_feature(vcpu, feature, true); - -} - -static inline void vcpu_clear_cpuid_feature(struct kvm_vcpu *vcpu, - struct kvm_x86_cpu_feature feature) -{ - vcpu_set_or_clear_cpuid_feature(vcpu, feature, false); -} - -uint64_t vcpu_get_msr(struct kvm_vcpu *vcpu, uint64_t msr_index); -int _vcpu_set_msr(struct kvm_vcpu *vcpu, uint64_t msr_index, uint64_t msr_value); - -/* - * Assert on an MSR access(es) and pretty print the MSR name when possible. - * Note, the caller provides the stringified name so that the name of macro is - * printed, not the value the macro resolves to (due to macro expansion). - */ -#define TEST_ASSERT_MSR(cond, fmt, msr, str, args...) \ -do { \ - if (__builtin_constant_p(msr)) { \ - TEST_ASSERT(cond, fmt, str, args); \ - } else if (!(cond)) { \ - char buf[16]; \ - \ - snprintf(buf, sizeof(buf), "MSR 0x%x", msr); \ - TEST_ASSERT(cond, fmt, buf, args); \ - } \ -} while (0) - -/* - * Returns true if KVM should return the last written value when reading an MSR - * from userspace, e.g. the MSR isn't a command MSR, doesn't emulate state that - * is changing, etc. This is NOT an exhaustive list! The intent is to filter - * out MSRs that are not durable _and_ that a selftest wants to write. 
- */ -static inline bool is_durable_msr(uint32_t msr) -{ - return msr != MSR_IA32_TSC; -} - -#define vcpu_set_msr(vcpu, msr, val) \ -do { \ - uint64_t r, v = val; \ - \ - TEST_ASSERT_MSR(_vcpu_set_msr(vcpu, msr, v) == 1, \ - "KVM_SET_MSRS failed on %s, value = 0x%lx", msr, #msr, v); \ - if (!is_durable_msr(msr)) \ - break; \ - r = vcpu_get_msr(vcpu, msr); \ - TEST_ASSERT_MSR(r == v, "Set %s to '0x%lx', got back '0x%lx'", msr, #msr, v, r);\ -} while (0) - -void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits); -void kvm_init_vm_address_properties(struct kvm_vm *vm); -bool vm_is_unrestricted_guest(struct kvm_vm *vm); - -struct ex_regs { - uint64_t rax, rcx, rdx, rbx; - uint64_t rbp, rsi, rdi; - uint64_t r8, r9, r10, r11; - uint64_t r12, r13, r14, r15; - uint64_t vector; - uint64_t error_code; - uint64_t rip; - uint64_t cs; - uint64_t rflags; -}; - -struct idt_entry { - uint16_t offset0; - uint16_t selector; - uint16_t ist : 3; - uint16_t : 5; - uint16_t type : 4; - uint16_t : 1; - uint16_t dpl : 2; - uint16_t p : 1; - uint16_t offset1; - uint32_t offset2; uint32_t reserved; -}; - -void vm_install_exception_handler(struct kvm_vm *vm, int vector, - void (*handler)(struct ex_regs *)); - -/* If a toddler were to say "abracadabra". */ -#define KVM_EXCEPTION_MAGIC 0xabacadabaULL - -/* - * KVM selftest exception fixup uses registers to coordinate with the exception - * handler, versus the kernel's in-memory tables and KVM-Unit-Tests's in-memory - * per-CPU data. Using only registers avoids having to map memory into the - * guest, doesn't require a valid, stable GS.base, and reduces the risk of - * for recursive faults when accessing memory in the handler. The downside to - * using registers is that it restricts what registers can be used by the actual - * instruction. But, selftests are 64-bit only, making register* pressure a - * minor concern. Use r9-r11 as they are volatile, i.e. don't need to be saved - * by the callee, and except for r11 are not implicit parameters to any - * instructions. Ideally, fixup would use r8-r10 and thus avoid implicit - * parameters entirely, but Hyper-V's hypercall ABI uses r8 and testing Hyper-V - * is higher priority than testing non-faulting SYSCALL/SYSRET. - * - * Note, the fixup handler deliberately does not handle #DE, i.e. the vector - * is guaranteed to be non-zero on fault. - * - * REGISTER INPUTS: - * r9 = MAGIC - * r10 = RIP - * r11 = new RIP on fault - * - * REGISTER OUTPUTS: - * r9 = exception vector (non-zero) - * r10 = error code - */ -#define __KVM_ASM_SAFE(insn, fep) \ - "mov $" __stringify(KVM_EXCEPTION_MAGIC) ", %%r9\n\t" \ - "lea 1f(%%rip), %%r10\n\t" \ - "lea 2f(%%rip), %%r11\n\t" \ - fep "1: " insn "\n\t" \ - "xor %%r9, %%r9\n\t" \ - "2:\n\t" \ - "mov %%r9b, %[vector]\n\t" \ - "mov %%r10, %[error_code]\n\t" - -#define KVM_ASM_SAFE(insn) __KVM_ASM_SAFE(insn, "") -#define KVM_ASM_SAFE_FEP(insn) __KVM_ASM_SAFE(insn, KVM_FEP) - -#define KVM_ASM_SAFE_OUTPUTS(v, ec) [vector] "=qm"(v), [error_code] "=rm"(ec) -#define KVM_ASM_SAFE_CLOBBERS "r9", "r10", "r11" - -#define kvm_asm_safe(insn, inputs...) \ -({ \ - uint64_t ign_error_code; \ - uint8_t vector; \ - \ - asm volatile(KVM_ASM_SAFE(insn) \ - : KVM_ASM_SAFE_OUTPUTS(vector, ign_error_code) \ - : inputs \ - : KVM_ASM_SAFE_CLOBBERS); \ - vector; \ -}) - -#define kvm_asm_safe_ec(insn, error_code, inputs...) 
\ -({ \ - uint8_t vector; \ - \ - asm volatile(KVM_ASM_SAFE(insn) \ - : KVM_ASM_SAFE_OUTPUTS(vector, error_code) \ - : inputs \ - : KVM_ASM_SAFE_CLOBBERS); \ - vector; \ -}) - -#define kvm_asm_safe_fep(insn, inputs...) \ -({ \ - uint64_t ign_error_code; \ - uint8_t vector; \ - \ - asm volatile(KVM_ASM_SAFE(insn) \ - : KVM_ASM_SAFE_OUTPUTS(vector, ign_error_code) \ - : inputs \ - : KVM_ASM_SAFE_CLOBBERS); \ - vector; \ -}) - -#define kvm_asm_safe_ec_fep(insn, error_code, inputs...) \ -({ \ - uint8_t vector; \ - \ - asm volatile(KVM_ASM_SAFE_FEP(insn) \ - : KVM_ASM_SAFE_OUTPUTS(vector, error_code) \ - : inputs \ - : KVM_ASM_SAFE_CLOBBERS); \ - vector; \ -}) - -#define BUILD_READ_U64_SAFE_HELPER(insn, _fep, _FEP) \ -static inline uint8_t insn##_safe ##_fep(uint32_t idx, uint64_t *val) \ -{ \ - uint64_t error_code; \ - uint8_t vector; \ - uint32_t a, d; \ - \ - asm volatile(KVM_ASM_SAFE##_FEP(#insn) \ - : "=a"(a), "=d"(d), \ - KVM_ASM_SAFE_OUTPUTS(vector, error_code) \ - : "c"(idx) \ - : KVM_ASM_SAFE_CLOBBERS); \ - \ - *val = (uint64_t)a | ((uint64_t)d << 32); \ - return vector; \ -} - -/* - * Generate {insn}_safe() and {insn}_safe_fep() helpers for instructions that - * use ECX as in input index, and EDX:EAX as a 64-bit output. - */ -#define BUILD_READ_U64_SAFE_HELPERS(insn) \ - BUILD_READ_U64_SAFE_HELPER(insn, , ) \ - BUILD_READ_U64_SAFE_HELPER(insn, _fep, _FEP) \ - -BUILD_READ_U64_SAFE_HELPERS(rdmsr) -BUILD_READ_U64_SAFE_HELPERS(rdpmc) -BUILD_READ_U64_SAFE_HELPERS(xgetbv) - -static inline uint8_t wrmsr_safe(uint32_t msr, uint64_t val) -{ - return kvm_asm_safe("wrmsr", "a"(val & -1u), "d"(val >> 32), "c"(msr)); -} - -static inline uint8_t xsetbv_safe(uint32_t index, uint64_t value) -{ - u32 eax = value; - u32 edx = value >> 32; - - return kvm_asm_safe("xsetbv", "a" (eax), "d" (edx), "c" (index)); -} - -bool kvm_is_tdp_enabled(void); - -static inline bool kvm_is_pmu_enabled(void) -{ - return get_kvm_param_bool("enable_pmu"); -} - -static inline bool kvm_is_forced_emulation_enabled(void) -{ - return !!get_kvm_param_integer("force_emulation_prefix"); -} - -uint64_t *__vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr, - int *level); -uint64_t *vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr); - -uint64_t kvm_hypercall(uint64_t nr, uint64_t a0, uint64_t a1, uint64_t a2, - uint64_t a3); -uint64_t __xen_hypercall(uint64_t nr, uint64_t a0, void *a1); -void xen_hypercall(uint64_t nr, uint64_t a0, void *a1); - -static inline uint64_t __kvm_hypercall_map_gpa_range(uint64_t gpa, - uint64_t size, uint64_t flags) -{ - return kvm_hypercall(KVM_HC_MAP_GPA_RANGE, gpa, size >> PAGE_SHIFT, flags, 0); -} - -static inline void kvm_hypercall_map_gpa_range(uint64_t gpa, uint64_t size, - uint64_t flags) -{ - uint64_t ret = __kvm_hypercall_map_gpa_range(gpa, size, flags); - - GUEST_ASSERT(!ret); -} - -void __vm_xsave_require_permission(uint64_t xfeature, const char *name); - -#define vm_xsave_require_permission(xfeature) \ - __vm_xsave_require_permission(xfeature, #xfeature) - -enum pg_level { - PG_LEVEL_NONE, - PG_LEVEL_4K, - PG_LEVEL_2M, - PG_LEVEL_1G, - PG_LEVEL_512G, - PG_LEVEL_NUM -}; - -#define PG_LEVEL_SHIFT(_level) ((_level - 1) * 9 + 12) -#define PG_LEVEL_SIZE(_level) (1ull << PG_LEVEL_SHIFT(_level)) - -#define PG_SIZE_4K PG_LEVEL_SIZE(PG_LEVEL_4K) -#define PG_SIZE_2M PG_LEVEL_SIZE(PG_LEVEL_2M) -#define PG_SIZE_1G PG_LEVEL_SIZE(PG_LEVEL_1G) - -void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, int level); -void virt_map_level(struct kvm_vm *vm, uint64_t vaddr, uint64_t 
paddr, - uint64_t nr_bytes, int level); - -/* - * Basic CPU control in CR0 - */ -#define X86_CR0_PE (1UL<<0) /* Protection Enable */ -#define X86_CR0_MP (1UL<<1) /* Monitor Coprocessor */ -#define X86_CR0_EM (1UL<<2) /* Emulation */ -#define X86_CR0_TS (1UL<<3) /* Task Switched */ -#define X86_CR0_ET (1UL<<4) /* Extension Type */ -#define X86_CR0_NE (1UL<<5) /* Numeric Error */ -#define X86_CR0_WP (1UL<<16) /* Write Protect */ -#define X86_CR0_AM (1UL<<18) /* Alignment Mask */ -#define X86_CR0_NW (1UL<<29) /* Not Write-through */ -#define X86_CR0_CD (1UL<<30) /* Cache Disable */ -#define X86_CR0_PG (1UL<<31) /* Paging */ - -#define PFERR_PRESENT_BIT 0 -#define PFERR_WRITE_BIT 1 -#define PFERR_USER_BIT 2 -#define PFERR_RSVD_BIT 3 -#define PFERR_FETCH_BIT 4 -#define PFERR_PK_BIT 5 -#define PFERR_SGX_BIT 15 -#define PFERR_GUEST_FINAL_BIT 32 -#define PFERR_GUEST_PAGE_BIT 33 -#define PFERR_IMPLICIT_ACCESS_BIT 48 - -#define PFERR_PRESENT_MASK BIT(PFERR_PRESENT_BIT) -#define PFERR_WRITE_MASK BIT(PFERR_WRITE_BIT) -#define PFERR_USER_MASK BIT(PFERR_USER_BIT) -#define PFERR_RSVD_MASK BIT(PFERR_RSVD_BIT) -#define PFERR_FETCH_MASK BIT(PFERR_FETCH_BIT) -#define PFERR_PK_MASK BIT(PFERR_PK_BIT) -#define PFERR_SGX_MASK BIT(PFERR_SGX_BIT) -#define PFERR_GUEST_FINAL_MASK BIT_ULL(PFERR_GUEST_FINAL_BIT) -#define PFERR_GUEST_PAGE_MASK BIT_ULL(PFERR_GUEST_PAGE_BIT) -#define PFERR_IMPLICIT_ACCESS BIT_ULL(PFERR_IMPLICIT_ACCESS_BIT) - -bool sys_clocksource_is_based_on_tsc(void); - -#endif /* SELFTEST_KVM_PROCESSOR_H */ diff --git a/tools/testing/selftests/kvm/include/x86_64/sev.h b/tools/testing/selftests/kvm/include/x86_64/sev.h deleted file mode 100644 index 82c11c81a956..000000000000 --- a/tools/testing/selftests/kvm/include/x86_64/sev.h +++ /dev/null @@ -1,96 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Helpers used for SEV guests - * - */ -#ifndef SELFTEST_KVM_SEV_H -#define SELFTEST_KVM_SEV_H - -#include -#include - -#include "linux/psp-sev.h" - -#include "kvm_util.h" -#include "svm_util.h" -#include "processor.h" - -enum sev_guest_state { - SEV_GUEST_STATE_UNINITIALIZED = 0, - SEV_GUEST_STATE_LAUNCH_UPDATE, - SEV_GUEST_STATE_LAUNCH_SECRET, - SEV_GUEST_STATE_RUNNING, -}; - -#define SEV_POLICY_NO_DBG (1UL << 0) -#define SEV_POLICY_ES (1UL << 2) - -#define GHCB_MSR_TERM_REQ 0x100 - -void sev_vm_launch(struct kvm_vm *vm, uint32_t policy); -void sev_vm_launch_measure(struct kvm_vm *vm, uint8_t *measurement); -void sev_vm_launch_finish(struct kvm_vm *vm); - -struct kvm_vm *vm_sev_create_with_one_vcpu(uint32_t type, void *guest_code, - struct kvm_vcpu **cpu); -void vm_sev_launch(struct kvm_vm *vm, uint32_t policy, uint8_t *measurement); - -kvm_static_assert(SEV_RET_SUCCESS == 0); - -/* - * The KVM_MEMORY_ENCRYPT_OP uAPI is utter garbage and takes an "unsigned long" - * instead of a proper struct. The size of the parameter is embedded in the - * ioctl number, i.e. is ABI and thus immutable. Hack around the mess by - * creating an overlay to pass in an "unsigned long" without a cast (casting - * will make the compiler unhappy due to dereferencing an aliased pointer). 
- */ -#define __vm_sev_ioctl(vm, cmd, arg) \ -({ \ - int r; \ - \ - union { \ - struct kvm_sev_cmd c; \ - unsigned long raw; \ - } sev_cmd = { .c = { \ - .id = (cmd), \ - .data = (uint64_t)(arg), \ - .sev_fd = (vm)->arch.sev_fd, \ - } }; \ - \ - r = __vm_ioctl(vm, KVM_MEMORY_ENCRYPT_OP, &sev_cmd.raw); \ - r ?: sev_cmd.c.error; \ -}) - -#define vm_sev_ioctl(vm, cmd, arg) \ -({ \ - int ret = __vm_sev_ioctl(vm, cmd, arg); \ - \ - __TEST_ASSERT_VM_VCPU_IOCTL(!ret, #cmd, ret, vm); \ -}) - -void sev_vm_init(struct kvm_vm *vm); -void sev_es_vm_init(struct kvm_vm *vm); - -static inline void sev_register_encrypted_memory(struct kvm_vm *vm, - struct userspace_mem_region *region) -{ - struct kvm_enc_region range = { - .addr = region->region.userspace_addr, - .size = region->region.memory_size, - }; - - vm_ioctl(vm, KVM_MEMORY_ENCRYPT_REG_REGION, &range); -} - -static inline void sev_launch_update_data(struct kvm_vm *vm, vm_paddr_t gpa, - uint64_t size) -{ - struct kvm_sev_launch_update_data update_data = { - .uaddr = (unsigned long)addr_gpa2hva(vm, gpa), - .len = size, - }; - - vm_sev_ioctl(vm, KVM_SEV_LAUNCH_UPDATE_DATA, &update_data); -} - -#endif /* SELFTEST_KVM_SEV_H */ diff --git a/tools/testing/selftests/kvm/include/x86_64/svm.h b/tools/testing/selftests/kvm/include/x86_64/svm.h deleted file mode 100644 index 4803e1056055..000000000000 --- a/tools/testing/selftests/kvm/include/x86_64/svm.h +++ /dev/null @@ -1,326 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * tools/testing/selftests/kvm/include/x86_64/svm.h - * This is a copy of arch/x86/include/asm/svm.h - * - */ - -#ifndef SELFTEST_KVM_SVM_H -#define SELFTEST_KVM_SVM_H - -enum { - INTERCEPT_INTR, - INTERCEPT_NMI, - INTERCEPT_SMI, - INTERCEPT_INIT, - INTERCEPT_VINTR, - INTERCEPT_SELECTIVE_CR0, - INTERCEPT_STORE_IDTR, - INTERCEPT_STORE_GDTR, - INTERCEPT_STORE_LDTR, - INTERCEPT_STORE_TR, - INTERCEPT_LOAD_IDTR, - INTERCEPT_LOAD_GDTR, - INTERCEPT_LOAD_LDTR, - INTERCEPT_LOAD_TR, - INTERCEPT_RDTSC, - INTERCEPT_RDPMC, - INTERCEPT_PUSHF, - INTERCEPT_POPF, - INTERCEPT_CPUID, - INTERCEPT_RSM, - INTERCEPT_IRET, - INTERCEPT_INTn, - INTERCEPT_INVD, - INTERCEPT_PAUSE, - INTERCEPT_HLT, - INTERCEPT_INVLPG, - INTERCEPT_INVLPGA, - INTERCEPT_IOIO_PROT, - INTERCEPT_MSR_PROT, - INTERCEPT_TASK_SWITCH, - INTERCEPT_FERR_FREEZE, - INTERCEPT_SHUTDOWN, - INTERCEPT_VMRUN, - INTERCEPT_VMMCALL, - INTERCEPT_VMLOAD, - INTERCEPT_VMSAVE, - INTERCEPT_STGI, - INTERCEPT_CLGI, - INTERCEPT_SKINIT, - INTERCEPT_RDTSCP, - INTERCEPT_ICEBP, - INTERCEPT_WBINVD, - INTERCEPT_MONITOR, - INTERCEPT_MWAIT, - INTERCEPT_MWAIT_COND, - INTERCEPT_XSETBV, - INTERCEPT_RDPRU, -}; - -struct hv_vmcb_enlightenments { - struct __packed hv_enlightenments_control { - u32 nested_flush_hypercall:1; - u32 msr_bitmap:1; - u32 enlightened_npt_tlb: 1; - u32 reserved:29; - } __packed hv_enlightenments_control; - u32 hv_vp_id; - u64 hv_vm_id; - u64 partition_assist_page; - u64 reserved; -} __packed; - -/* - * Hyper-V uses the software reserved clean bit in VMCB - */ -#define HV_VMCB_NESTED_ENLIGHTENMENTS (1U << 31) - -/* Synthetic VM-Exit */ -#define HV_SVM_EXITCODE_ENL 0xf0000000 -#define HV_SVM_ENL_EXITCODE_TRAP_AFTER_FLUSH (1) - -struct __attribute__ ((__packed__)) vmcb_control_area { - u32 intercept_cr; - u32 intercept_dr; - u32 intercept_exceptions; - u64 intercept; - u8 reserved_1[40]; - u16 pause_filter_thresh; - u16 pause_filter_count; - u64 iopm_base_pa; - u64 msrpm_base_pa; - u64 tsc_offset; - u32 asid; - u8 tlb_ctl; - u8 reserved_2[3]; - u32 int_ctl; - u32 int_vector; - u32 int_state; - u8 
reserved_3[4]; - u32 exit_code; - u32 exit_code_hi; - u64 exit_info_1; - u64 exit_info_2; - u32 exit_int_info; - u32 exit_int_info_err; - u64 nested_ctl; - u64 avic_vapic_bar; - u8 reserved_4[8]; - u32 event_inj; - u32 event_inj_err; - u64 nested_cr3; - u64 virt_ext; - u32 clean; - u32 reserved_5; - u64 next_rip; - u8 insn_len; - u8 insn_bytes[15]; - u64 avic_backing_page; /* Offset 0xe0 */ - u8 reserved_6[8]; /* Offset 0xe8 */ - u64 avic_logical_id; /* Offset 0xf0 */ - u64 avic_physical_id; /* Offset 0xf8 */ - u8 reserved_7[8]; - u64 vmsa_pa; /* Used for an SEV-ES guest */ - u8 reserved_8[720]; - /* - * Offset 0x3e0, 32 bytes reserved - * for use by hypervisor/software. - */ - union { - struct hv_vmcb_enlightenments hv_enlightenments; - u8 reserved_sw[32]; - }; -}; - - -#define TLB_CONTROL_DO_NOTHING 0 -#define TLB_CONTROL_FLUSH_ALL_ASID 1 -#define TLB_CONTROL_FLUSH_ASID 3 -#define TLB_CONTROL_FLUSH_ASID_LOCAL 7 - -#define V_TPR_MASK 0x0f - -#define V_IRQ_SHIFT 8 -#define V_IRQ_MASK (1 << V_IRQ_SHIFT) - -#define V_GIF_SHIFT 9 -#define V_GIF_MASK (1 << V_GIF_SHIFT) - -#define V_INTR_PRIO_SHIFT 16 -#define V_INTR_PRIO_MASK (0x0f << V_INTR_PRIO_SHIFT) - -#define V_IGN_TPR_SHIFT 20 -#define V_IGN_TPR_MASK (1 << V_IGN_TPR_SHIFT) - -#define V_INTR_MASKING_SHIFT 24 -#define V_INTR_MASKING_MASK (1 << V_INTR_MASKING_SHIFT) - -#define V_GIF_ENABLE_SHIFT 25 -#define V_GIF_ENABLE_MASK (1 << V_GIF_ENABLE_SHIFT) - -#define AVIC_ENABLE_SHIFT 31 -#define AVIC_ENABLE_MASK (1 << AVIC_ENABLE_SHIFT) - -#define LBR_CTL_ENABLE_MASK BIT_ULL(0) -#define VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK BIT_ULL(1) - -#define SVM_INTERRUPT_SHADOW_MASK 1 - -#define SVM_IOIO_STR_SHIFT 2 -#define SVM_IOIO_REP_SHIFT 3 -#define SVM_IOIO_SIZE_SHIFT 4 -#define SVM_IOIO_ASIZE_SHIFT 7 - -#define SVM_IOIO_TYPE_MASK 1 -#define SVM_IOIO_STR_MASK (1 << SVM_IOIO_STR_SHIFT) -#define SVM_IOIO_REP_MASK (1 << SVM_IOIO_REP_SHIFT) -#define SVM_IOIO_SIZE_MASK (7 << SVM_IOIO_SIZE_SHIFT) -#define SVM_IOIO_ASIZE_MASK (7 << SVM_IOIO_ASIZE_SHIFT) - -#define SVM_VM_CR_VALID_MASK 0x001fULL -#define SVM_VM_CR_SVM_LOCK_MASK 0x0008ULL -#define SVM_VM_CR_SVM_DIS_MASK 0x0010ULL - -#define SVM_NESTED_CTL_NP_ENABLE BIT(0) -#define SVM_NESTED_CTL_SEV_ENABLE BIT(1) - -struct __attribute__ ((__packed__)) vmcb_seg { - u16 selector; - u16 attrib; - u32 limit; - u64 base; -}; - -struct __attribute__ ((__packed__)) vmcb_save_area { - struct vmcb_seg es; - struct vmcb_seg cs; - struct vmcb_seg ss; - struct vmcb_seg ds; - struct vmcb_seg fs; - struct vmcb_seg gs; - struct vmcb_seg gdtr; - struct vmcb_seg ldtr; - struct vmcb_seg idtr; - struct vmcb_seg tr; - u8 reserved_1[43]; - u8 cpl; - u8 reserved_2[4]; - u64 efer; - u8 reserved_3[112]; - u64 cr4; - u64 cr3; - u64 cr0; - u64 dr7; - u64 dr6; - u64 rflags; - u64 rip; - u8 reserved_4[88]; - u64 rsp; - u8 reserved_5[24]; - u64 rax; - u64 star; - u64 lstar; - u64 cstar; - u64 sfmask; - u64 kernel_gs_base; - u64 sysenter_cs; - u64 sysenter_esp; - u64 sysenter_eip; - u64 cr2; - u8 reserved_6[32]; - u64 g_pat; - u64 dbgctl; - u64 br_from; - u64 br_to; - u64 last_excp_from; - u64 last_excp_to; -}; - -struct __attribute__ ((__packed__)) vmcb { - struct vmcb_control_area control; - struct vmcb_save_area save; -}; - -#define SVM_VM_CR_SVM_DISABLE 4 - -#define SVM_SELECTOR_S_SHIFT 4 -#define SVM_SELECTOR_DPL_SHIFT 5 -#define SVM_SELECTOR_P_SHIFT 7 -#define SVM_SELECTOR_AVL_SHIFT 8 -#define SVM_SELECTOR_L_SHIFT 9 -#define SVM_SELECTOR_DB_SHIFT 10 -#define SVM_SELECTOR_G_SHIFT 11 - -#define SVM_SELECTOR_TYPE_MASK (0xf) -#define 
SVM_SELECTOR_S_MASK (1 << SVM_SELECTOR_S_SHIFT) -#define SVM_SELECTOR_DPL_MASK (3 << SVM_SELECTOR_DPL_SHIFT) -#define SVM_SELECTOR_P_MASK (1 << SVM_SELECTOR_P_SHIFT) -#define SVM_SELECTOR_AVL_MASK (1 << SVM_SELECTOR_AVL_SHIFT) -#define SVM_SELECTOR_L_MASK (1 << SVM_SELECTOR_L_SHIFT) -#define SVM_SELECTOR_DB_MASK (1 << SVM_SELECTOR_DB_SHIFT) -#define SVM_SELECTOR_G_MASK (1 << SVM_SELECTOR_G_SHIFT) - -#define SVM_SELECTOR_WRITE_MASK (1 << 1) -#define SVM_SELECTOR_READ_MASK SVM_SELECTOR_WRITE_MASK -#define SVM_SELECTOR_CODE_MASK (1 << 3) - -#define INTERCEPT_CR0_READ 0 -#define INTERCEPT_CR3_READ 3 -#define INTERCEPT_CR4_READ 4 -#define INTERCEPT_CR8_READ 8 -#define INTERCEPT_CR0_WRITE (16 + 0) -#define INTERCEPT_CR3_WRITE (16 + 3) -#define INTERCEPT_CR4_WRITE (16 + 4) -#define INTERCEPT_CR8_WRITE (16 + 8) - -#define INTERCEPT_DR0_READ 0 -#define INTERCEPT_DR1_READ 1 -#define INTERCEPT_DR2_READ 2 -#define INTERCEPT_DR3_READ 3 -#define INTERCEPT_DR4_READ 4 -#define INTERCEPT_DR5_READ 5 -#define INTERCEPT_DR6_READ 6 -#define INTERCEPT_DR7_READ 7 -#define INTERCEPT_DR0_WRITE (16 + 0) -#define INTERCEPT_DR1_WRITE (16 + 1) -#define INTERCEPT_DR2_WRITE (16 + 2) -#define INTERCEPT_DR3_WRITE (16 + 3) -#define INTERCEPT_DR4_WRITE (16 + 4) -#define INTERCEPT_DR5_WRITE (16 + 5) -#define INTERCEPT_DR6_WRITE (16 + 6) -#define INTERCEPT_DR7_WRITE (16 + 7) - -#define SVM_EVTINJ_VEC_MASK 0xff - -#define SVM_EVTINJ_TYPE_SHIFT 8 -#define SVM_EVTINJ_TYPE_MASK (7 << SVM_EVTINJ_TYPE_SHIFT) - -#define SVM_EVTINJ_TYPE_INTR (0 << SVM_EVTINJ_TYPE_SHIFT) -#define SVM_EVTINJ_TYPE_NMI (2 << SVM_EVTINJ_TYPE_SHIFT) -#define SVM_EVTINJ_TYPE_EXEPT (3 << SVM_EVTINJ_TYPE_SHIFT) -#define SVM_EVTINJ_TYPE_SOFT (4 << SVM_EVTINJ_TYPE_SHIFT) - -#define SVM_EVTINJ_VALID (1 << 31) -#define SVM_EVTINJ_VALID_ERR (1 << 11) - -#define SVM_EXITINTINFO_VEC_MASK SVM_EVTINJ_VEC_MASK -#define SVM_EXITINTINFO_TYPE_MASK SVM_EVTINJ_TYPE_MASK - -#define SVM_EXITINTINFO_TYPE_INTR SVM_EVTINJ_TYPE_INTR -#define SVM_EXITINTINFO_TYPE_NMI SVM_EVTINJ_TYPE_NMI -#define SVM_EXITINTINFO_TYPE_EXEPT SVM_EVTINJ_TYPE_EXEPT -#define SVM_EXITINTINFO_TYPE_SOFT SVM_EVTINJ_TYPE_SOFT - -#define SVM_EXITINTINFO_VALID SVM_EVTINJ_VALID -#define SVM_EXITINTINFO_VALID_ERR SVM_EVTINJ_VALID_ERR - -#define SVM_EXITINFOSHIFT_TS_REASON_IRET 36 -#define SVM_EXITINFOSHIFT_TS_REASON_JMP 38 -#define SVM_EXITINFOSHIFT_TS_HAS_ERROR_CODE 44 - -#define SVM_EXITINFO_REG_MASK 0x0F - -#define SVM_CR0_SELECTIVE_MASK (X86_CR0_TS | X86_CR0_MP) - -#endif /* SELFTEST_KVM_SVM_H */ diff --git a/tools/testing/selftests/kvm/include/x86_64/svm_util.h b/tools/testing/selftests/kvm/include/x86_64/svm_util.h deleted file mode 100644 index 044f0f872ba9..000000000000 --- a/tools/testing/selftests/kvm/include/x86_64/svm_util.h +++ /dev/null @@ -1,65 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * tools/testing/selftests/kvm/include/x86_64/svm_utils.h - * Header for nested SVM testing - * - * Copyright (C) 2020, Red Hat, Inc. 
- */ - -#ifndef SELFTEST_KVM_SVM_UTILS_H -#define SELFTEST_KVM_SVM_UTILS_H - -#include - -#include -#include "svm.h" -#include "processor.h" - -struct svm_test_data { - /* VMCB */ - struct vmcb *vmcb; /* gva */ - void *vmcb_hva; - uint64_t vmcb_gpa; - - /* host state-save area */ - struct vmcb_save_area *save_area; /* gva */ - void *save_area_hva; - uint64_t save_area_gpa; - - /* MSR-Bitmap */ - void *msr; /* gva */ - void *msr_hva; - uint64_t msr_gpa; -}; - -static inline void vmmcall(void) -{ - /* - * Stuff RAX and RCX with "safe" values to make sure L0 doesn't handle - * it as a valid hypercall (e.g. Hyper-V L2 TLB flush) as the intended - * use of this function is to exit to L1 from L2. Clobber all other - * GPRs as L1 doesn't correctly preserve them during vmexits. - */ - __asm__ __volatile__("push %%rbp; vmmcall; pop %%rbp" - : : "a"(0xdeadbeef), "c"(0xbeefdead) - : "rbx", "rdx", "rsi", "rdi", "r8", "r9", - "r10", "r11", "r12", "r13", "r14", "r15"); -} - -#define stgi() \ - __asm__ __volatile__( \ - "stgi\n" \ - ) - -#define clgi() \ - __asm__ __volatile__( \ - "clgi\n" \ - ) - -struct svm_test_data *vcpu_alloc_svm(struct kvm_vm *vm, vm_vaddr_t *p_svm_gva); -void generic_svm_setup(struct svm_test_data *svm, void *guest_rip, void *guest_rsp); -void run_guest(struct vmcb *vmcb, uint64_t vmcb_gpa); - -int open_sev_dev_path_or_exit(void); - -#endif /* SELFTEST_KVM_SVM_UTILS_H */ diff --git a/tools/testing/selftests/kvm/include/x86_64/ucall.h b/tools/testing/selftests/kvm/include/x86_64/ucall.h deleted file mode 100644 index d3825dcc3cd9..000000000000 --- a/tools/testing/selftests/kvm/include/x86_64/ucall.h +++ /dev/null @@ -1,13 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -#ifndef SELFTEST_KVM_UCALL_H -#define SELFTEST_KVM_UCALL_H - -#include "kvm_util.h" - -#define UCALL_EXIT_REASON KVM_EXIT_IO - -static inline void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa) -{ -} - -#endif diff --git a/tools/testing/selftests/kvm/include/x86_64/vmx.h b/tools/testing/selftests/kvm/include/x86_64/vmx.h deleted file mode 100644 index 5f0c0a29c556..000000000000 --- a/tools/testing/selftests/kvm/include/x86_64/vmx.h +++ /dev/null @@ -1,577 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * tools/testing/selftests/kvm/include/x86_64/vmx.h - * - * Copyright (C) 2018, Google LLC. - */ - -#ifndef SELFTEST_KVM_VMX_H -#define SELFTEST_KVM_VMX_H - -#include - -#include -#include "processor.h" -#include "apic.h" - -/* - * Definitions of Primary Processor-Based VM-Execution Controls. 
- */ -#define CPU_BASED_INTR_WINDOW_EXITING 0x00000004 -#define CPU_BASED_USE_TSC_OFFSETTING 0x00000008 -#define CPU_BASED_HLT_EXITING 0x00000080 -#define CPU_BASED_INVLPG_EXITING 0x00000200 -#define CPU_BASED_MWAIT_EXITING 0x00000400 -#define CPU_BASED_RDPMC_EXITING 0x00000800 -#define CPU_BASED_RDTSC_EXITING 0x00001000 -#define CPU_BASED_CR3_LOAD_EXITING 0x00008000 -#define CPU_BASED_CR3_STORE_EXITING 0x00010000 -#define CPU_BASED_CR8_LOAD_EXITING 0x00080000 -#define CPU_BASED_CR8_STORE_EXITING 0x00100000 -#define CPU_BASED_TPR_SHADOW 0x00200000 -#define CPU_BASED_NMI_WINDOW_EXITING 0x00400000 -#define CPU_BASED_MOV_DR_EXITING 0x00800000 -#define CPU_BASED_UNCOND_IO_EXITING 0x01000000 -#define CPU_BASED_USE_IO_BITMAPS 0x02000000 -#define CPU_BASED_MONITOR_TRAP 0x08000000 -#define CPU_BASED_USE_MSR_BITMAPS 0x10000000 -#define CPU_BASED_MONITOR_EXITING 0x20000000 -#define CPU_BASED_PAUSE_EXITING 0x40000000 -#define CPU_BASED_ACTIVATE_SECONDARY_CONTROLS 0x80000000 - -#define CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR 0x0401e172 - -/* - * Definitions of Secondary Processor-Based VM-Execution Controls. - */ -#define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001 -#define SECONDARY_EXEC_ENABLE_EPT 0x00000002 -#define SECONDARY_EXEC_DESC 0x00000004 -#define SECONDARY_EXEC_ENABLE_RDTSCP 0x00000008 -#define SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE 0x00000010 -#define SECONDARY_EXEC_ENABLE_VPID 0x00000020 -#define SECONDARY_EXEC_WBINVD_EXITING 0x00000040 -#define SECONDARY_EXEC_UNRESTRICTED_GUEST 0x00000080 -#define SECONDARY_EXEC_APIC_REGISTER_VIRT 0x00000100 -#define SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY 0x00000200 -#define SECONDARY_EXEC_PAUSE_LOOP_EXITING 0x00000400 -#define SECONDARY_EXEC_RDRAND_EXITING 0x00000800 -#define SECONDARY_EXEC_ENABLE_INVPCID 0x00001000 -#define SECONDARY_EXEC_ENABLE_VMFUNC 0x00002000 -#define SECONDARY_EXEC_SHADOW_VMCS 0x00004000 -#define SECONDARY_EXEC_RDSEED_EXITING 0x00010000 -#define SECONDARY_EXEC_ENABLE_PML 0x00020000 -#define SECONDARY_EPT_VE 0x00040000 -#define SECONDARY_ENABLE_XSAV_RESTORE 0x00100000 -#define SECONDARY_EXEC_TSC_SCALING 0x02000000 - -#define PIN_BASED_EXT_INTR_MASK 0x00000001 -#define PIN_BASED_NMI_EXITING 0x00000008 -#define PIN_BASED_VIRTUAL_NMIS 0x00000020 -#define PIN_BASED_VMX_PREEMPTION_TIMER 0x00000040 -#define PIN_BASED_POSTED_INTR 0x00000080 - -#define PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR 0x00000016 - -#define VM_EXIT_SAVE_DEBUG_CONTROLS 0x00000004 -#define VM_EXIT_HOST_ADDR_SPACE_SIZE 0x00000200 -#define VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL 0x00001000 -#define VM_EXIT_ACK_INTR_ON_EXIT 0x00008000 -#define VM_EXIT_SAVE_IA32_PAT 0x00040000 -#define VM_EXIT_LOAD_IA32_PAT 0x00080000 -#define VM_EXIT_SAVE_IA32_EFER 0x00100000 -#define VM_EXIT_LOAD_IA32_EFER 0x00200000 -#define VM_EXIT_SAVE_VMX_PREEMPTION_TIMER 0x00400000 - -#define VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR 0x00036dff - -#define VM_ENTRY_LOAD_DEBUG_CONTROLS 0x00000004 -#define VM_ENTRY_IA32E_MODE 0x00000200 -#define VM_ENTRY_SMM 0x00000400 -#define VM_ENTRY_DEACT_DUAL_MONITOR 0x00000800 -#define VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL 0x00002000 -#define VM_ENTRY_LOAD_IA32_PAT 0x00004000 -#define VM_ENTRY_LOAD_IA32_EFER 0x00008000 - -#define VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR 0x000011ff - -#define VMX_MISC_PREEMPTION_TIMER_RATE_MASK 0x0000001f -#define VMX_MISC_SAVE_EFER_LMA 0x00000020 - -#define VMX_EPT_VPID_CAP_1G_PAGES 0x00020000 -#define VMX_EPT_VPID_CAP_AD_BITS 0x00200000 - -#define EXIT_REASON_FAILED_VMENTRY 0x80000000 - -enum vmcs_field { - VIRTUAL_PROCESSOR_ID = 0x00000000, - 
POSTED_INTR_NV = 0x00000002, - GUEST_ES_SELECTOR = 0x00000800, - GUEST_CS_SELECTOR = 0x00000802, - GUEST_SS_SELECTOR = 0x00000804, - GUEST_DS_SELECTOR = 0x00000806, - GUEST_FS_SELECTOR = 0x00000808, - GUEST_GS_SELECTOR = 0x0000080a, - GUEST_LDTR_SELECTOR = 0x0000080c, - GUEST_TR_SELECTOR = 0x0000080e, - GUEST_INTR_STATUS = 0x00000810, - GUEST_PML_INDEX = 0x00000812, - HOST_ES_SELECTOR = 0x00000c00, - HOST_CS_SELECTOR = 0x00000c02, - HOST_SS_SELECTOR = 0x00000c04, - HOST_DS_SELECTOR = 0x00000c06, - HOST_FS_SELECTOR = 0x00000c08, - HOST_GS_SELECTOR = 0x00000c0a, - HOST_TR_SELECTOR = 0x00000c0c, - IO_BITMAP_A = 0x00002000, - IO_BITMAP_A_HIGH = 0x00002001, - IO_BITMAP_B = 0x00002002, - IO_BITMAP_B_HIGH = 0x00002003, - MSR_BITMAP = 0x00002004, - MSR_BITMAP_HIGH = 0x00002005, - VM_EXIT_MSR_STORE_ADDR = 0x00002006, - VM_EXIT_MSR_STORE_ADDR_HIGH = 0x00002007, - VM_EXIT_MSR_LOAD_ADDR = 0x00002008, - VM_EXIT_MSR_LOAD_ADDR_HIGH = 0x00002009, - VM_ENTRY_MSR_LOAD_ADDR = 0x0000200a, - VM_ENTRY_MSR_LOAD_ADDR_HIGH = 0x0000200b, - PML_ADDRESS = 0x0000200e, - PML_ADDRESS_HIGH = 0x0000200f, - TSC_OFFSET = 0x00002010, - TSC_OFFSET_HIGH = 0x00002011, - VIRTUAL_APIC_PAGE_ADDR = 0x00002012, - VIRTUAL_APIC_PAGE_ADDR_HIGH = 0x00002013, - APIC_ACCESS_ADDR = 0x00002014, - APIC_ACCESS_ADDR_HIGH = 0x00002015, - POSTED_INTR_DESC_ADDR = 0x00002016, - POSTED_INTR_DESC_ADDR_HIGH = 0x00002017, - EPT_POINTER = 0x0000201a, - EPT_POINTER_HIGH = 0x0000201b, - EOI_EXIT_BITMAP0 = 0x0000201c, - EOI_EXIT_BITMAP0_HIGH = 0x0000201d, - EOI_EXIT_BITMAP1 = 0x0000201e, - EOI_EXIT_BITMAP1_HIGH = 0x0000201f, - EOI_EXIT_BITMAP2 = 0x00002020, - EOI_EXIT_BITMAP2_HIGH = 0x00002021, - EOI_EXIT_BITMAP3 = 0x00002022, - EOI_EXIT_BITMAP3_HIGH = 0x00002023, - VMREAD_BITMAP = 0x00002026, - VMREAD_BITMAP_HIGH = 0x00002027, - VMWRITE_BITMAP = 0x00002028, - VMWRITE_BITMAP_HIGH = 0x00002029, - XSS_EXIT_BITMAP = 0x0000202C, - XSS_EXIT_BITMAP_HIGH = 0x0000202D, - ENCLS_EXITING_BITMAP = 0x0000202E, - ENCLS_EXITING_BITMAP_HIGH = 0x0000202F, - TSC_MULTIPLIER = 0x00002032, - TSC_MULTIPLIER_HIGH = 0x00002033, - GUEST_PHYSICAL_ADDRESS = 0x00002400, - GUEST_PHYSICAL_ADDRESS_HIGH = 0x00002401, - VMCS_LINK_POINTER = 0x00002800, - VMCS_LINK_POINTER_HIGH = 0x00002801, - GUEST_IA32_DEBUGCTL = 0x00002802, - GUEST_IA32_DEBUGCTL_HIGH = 0x00002803, - GUEST_IA32_PAT = 0x00002804, - GUEST_IA32_PAT_HIGH = 0x00002805, - GUEST_IA32_EFER = 0x00002806, - GUEST_IA32_EFER_HIGH = 0x00002807, - GUEST_IA32_PERF_GLOBAL_CTRL = 0x00002808, - GUEST_IA32_PERF_GLOBAL_CTRL_HIGH= 0x00002809, - GUEST_PDPTR0 = 0x0000280a, - GUEST_PDPTR0_HIGH = 0x0000280b, - GUEST_PDPTR1 = 0x0000280c, - GUEST_PDPTR1_HIGH = 0x0000280d, - GUEST_PDPTR2 = 0x0000280e, - GUEST_PDPTR2_HIGH = 0x0000280f, - GUEST_PDPTR3 = 0x00002810, - GUEST_PDPTR3_HIGH = 0x00002811, - GUEST_BNDCFGS = 0x00002812, - GUEST_BNDCFGS_HIGH = 0x00002813, - HOST_IA32_PAT = 0x00002c00, - HOST_IA32_PAT_HIGH = 0x00002c01, - HOST_IA32_EFER = 0x00002c02, - HOST_IA32_EFER_HIGH = 0x00002c03, - HOST_IA32_PERF_GLOBAL_CTRL = 0x00002c04, - HOST_IA32_PERF_GLOBAL_CTRL_HIGH = 0x00002c05, - PIN_BASED_VM_EXEC_CONTROL = 0x00004000, - CPU_BASED_VM_EXEC_CONTROL = 0x00004002, - EXCEPTION_BITMAP = 0x00004004, - PAGE_FAULT_ERROR_CODE_MASK = 0x00004006, - PAGE_FAULT_ERROR_CODE_MATCH = 0x00004008, - CR3_TARGET_COUNT = 0x0000400a, - VM_EXIT_CONTROLS = 0x0000400c, - VM_EXIT_MSR_STORE_COUNT = 0x0000400e, - VM_EXIT_MSR_LOAD_COUNT = 0x00004010, - VM_ENTRY_CONTROLS = 0x00004012, - VM_ENTRY_MSR_LOAD_COUNT = 0x00004014, - VM_ENTRY_INTR_INFO_FIELD = 0x00004016, - 
VM_ENTRY_EXCEPTION_ERROR_CODE = 0x00004018, - VM_ENTRY_INSTRUCTION_LEN = 0x0000401a, - TPR_THRESHOLD = 0x0000401c, - SECONDARY_VM_EXEC_CONTROL = 0x0000401e, - PLE_GAP = 0x00004020, - PLE_WINDOW = 0x00004022, - VM_INSTRUCTION_ERROR = 0x00004400, - VM_EXIT_REASON = 0x00004402, - VM_EXIT_INTR_INFO = 0x00004404, - VM_EXIT_INTR_ERROR_CODE = 0x00004406, - IDT_VECTORING_INFO_FIELD = 0x00004408, - IDT_VECTORING_ERROR_CODE = 0x0000440a, - VM_EXIT_INSTRUCTION_LEN = 0x0000440c, - VMX_INSTRUCTION_INFO = 0x0000440e, - GUEST_ES_LIMIT = 0x00004800, - GUEST_CS_LIMIT = 0x00004802, - GUEST_SS_LIMIT = 0x00004804, - GUEST_DS_LIMIT = 0x00004806, - GUEST_FS_LIMIT = 0x00004808, - GUEST_GS_LIMIT = 0x0000480a, - GUEST_LDTR_LIMIT = 0x0000480c, - GUEST_TR_LIMIT = 0x0000480e, - GUEST_GDTR_LIMIT = 0x00004810, - GUEST_IDTR_LIMIT = 0x00004812, - GUEST_ES_AR_BYTES = 0x00004814, - GUEST_CS_AR_BYTES = 0x00004816, - GUEST_SS_AR_BYTES = 0x00004818, - GUEST_DS_AR_BYTES = 0x0000481a, - GUEST_FS_AR_BYTES = 0x0000481c, - GUEST_GS_AR_BYTES = 0x0000481e, - GUEST_LDTR_AR_BYTES = 0x00004820, - GUEST_TR_AR_BYTES = 0x00004822, - GUEST_INTERRUPTIBILITY_INFO = 0x00004824, - GUEST_ACTIVITY_STATE = 0X00004826, - GUEST_SYSENTER_CS = 0x0000482A, - VMX_PREEMPTION_TIMER_VALUE = 0x0000482E, - HOST_IA32_SYSENTER_CS = 0x00004c00, - CR0_GUEST_HOST_MASK = 0x00006000, - CR4_GUEST_HOST_MASK = 0x00006002, - CR0_READ_SHADOW = 0x00006004, - CR4_READ_SHADOW = 0x00006006, - CR3_TARGET_VALUE0 = 0x00006008, - CR3_TARGET_VALUE1 = 0x0000600a, - CR3_TARGET_VALUE2 = 0x0000600c, - CR3_TARGET_VALUE3 = 0x0000600e, - EXIT_QUALIFICATION = 0x00006400, - GUEST_LINEAR_ADDRESS = 0x0000640a, - GUEST_CR0 = 0x00006800, - GUEST_CR3 = 0x00006802, - GUEST_CR4 = 0x00006804, - GUEST_ES_BASE = 0x00006806, - GUEST_CS_BASE = 0x00006808, - GUEST_SS_BASE = 0x0000680a, - GUEST_DS_BASE = 0x0000680c, - GUEST_FS_BASE = 0x0000680e, - GUEST_GS_BASE = 0x00006810, - GUEST_LDTR_BASE = 0x00006812, - GUEST_TR_BASE = 0x00006814, - GUEST_GDTR_BASE = 0x00006816, - GUEST_IDTR_BASE = 0x00006818, - GUEST_DR7 = 0x0000681a, - GUEST_RSP = 0x0000681c, - GUEST_RIP = 0x0000681e, - GUEST_RFLAGS = 0x00006820, - GUEST_PENDING_DBG_EXCEPTIONS = 0x00006822, - GUEST_SYSENTER_ESP = 0x00006824, - GUEST_SYSENTER_EIP = 0x00006826, - HOST_CR0 = 0x00006c00, - HOST_CR3 = 0x00006c02, - HOST_CR4 = 0x00006c04, - HOST_FS_BASE = 0x00006c06, - HOST_GS_BASE = 0x00006c08, - HOST_TR_BASE = 0x00006c0a, - HOST_GDTR_BASE = 0x00006c0c, - HOST_IDTR_BASE = 0x00006c0e, - HOST_IA32_SYSENTER_ESP = 0x00006c10, - HOST_IA32_SYSENTER_EIP = 0x00006c12, - HOST_RSP = 0x00006c14, - HOST_RIP = 0x00006c16, -}; - -struct vmx_msr_entry { - uint32_t index; - uint32_t reserved; - uint64_t value; -} __attribute__ ((aligned(16))); - -#include "evmcs.h" - -static inline int vmxon(uint64_t phys) -{ - uint8_t ret; - - __asm__ __volatile__ ("vmxon %[pa]; setna %[ret]" - : [ret]"=rm"(ret) - : [pa]"m"(phys) - : "cc", "memory"); - - return ret; -} - -static inline void vmxoff(void) -{ - __asm__ __volatile__("vmxoff"); -} - -static inline int vmclear(uint64_t vmcs_pa) -{ - uint8_t ret; - - __asm__ __volatile__ ("vmclear %[pa]; setna %[ret]" - : [ret]"=rm"(ret) - : [pa]"m"(vmcs_pa) - : "cc", "memory"); - - return ret; -} - -static inline int vmptrld(uint64_t vmcs_pa) -{ - uint8_t ret; - - if (enable_evmcs) - return -1; - - __asm__ __volatile__ ("vmptrld %[pa]; setna %[ret]" - : [ret]"=rm"(ret) - : [pa]"m"(vmcs_pa) - : "cc", "memory"); - - return ret; -} - -static inline int vmptrst(uint64_t *value) -{ - uint64_t tmp; - uint8_t ret; - - if (enable_evmcs) - 
return evmcs_vmptrst(value); - - __asm__ __volatile__("vmptrst %[value]; setna %[ret]" - : [value]"=m"(tmp), [ret]"=rm"(ret) - : : "cc", "memory"); - - *value = tmp; - return ret; -} - -/* - * A wrapper around vmptrst that ignores errors and returns zero if the - * vmptrst instruction fails. - */ -static inline uint64_t vmptrstz(void) -{ - uint64_t value = 0; - vmptrst(&value); - return value; -} - -/* - * No guest state (e.g. GPRs) is established by this vmlaunch. - */ -static inline int vmlaunch(void) -{ - int ret; - - if (enable_evmcs) - return evmcs_vmlaunch(); - - __asm__ __volatile__("push %%rbp;" - "push %%rcx;" - "push %%rdx;" - "push %%rsi;" - "push %%rdi;" - "push $0;" - "vmwrite %%rsp, %[host_rsp];" - "lea 1f(%%rip), %%rax;" - "vmwrite %%rax, %[host_rip];" - "vmlaunch;" - "incq (%%rsp);" - "1: pop %%rax;" - "pop %%rdi;" - "pop %%rsi;" - "pop %%rdx;" - "pop %%rcx;" - "pop %%rbp;" - : [ret]"=&a"(ret) - : [host_rsp]"r"((uint64_t)HOST_RSP), - [host_rip]"r"((uint64_t)HOST_RIP) - : "memory", "cc", "rbx", "r8", "r9", "r10", - "r11", "r12", "r13", "r14", "r15"); - return ret; -} - -/* - * No guest state (e.g. GPRs) is established by this vmresume. - */ -static inline int vmresume(void) -{ - int ret; - - if (enable_evmcs) - return evmcs_vmresume(); - - __asm__ __volatile__("push %%rbp;" - "push %%rcx;" - "push %%rdx;" - "push %%rsi;" - "push %%rdi;" - "push $0;" - "vmwrite %%rsp, %[host_rsp];" - "lea 1f(%%rip), %%rax;" - "vmwrite %%rax, %[host_rip];" - "vmresume;" - "incq (%%rsp);" - "1: pop %%rax;" - "pop %%rdi;" - "pop %%rsi;" - "pop %%rdx;" - "pop %%rcx;" - "pop %%rbp;" - : [ret]"=&a"(ret) - : [host_rsp]"r"((uint64_t)HOST_RSP), - [host_rip]"r"((uint64_t)HOST_RIP) - : "memory", "cc", "rbx", "r8", "r9", "r10", - "r11", "r12", "r13", "r14", "r15"); - return ret; -} - -static inline void vmcall(void) -{ - /* - * Stuff RAX and RCX with "safe" values to make sure L0 doesn't handle - * it as a valid hypercall (e.g. Hyper-V L2 TLB flush) as the intended - * use of this function is to exit to L1 from L2. Clobber all other - * GPRs as L1 doesn't correctly preserve them during vmexits. - */ - __asm__ __volatile__("push %%rbp; vmcall; pop %%rbp" - : : "a"(0xdeadbeef), "c"(0xbeefdead) - : "rbx", "rdx", "rsi", "rdi", "r8", "r9", - "r10", "r11", "r12", "r13", "r14", "r15"); -} - -static inline int vmread(uint64_t encoding, uint64_t *value) -{ - uint64_t tmp; - uint8_t ret; - - if (enable_evmcs) - return evmcs_vmread(encoding, value); - - __asm__ __volatile__("vmread %[encoding], %[value]; setna %[ret]" - : [value]"=rm"(tmp), [ret]"=rm"(ret) - : [encoding]"r"(encoding) - : "cc", "memory"); - - *value = tmp; - return ret; -} - -/* - * A wrapper around vmread that ignores errors and returns zero if the - * vmread instruction fails. 
- */ -static inline uint64_t vmreadz(uint64_t encoding) -{ - uint64_t value = 0; - vmread(encoding, &value); - return value; -} - -static inline int vmwrite(uint64_t encoding, uint64_t value) -{ - uint8_t ret; - - if (enable_evmcs) - return evmcs_vmwrite(encoding, value); - - __asm__ __volatile__ ("vmwrite %[value], %[encoding]; setna %[ret]" - : [ret]"=rm"(ret) - : [value]"rm"(value), [encoding]"r"(encoding) - : "cc", "memory"); - - return ret; -} - -static inline uint32_t vmcs_revision(void) -{ - return rdmsr(MSR_IA32_VMX_BASIC); -} - -struct vmx_pages { - void *vmxon_hva; - uint64_t vmxon_gpa; - void *vmxon; - - void *vmcs_hva; - uint64_t vmcs_gpa; - void *vmcs; - - void *msr_hva; - uint64_t msr_gpa; - void *msr; - - void *shadow_vmcs_hva; - uint64_t shadow_vmcs_gpa; - void *shadow_vmcs; - - void *vmread_hva; - uint64_t vmread_gpa; - void *vmread; - - void *vmwrite_hva; - uint64_t vmwrite_gpa; - void *vmwrite; - - void *eptp_hva; - uint64_t eptp_gpa; - void *eptp; - - void *apic_access_hva; - uint64_t apic_access_gpa; - void *apic_access; -}; - -union vmx_basic { - u64 val; - struct { - u32 revision; - u32 size:13, - reserved1:3, - width:1, - dual:1, - type:4, - insouts:1, - ctrl:1, - vm_entry_exception_ctrl:1, - reserved2:7; - }; -}; - -union vmx_ctrl_msr { - u64 val; - struct { - u32 set, clr; - }; -}; - -struct vmx_pages *vcpu_alloc_vmx(struct kvm_vm *vm, vm_vaddr_t *p_vmx_gva); -bool prepare_for_vmx_operation(struct vmx_pages *vmx); -void prepare_vmcs(struct vmx_pages *vmx, void *guest_rip, void *guest_rsp); -bool load_vmcs(struct vmx_pages *vmx); - -bool ept_1g_pages_supported(void); - -void nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm, - uint64_t nested_paddr, uint64_t paddr); -void nested_map(struct vmx_pages *vmx, struct kvm_vm *vm, - uint64_t nested_paddr, uint64_t paddr, uint64_t size); -void nested_map_memslot(struct vmx_pages *vmx, struct kvm_vm *vm, - uint32_t memslot); -void nested_identity_map_1g(struct vmx_pages *vmx, struct kvm_vm *vm, - uint64_t addr, uint64_t size); -bool kvm_cpu_has_ept(void); -void prepare_eptp(struct vmx_pages *vmx, struct kvm_vm *vm, - uint32_t eptp_memslot); -void prepare_virtualize_apic_accesses(struct vmx_pages *vmx, struct kvm_vm *vm); - -#endif /* SELFTEST_KVM_VMX_H */ diff --git a/tools/testing/selftests/kvm/lib/aarch64/gic.c b/tools/testing/selftests/kvm/lib/aarch64/gic.c deleted file mode 100644 index 7abbf8866512..000000000000 --- a/tools/testing/selftests/kvm/lib/aarch64/gic.c +++ /dev/null @@ -1,157 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * ARM Generic Interrupt Controller (GIC) support - */ - -#include -#include -#include - -#include "kvm_util.h" - -#include -#include "gic_private.h" -#include "processor.h" -#include "spinlock.h" - -static const struct gic_common_ops *gic_common_ops; -static struct spinlock gic_lock; - -static void gic_cpu_init(unsigned int cpu) -{ - gic_common_ops->gic_cpu_init(cpu); -} - -static void gic_dist_init(enum gic_type type, unsigned int nr_cpus) -{ - const struct gic_common_ops *gic_ops = NULL; - - spin_lock(&gic_lock); - - /* Distributor initialization is needed only once per VM */ - if (gic_common_ops) { - spin_unlock(&gic_lock); - return; - } - - if (type == GIC_V3) - gic_ops = &gicv3_ops; - - GUEST_ASSERT(gic_ops); - - gic_ops->gic_init(nr_cpus); - gic_common_ops = gic_ops; - - /* Make sure that the initialized data is visible to all the vCPUs */ - dsb(sy); - - spin_unlock(&gic_lock); -} - -void gic_init(enum gic_type type, unsigned int nr_cpus) -{ - uint32_t cpu = 
guest_get_vcpuid(); - - GUEST_ASSERT(type < GIC_TYPE_MAX); - GUEST_ASSERT(nr_cpus); - - gic_dist_init(type, nr_cpus); - gic_cpu_init(cpu); -} - -void gic_irq_enable(unsigned int intid) -{ - GUEST_ASSERT(gic_common_ops); - gic_common_ops->gic_irq_enable(intid); -} - -void gic_irq_disable(unsigned int intid) -{ - GUEST_ASSERT(gic_common_ops); - gic_common_ops->gic_irq_disable(intid); -} - -unsigned int gic_get_and_ack_irq(void) -{ - uint64_t irqstat; - unsigned int intid; - - GUEST_ASSERT(gic_common_ops); - - irqstat = gic_common_ops->gic_read_iar(); - intid = irqstat & GENMASK(23, 0); - - return intid; -} - -void gic_set_eoi(unsigned int intid) -{ - GUEST_ASSERT(gic_common_ops); - gic_common_ops->gic_write_eoir(intid); -} - -void gic_set_dir(unsigned int intid) -{ - GUEST_ASSERT(gic_common_ops); - gic_common_ops->gic_write_dir(intid); -} - -void gic_set_eoi_split(bool split) -{ - GUEST_ASSERT(gic_common_ops); - gic_common_ops->gic_set_eoi_split(split); -} - -void gic_set_priority_mask(uint64_t pmr) -{ - GUEST_ASSERT(gic_common_ops); - gic_common_ops->gic_set_priority_mask(pmr); -} - -void gic_set_priority(unsigned int intid, unsigned int prio) -{ - GUEST_ASSERT(gic_common_ops); - gic_common_ops->gic_set_priority(intid, prio); -} - -void gic_irq_set_active(unsigned int intid) -{ - GUEST_ASSERT(gic_common_ops); - gic_common_ops->gic_irq_set_active(intid); -} - -void gic_irq_clear_active(unsigned int intid) -{ - GUEST_ASSERT(gic_common_ops); - gic_common_ops->gic_irq_clear_active(intid); -} - -bool gic_irq_get_active(unsigned int intid) -{ - GUEST_ASSERT(gic_common_ops); - return gic_common_ops->gic_irq_get_active(intid); -} - -void gic_irq_set_pending(unsigned int intid) -{ - GUEST_ASSERT(gic_common_ops); - gic_common_ops->gic_irq_set_pending(intid); -} - -void gic_irq_clear_pending(unsigned int intid) -{ - GUEST_ASSERT(gic_common_ops); - gic_common_ops->gic_irq_clear_pending(intid); -} - -bool gic_irq_get_pending(unsigned int intid) -{ - GUEST_ASSERT(gic_common_ops); - return gic_common_ops->gic_irq_get_pending(intid); -} - -void gic_irq_set_config(unsigned int intid, bool is_edge) -{ - GUEST_ASSERT(gic_common_ops); - gic_common_ops->gic_irq_set_config(intid, is_edge); -} diff --git a/tools/testing/selftests/kvm/lib/aarch64/gic_private.h b/tools/testing/selftests/kvm/lib/aarch64/gic_private.h deleted file mode 100644 index d24e9ecc96c6..000000000000 --- a/tools/testing/selftests/kvm/lib/aarch64/gic_private.h +++ /dev/null @@ -1,32 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * ARM Generic Interrupt Controller (GIC) private defines that's only - * shared among the GIC library code. 
- */ - -#ifndef SELFTEST_KVM_GIC_PRIVATE_H -#define SELFTEST_KVM_GIC_PRIVATE_H - -struct gic_common_ops { - void (*gic_init)(unsigned int nr_cpus); - void (*gic_cpu_init)(unsigned int cpu); - void (*gic_irq_enable)(unsigned int intid); - void (*gic_irq_disable)(unsigned int intid); - uint64_t (*gic_read_iar)(void); - void (*gic_write_eoir)(uint32_t irq); - void (*gic_write_dir)(uint32_t irq); - void (*gic_set_eoi_split)(bool split); - void (*gic_set_priority_mask)(uint64_t mask); - void (*gic_set_priority)(uint32_t intid, uint32_t prio); - void (*gic_irq_set_active)(uint32_t intid); - void (*gic_irq_clear_active)(uint32_t intid); - bool (*gic_irq_get_active)(uint32_t intid); - void (*gic_irq_set_pending)(uint32_t intid); - void (*gic_irq_clear_pending)(uint32_t intid); - bool (*gic_irq_get_pending)(uint32_t intid); - void (*gic_irq_set_config)(uint32_t intid, bool is_edge); -}; - -extern const struct gic_common_ops gicv3_ops; - -#endif /* SELFTEST_KVM_GIC_PRIVATE_H */ diff --git a/tools/testing/selftests/kvm/lib/aarch64/gic_v3.c b/tools/testing/selftests/kvm/lib/aarch64/gic_v3.c deleted file mode 100644 index 66d05506f78b..000000000000 --- a/tools/testing/selftests/kvm/lib/aarch64/gic_v3.c +++ /dev/null @@ -1,427 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * ARM Generic Interrupt Controller (GIC) v3 support - */ - -#include - -#include "kvm_util.h" -#include "processor.h" -#include "delay.h" - -#include "gic.h" -#include "gic_v3.h" -#include "gic_private.h" - -#define GICV3_MAX_CPUS 512 - -#define GICD_INT_DEF_PRI 0xa0 -#define GICD_INT_DEF_PRI_X4 ((GICD_INT_DEF_PRI << 24) |\ - (GICD_INT_DEF_PRI << 16) |\ - (GICD_INT_DEF_PRI << 8) |\ - GICD_INT_DEF_PRI) - -#define ICC_PMR_DEF_PRIO 0xf0 - -struct gicv3_data { - unsigned int nr_cpus; - unsigned int nr_spis; -}; - -#define sgi_base_from_redist(redist_base) (redist_base + SZ_64K) -#define DIST_BIT (1U << 31) - -enum gicv3_intid_range { - SGI_RANGE, - PPI_RANGE, - SPI_RANGE, - INVALID_RANGE, -}; - -static struct gicv3_data gicv3_data; - -static void gicv3_gicd_wait_for_rwp(void) -{ - unsigned int count = 100000; /* 1s */ - - while (readl(GICD_BASE_GVA + GICD_CTLR) & GICD_CTLR_RWP) { - GUEST_ASSERT(count--); - udelay(10); - } -} - -static inline volatile void *gicr_base_cpu(uint32_t cpu) -{ - /* Align all the redistributors sequentially */ - return GICR_BASE_GVA + cpu * SZ_64K * 2; -} - -static void gicv3_gicr_wait_for_rwp(uint32_t cpu) -{ - unsigned int count = 100000; /* 1s */ - - while (readl(gicr_base_cpu(cpu) + GICR_CTLR) & GICR_CTLR_RWP) { - GUEST_ASSERT(count--); - udelay(10); - } -} - -static void gicv3_wait_for_rwp(uint32_t cpu_or_dist) -{ - if (cpu_or_dist & DIST_BIT) - gicv3_gicd_wait_for_rwp(); - else - gicv3_gicr_wait_for_rwp(cpu_or_dist); -} - -static enum gicv3_intid_range get_intid_range(unsigned int intid) -{ - switch (intid) { - case 0 ... 15: - return SGI_RANGE; - case 16 ... 31: - return PPI_RANGE; - case 32 ... 
1019: - return SPI_RANGE; - } - - /* We should not be reaching here */ - GUEST_ASSERT(0); - - return INVALID_RANGE; -} - -static uint64_t gicv3_read_iar(void) -{ - uint64_t irqstat = read_sysreg_s(SYS_ICC_IAR1_EL1); - - dsb(sy); - return irqstat; -} - -static void gicv3_write_eoir(uint32_t irq) -{ - write_sysreg_s(irq, SYS_ICC_EOIR1_EL1); - isb(); -} - -static void gicv3_write_dir(uint32_t irq) -{ - write_sysreg_s(irq, SYS_ICC_DIR_EL1); - isb(); -} - -static void gicv3_set_priority_mask(uint64_t mask) -{ - write_sysreg_s(mask, SYS_ICC_PMR_EL1); -} - -static void gicv3_set_eoi_split(bool split) -{ - uint32_t val; - - /* - * All other fields are read-only, so no need to read CTLR first. In - * fact, the kernel does the same. - */ - val = split ? (1U << 1) : 0; - write_sysreg_s(val, SYS_ICC_CTLR_EL1); - isb(); -} - -uint32_t gicv3_reg_readl(uint32_t cpu_or_dist, uint64_t offset) -{ - volatile void *base = cpu_or_dist & DIST_BIT ? GICD_BASE_GVA - : sgi_base_from_redist(gicr_base_cpu(cpu_or_dist)); - return readl(base + offset); -} - -void gicv3_reg_writel(uint32_t cpu_or_dist, uint64_t offset, uint32_t reg_val) -{ - volatile void *base = cpu_or_dist & DIST_BIT ? GICD_BASE_GVA - : sgi_base_from_redist(gicr_base_cpu(cpu_or_dist)); - writel(reg_val, base + offset); -} - -uint32_t gicv3_getl_fields(uint32_t cpu_or_dist, uint64_t offset, uint32_t mask) -{ - return gicv3_reg_readl(cpu_or_dist, offset) & mask; -} - -void gicv3_setl_fields(uint32_t cpu_or_dist, uint64_t offset, - uint32_t mask, uint32_t reg_val) -{ - uint32_t tmp = gicv3_reg_readl(cpu_or_dist, offset) & ~mask; - - tmp |= (reg_val & mask); - gicv3_reg_writel(cpu_or_dist, offset, tmp); -} - -/* - * We use a single offset for the distributor and redistributor maps as they - * have the same value in both. The only exceptions are registers that only - * exist in one and not the other, like GICR_WAKER that doesn't exist in the - * distributor map. Such registers are conveniently marked as reserved in the - * map that doesn't implement it; like GICR_WAKER's offset of 0x0014 being - * marked as "Reserved" in the Distributor map. - */ -static void gicv3_access_reg(uint32_t intid, uint64_t offset, - uint32_t reg_bits, uint32_t bits_per_field, - bool write, uint32_t *val) -{ - uint32_t cpu = guest_get_vcpuid(); - enum gicv3_intid_range intid_range = get_intid_range(intid); - uint32_t fields_per_reg, index, mask, shift; - uint32_t cpu_or_dist; - - GUEST_ASSERT(bits_per_field <= reg_bits); - GUEST_ASSERT(!write || *val < (1U << bits_per_field)); - /* - * This function does not support 64 bit accesses. Just asserting here - * until we implement readq/writeq. - */ - GUEST_ASSERT(reg_bits == 32); - - fields_per_reg = reg_bits / bits_per_field; - index = intid % fields_per_reg; - shift = index * bits_per_field; - mask = ((1U << bits_per_field) - 1) << shift; - - /* Set offset to the actual register holding intid's config. */ - offset += (intid / fields_per_reg) * (reg_bits / 8); - - cpu_or_dist = (intid_range == SPI_RANGE) ? 
DIST_BIT : cpu; - - if (write) - gicv3_setl_fields(cpu_or_dist, offset, mask, *val << shift); - *val = gicv3_getl_fields(cpu_or_dist, offset, mask) >> shift; -} - -static void gicv3_write_reg(uint32_t intid, uint64_t offset, - uint32_t reg_bits, uint32_t bits_per_field, uint32_t val) -{ - gicv3_access_reg(intid, offset, reg_bits, - bits_per_field, true, &val); -} - -static uint32_t gicv3_read_reg(uint32_t intid, uint64_t offset, - uint32_t reg_bits, uint32_t bits_per_field) -{ - uint32_t val; - - gicv3_access_reg(intid, offset, reg_bits, - bits_per_field, false, &val); - return val; -} - -static void gicv3_set_priority(uint32_t intid, uint32_t prio) -{ - gicv3_write_reg(intid, GICD_IPRIORITYR, 32, 8, prio); -} - -/* Sets the intid to be level-sensitive or edge-triggered. */ -static void gicv3_irq_set_config(uint32_t intid, bool is_edge) -{ - uint32_t val; - - /* N/A for private interrupts. */ - GUEST_ASSERT(get_intid_range(intid) == SPI_RANGE); - val = is_edge ? 2 : 0; - gicv3_write_reg(intid, GICD_ICFGR, 32, 2, val); -} - -static void gicv3_irq_enable(uint32_t intid) -{ - bool is_spi = get_intid_range(intid) == SPI_RANGE; - uint32_t cpu = guest_get_vcpuid(); - - gicv3_write_reg(intid, GICD_ISENABLER, 32, 1, 1); - gicv3_wait_for_rwp(is_spi ? DIST_BIT : cpu); -} - -static void gicv3_irq_disable(uint32_t intid) -{ - bool is_spi = get_intid_range(intid) == SPI_RANGE; - uint32_t cpu = guest_get_vcpuid(); - - gicv3_write_reg(intid, GICD_ICENABLER, 32, 1, 1); - gicv3_wait_for_rwp(is_spi ? DIST_BIT : cpu); -} - -static void gicv3_irq_set_active(uint32_t intid) -{ - gicv3_write_reg(intid, GICD_ISACTIVER, 32, 1, 1); -} - -static void gicv3_irq_clear_active(uint32_t intid) -{ - gicv3_write_reg(intid, GICD_ICACTIVER, 32, 1, 1); -} - -static bool gicv3_irq_get_active(uint32_t intid) -{ - return gicv3_read_reg(intid, GICD_ISACTIVER, 32, 1); -} - -static void gicv3_irq_set_pending(uint32_t intid) -{ - gicv3_write_reg(intid, GICD_ISPENDR, 32, 1, 1); -} - -static void gicv3_irq_clear_pending(uint32_t intid) -{ - gicv3_write_reg(intid, GICD_ICPENDR, 32, 1, 1); -} - -static bool gicv3_irq_get_pending(uint32_t intid) -{ - return gicv3_read_reg(intid, GICD_ISPENDR, 32, 1); -} - -static void gicv3_enable_redist(volatile void *redist_base) -{ - uint32_t val = readl(redist_base + GICR_WAKER); - unsigned int count = 100000; /* 1s */ - - val &= ~GICR_WAKER_ProcessorSleep; - writel(val, redist_base + GICR_WAKER); - - /* Wait until the processor is 'active' */ - while (readl(redist_base + GICR_WAKER) & GICR_WAKER_ChildrenAsleep) { - GUEST_ASSERT(count--); - udelay(10); - } -} - -static void gicv3_cpu_init(unsigned int cpu) -{ - volatile void *sgi_base; - unsigned int i; - volatile void *redist_base_cpu; - - GUEST_ASSERT(cpu < gicv3_data.nr_cpus); - - redist_base_cpu = gicr_base_cpu(cpu); - sgi_base = sgi_base_from_redist(redist_base_cpu); - - gicv3_enable_redist(redist_base_cpu); - - /* - * Mark all the SGI and PPI interrupts as non-secure Group-1. - * Also, deactivate and disable them. 
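
Editor's note (illustrative aside, not part of the patch): gicv3_cpu_init() above locates each vCPU's redistributor via gicr_base_cpu() and sgi_base_from_redist(). Below is a minimal standalone sketch of that frame arithmetic; the 0x80A0000 base address is a made-up value for illustration, and only the "RD_base frame plus SGI_base frame, 2 x 64KiB per CPU" stride mirrors the code above.

#include <stdint.h>
#include <stdio.h>

#define SZ_64K		0x10000ULL
#define GICR_BASE	0x80A0000ULL	/* hypothetical guest-visible base */

/* Each CPU owns two consecutive 64KiB frames: RD_base, then SGI_base. */
static uint64_t gicr_base_cpu(uint32_t cpu)
{
	return GICR_BASE + (uint64_t)cpu * SZ_64K * 2;
}

static uint64_t sgi_base_from_redist(uint64_t redist_base)
{
	return redist_base + SZ_64K;
}

int main(void)
{
	for (uint32_t cpu = 0; cpu < 3; cpu++)
		printf("cpu %u: RD_base 0x%llx SGI_base 0x%llx\n", cpu,
		       (unsigned long long)gicr_base_cpu(cpu),
		       (unsigned long long)sgi_base_from_redist(gicr_base_cpu(cpu)));
	return 0;
}

(End of editor's note.)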
- */ - writel(~0, sgi_base + GICR_IGROUPR0); - writel(~0, sgi_base + GICR_ICACTIVER0); - writel(~0, sgi_base + GICR_ICENABLER0); - - /* Set a default priority for all the SGIs and PPIs */ - for (i = 0; i < 32; i += 4) - writel(GICD_INT_DEF_PRI_X4, - sgi_base + GICR_IPRIORITYR0 + i); - - gicv3_gicr_wait_for_rwp(cpu); - - /* Enable the GIC system register (ICC_*) access */ - write_sysreg_s(read_sysreg_s(SYS_ICC_SRE_EL1) | ICC_SRE_EL1_SRE, - SYS_ICC_SRE_EL1); - - /* Set a default priority threshold */ - write_sysreg_s(ICC_PMR_DEF_PRIO, SYS_ICC_PMR_EL1); - - /* Enable non-secure Group-1 interrupts */ - write_sysreg_s(ICC_IGRPEN1_EL1_MASK, SYS_ICC_IGRPEN1_EL1); -} - -static void gicv3_dist_init(void) -{ - unsigned int i; - - /* Disable the distributor until we set things up */ - writel(0, GICD_BASE_GVA + GICD_CTLR); - gicv3_gicd_wait_for_rwp(); - - /* - * Mark all the SPI interrupts as non-secure Group-1. - * Also, deactivate and disable them. - */ - for (i = 32; i < gicv3_data.nr_spis; i += 32) { - writel(~0, GICD_BASE_GVA + GICD_IGROUPR + i / 8); - writel(~0, GICD_BASE_GVA + GICD_ICACTIVER + i / 8); - writel(~0, GICD_BASE_GVA + GICD_ICENABLER + i / 8); - } - - /* Set a default priority for all the SPIs */ - for (i = 32; i < gicv3_data.nr_spis; i += 4) - writel(GICD_INT_DEF_PRI_X4, - GICD_BASE_GVA + GICD_IPRIORITYR + i); - - /* Wait for the settings to sync-in */ - gicv3_gicd_wait_for_rwp(); - - /* Finally, enable the distributor globally with ARE */ - writel(GICD_CTLR_ARE_NS | GICD_CTLR_ENABLE_G1A | - GICD_CTLR_ENABLE_G1, GICD_BASE_GVA + GICD_CTLR); - gicv3_gicd_wait_for_rwp(); -} - -static void gicv3_init(unsigned int nr_cpus) -{ - GUEST_ASSERT(nr_cpus <= GICV3_MAX_CPUS); - - gicv3_data.nr_cpus = nr_cpus; - gicv3_data.nr_spis = GICD_TYPER_SPIS( - readl(GICD_BASE_GVA + GICD_TYPER)); - if (gicv3_data.nr_spis > 1020) - gicv3_data.nr_spis = 1020; - - /* - * Initialize only the distributor for now. - * The redistributor and CPU interfaces are initialized - * later for every PE. 
- */ - gicv3_dist_init(); -} - -const struct gic_common_ops gicv3_ops = { - .gic_init = gicv3_init, - .gic_cpu_init = gicv3_cpu_init, - .gic_irq_enable = gicv3_irq_enable, - .gic_irq_disable = gicv3_irq_disable, - .gic_read_iar = gicv3_read_iar, - .gic_write_eoir = gicv3_write_eoir, - .gic_write_dir = gicv3_write_dir, - .gic_set_priority_mask = gicv3_set_priority_mask, - .gic_set_eoi_split = gicv3_set_eoi_split, - .gic_set_priority = gicv3_set_priority, - .gic_irq_set_active = gicv3_irq_set_active, - .gic_irq_clear_active = gicv3_irq_clear_active, - .gic_irq_get_active = gicv3_irq_get_active, - .gic_irq_set_pending = gicv3_irq_set_pending, - .gic_irq_clear_pending = gicv3_irq_clear_pending, - .gic_irq_get_pending = gicv3_irq_get_pending, - .gic_irq_set_config = gicv3_irq_set_config, -}; - -void gic_rdist_enable_lpis(vm_paddr_t cfg_table, size_t cfg_table_size, - vm_paddr_t pend_table) -{ - volatile void *rdist_base = gicr_base_cpu(guest_get_vcpuid()); - - u32 ctlr; - u64 val; - - val = (cfg_table | - GICR_PROPBASER_InnerShareable | - GICR_PROPBASER_RaWaWb | - ((ilog2(cfg_table_size) - 1) & GICR_PROPBASER_IDBITS_MASK)); - writeq_relaxed(val, rdist_base + GICR_PROPBASER); - - val = (pend_table | - GICR_PENDBASER_InnerShareable | - GICR_PENDBASER_RaWaWb); - writeq_relaxed(val, rdist_base + GICR_PENDBASER); - - ctlr = readl_relaxed(rdist_base + GICR_CTLR); - ctlr |= GICR_CTLR_ENABLE_LPIS; - writel_relaxed(ctlr, rdist_base + GICR_CTLR); -} diff --git a/tools/testing/selftests/kvm/lib/aarch64/gic_v3_its.c b/tools/testing/selftests/kvm/lib/aarch64/gic_v3_its.c deleted file mode 100644 index 09f270545646..000000000000 --- a/tools/testing/selftests/kvm/lib/aarch64/gic_v3_its.c +++ /dev/null @@ -1,248 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Guest ITS library, generously donated by drivers/irqchip/irq-gic-v3-its.c - * over in the kernel tree. 
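
Editor's note (illustrative aside, not part of the patch): the ITS library deleted below packs command fields by masking and shifting with its_mask_encode() and GENMASK_ULL(). Here is a self-contained sketch of that bit-packing in plain C; genmask_ull() and mask_encode() are stand-ins I wrote to mirror the idea, not the kernel helpers, and the field positions (7:0 for the command number, 63:32 for the device ID) are taken from the encoders in this file.

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* Mask covering bits h..l inclusive, 0 <= l <= h <= 63. */
static uint64_t genmask_ull(unsigned int h, unsigned int l)
{
	uint64_t high = (h == 63) ? ~0ULL : ((1ULL << (h + 1)) - 1);

	return high & ~((1ULL << l) - 1);
}

/* Same shape as its_mask_encode(): place val into bits h..l of *raw. */
static void mask_encode(uint64_t *raw, uint64_t val, unsigned int h, unsigned int l)
{
	uint64_t mask = genmask_ull(h, l);

	*raw &= ~mask;
	*raw |= (val << l) & mask;
}

int main(void)
{
	uint64_t raw_cmd0 = 0;

	mask_encode(&raw_cmd0, 0x08, 7, 0);	/* command number field */
	mask_encode(&raw_cmd0, 0x1234, 63, 32);	/* device ID field */
	printf("raw_cmd[0] = 0x%016" PRIx64 "\n", raw_cmd0);	/* 0x0000123400000008 */
	return 0;
}

(End of editor's note.)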
- */ - -#include -#include -#include -#include - -#include "kvm_util.h" -#include "vgic.h" -#include "gic.h" -#include "gic_v3.h" -#include "processor.h" - -static u64 its_read_u64(unsigned long offset) -{ - return readq_relaxed(GITS_BASE_GVA + offset); -} - -static void its_write_u64(unsigned long offset, u64 val) -{ - writeq_relaxed(val, GITS_BASE_GVA + offset); -} - -static u32 its_read_u32(unsigned long offset) -{ - return readl_relaxed(GITS_BASE_GVA + offset); -} - -static void its_write_u32(unsigned long offset, u32 val) -{ - writel_relaxed(val, GITS_BASE_GVA + offset); -} - -static unsigned long its_find_baser(unsigned int type) -{ - int i; - - for (i = 0; i < GITS_BASER_NR_REGS; i++) { - u64 baser; - unsigned long offset = GITS_BASER + (i * sizeof(baser)); - - baser = its_read_u64(offset); - if (GITS_BASER_TYPE(baser) == type) - return offset; - } - - GUEST_FAIL("Couldn't find an ITS BASER of type %u", type); - return -1; -} - -static void its_install_table(unsigned int type, vm_paddr_t base, size_t size) -{ - unsigned long offset = its_find_baser(type); - u64 baser; - - baser = ((size / SZ_64K) - 1) | - GITS_BASER_PAGE_SIZE_64K | - GITS_BASER_InnerShareable | - base | - GITS_BASER_RaWaWb | - GITS_BASER_VALID; - - its_write_u64(offset, baser); -} - -static void its_install_cmdq(vm_paddr_t base, size_t size) -{ - u64 cbaser; - - cbaser = ((size / SZ_4K) - 1) | - GITS_CBASER_InnerShareable | - base | - GITS_CBASER_RaWaWb | - GITS_CBASER_VALID; - - its_write_u64(GITS_CBASER, cbaser); -} - -void its_init(vm_paddr_t coll_tbl, size_t coll_tbl_sz, - vm_paddr_t device_tbl, size_t device_tbl_sz, - vm_paddr_t cmdq, size_t cmdq_size) -{ - u32 ctlr; - - its_install_table(GITS_BASER_TYPE_COLLECTION, coll_tbl, coll_tbl_sz); - its_install_table(GITS_BASER_TYPE_DEVICE, device_tbl, device_tbl_sz); - its_install_cmdq(cmdq, cmdq_size); - - ctlr = its_read_u32(GITS_CTLR); - ctlr |= GITS_CTLR_ENABLE; - its_write_u32(GITS_CTLR, ctlr); -} - -struct its_cmd_block { - union { - u64 raw_cmd[4]; - __le64 raw_cmd_le[4]; - }; -}; - -static inline void its_fixup_cmd(struct its_cmd_block *cmd) -{ - /* Let's fixup BE commands */ - cmd->raw_cmd_le[0] = cpu_to_le64(cmd->raw_cmd[0]); - cmd->raw_cmd_le[1] = cpu_to_le64(cmd->raw_cmd[1]); - cmd->raw_cmd_le[2] = cpu_to_le64(cmd->raw_cmd[2]); - cmd->raw_cmd_le[3] = cpu_to_le64(cmd->raw_cmd[3]); -} - -static void its_mask_encode(u64 *raw_cmd, u64 val, int h, int l) -{ - u64 mask = GENMASK_ULL(h, l); - *raw_cmd &= ~mask; - *raw_cmd |= (val << l) & mask; -} - -static void its_encode_cmd(struct its_cmd_block *cmd, u8 cmd_nr) -{ - its_mask_encode(&cmd->raw_cmd[0], cmd_nr, 7, 0); -} - -static void its_encode_devid(struct its_cmd_block *cmd, u32 devid) -{ - its_mask_encode(&cmd->raw_cmd[0], devid, 63, 32); -} - -static void its_encode_event_id(struct its_cmd_block *cmd, u32 id) -{ - its_mask_encode(&cmd->raw_cmd[1], id, 31, 0); -} - -static void its_encode_phys_id(struct its_cmd_block *cmd, u32 phys_id) -{ - its_mask_encode(&cmd->raw_cmd[1], phys_id, 63, 32); -} - -static void its_encode_size(struct its_cmd_block *cmd, u8 size) -{ - its_mask_encode(&cmd->raw_cmd[1], size, 4, 0); -} - -static void its_encode_itt(struct its_cmd_block *cmd, u64 itt_addr) -{ - its_mask_encode(&cmd->raw_cmd[2], itt_addr >> 8, 51, 8); -} - -static void its_encode_valid(struct its_cmd_block *cmd, int valid) -{ - its_mask_encode(&cmd->raw_cmd[2], !!valid, 63, 63); -} - -static void its_encode_target(struct its_cmd_block *cmd, u64 target_addr) -{ - its_mask_encode(&cmd->raw_cmd[2], target_addr >> 16, 
51, 16); -} - -static void its_encode_collection(struct its_cmd_block *cmd, u16 col) -{ - its_mask_encode(&cmd->raw_cmd[2], col, 15, 0); -} - -#define GITS_CMDQ_POLL_ITERATIONS 0 - -static void its_send_cmd(void *cmdq_base, struct its_cmd_block *cmd) -{ - u64 cwriter = its_read_u64(GITS_CWRITER); - struct its_cmd_block *dst = cmdq_base + cwriter; - u64 cbaser = its_read_u64(GITS_CBASER); - size_t cmdq_size; - u64 next; - int i; - - cmdq_size = ((cbaser & 0xFF) + 1) * SZ_4K; - - its_fixup_cmd(cmd); - - WRITE_ONCE(*dst, *cmd); - dsb(ishst); - next = (cwriter + sizeof(*cmd)) % cmdq_size; - its_write_u64(GITS_CWRITER, next); - - /* - * Polling isn't necessary considering KVM's ITS emulation at the time - * of writing this, as the CMDQ is processed synchronously after a write - * to CWRITER. - */ - for (i = 0; its_read_u64(GITS_CREADR) != next; i++) { - __GUEST_ASSERT(i < GITS_CMDQ_POLL_ITERATIONS, - "ITS didn't process command at offset %lu after %d iterations\n", - cwriter, i); - - cpu_relax(); - } -} - -void its_send_mapd_cmd(void *cmdq_base, u32 device_id, vm_paddr_t itt_base, - size_t itt_size, bool valid) -{ - struct its_cmd_block cmd = {}; - - its_encode_cmd(&cmd, GITS_CMD_MAPD); - its_encode_devid(&cmd, device_id); - its_encode_size(&cmd, ilog2(itt_size) - 1); - its_encode_itt(&cmd, itt_base); - its_encode_valid(&cmd, valid); - - its_send_cmd(cmdq_base, &cmd); -} - -void its_send_mapc_cmd(void *cmdq_base, u32 vcpu_id, u32 collection_id, bool valid) -{ - struct its_cmd_block cmd = {}; - - its_encode_cmd(&cmd, GITS_CMD_MAPC); - its_encode_collection(&cmd, collection_id); - its_encode_target(&cmd, vcpu_id); - its_encode_valid(&cmd, valid); - - its_send_cmd(cmdq_base, &cmd); -} - -void its_send_mapti_cmd(void *cmdq_base, u32 device_id, u32 event_id, - u32 collection_id, u32 intid) -{ - struct its_cmd_block cmd = {}; - - its_encode_cmd(&cmd, GITS_CMD_MAPTI); - its_encode_devid(&cmd, device_id); - its_encode_event_id(&cmd, event_id); - its_encode_phys_id(&cmd, intid); - its_encode_collection(&cmd, collection_id); - - its_send_cmd(cmdq_base, &cmd); -} - -void its_send_invall_cmd(void *cmdq_base, u32 collection_id) -{ - struct its_cmd_block cmd = {}; - - its_encode_cmd(&cmd, GITS_CMD_INVALL); - its_encode_collection(&cmd, collection_id); - - its_send_cmd(cmdq_base, &cmd); -} diff --git a/tools/testing/selftests/kvm/lib/aarch64/handlers.S b/tools/testing/selftests/kvm/lib/aarch64/handlers.S deleted file mode 100644 index 0e443eadfac6..000000000000 --- a/tools/testing/selftests/kvm/lib/aarch64/handlers.S +++ /dev/null @@ -1,126 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -.macro save_registers - add sp, sp, #-16 * 17 - - stp x0, x1, [sp, #16 * 0] - stp x2, x3, [sp, #16 * 1] - stp x4, x5, [sp, #16 * 2] - stp x6, x7, [sp, #16 * 3] - stp x8, x9, [sp, #16 * 4] - stp x10, x11, [sp, #16 * 5] - stp x12, x13, [sp, #16 * 6] - stp x14, x15, [sp, #16 * 7] - stp x16, x17, [sp, #16 * 8] - stp x18, x19, [sp, #16 * 9] - stp x20, x21, [sp, #16 * 10] - stp x22, x23, [sp, #16 * 11] - stp x24, x25, [sp, #16 * 12] - stp x26, x27, [sp, #16 * 13] - stp x28, x29, [sp, #16 * 14] - - /* - * This stores sp_el1 into ex_regs.sp so exception handlers can "look" - * at it. It will _not_ be used to restore the sp on return from the - * exception so handlers can not update it. 
- */ - add x1, sp, #16 * 17 - stp x30, x1, [sp, #16 * 15] /* x30, SP */ - - mrs x1, elr_el1 - mrs x2, spsr_el1 - stp x1, x2, [sp, #16 * 16] /* PC, PSTATE */ -.endm - -.macro restore_registers - ldp x1, x2, [sp, #16 * 16] /* PC, PSTATE */ - msr elr_el1, x1 - msr spsr_el1, x2 - - /* sp is not restored */ - ldp x30, xzr, [sp, #16 * 15] /* x30, SP */ - - ldp x28, x29, [sp, #16 * 14] - ldp x26, x27, [sp, #16 * 13] - ldp x24, x25, [sp, #16 * 12] - ldp x22, x23, [sp, #16 * 11] - ldp x20, x21, [sp, #16 * 10] - ldp x18, x19, [sp, #16 * 9] - ldp x16, x17, [sp, #16 * 8] - ldp x14, x15, [sp, #16 * 7] - ldp x12, x13, [sp, #16 * 6] - ldp x10, x11, [sp, #16 * 5] - ldp x8, x9, [sp, #16 * 4] - ldp x6, x7, [sp, #16 * 3] - ldp x4, x5, [sp, #16 * 2] - ldp x2, x3, [sp, #16 * 1] - ldp x0, x1, [sp, #16 * 0] - - add sp, sp, #16 * 17 - - eret -.endm - -.pushsection ".entry.text", "ax" -.balign 0x800 -.global vectors -vectors: -.popsection - -.set vector, 0 - -/* - * Build an exception handler for vector and append a jump to it into - * vectors (while making sure that it's 0x80 aligned). - */ -.macro HANDLER, label -handler_\label: - save_registers - mov x0, sp - mov x1, #vector - bl route_exception - restore_registers - -.pushsection ".entry.text", "ax" -.balign 0x80 - b handler_\label -.popsection - -.set vector, vector + 1 -.endm - -.macro HANDLER_INVALID -.pushsection ".entry.text", "ax" -.balign 0x80 -/* This will abort so no need to save and restore registers. */ - mov x0, #vector - mov x1, #0 /* ec */ - mov x2, #0 /* valid_ec */ - b kvm_exit_unexpected_exception -.popsection - -.set vector, vector + 1 -.endm - -/* - * Caution: be sure to not add anything between the declaration of vectors - * above and these macro calls that will build the vectors table below it. - */ - HANDLER_INVALID // Synchronous EL1t - HANDLER_INVALID // IRQ EL1t - HANDLER_INVALID // FIQ EL1t - HANDLER_INVALID // Error EL1t - - HANDLER el1h_sync // Synchronous EL1h - HANDLER el1h_irq // IRQ EL1h - HANDLER el1h_fiq // FIQ EL1h - HANDLER el1h_error // Error EL1h - - HANDLER el0_sync_64 // Synchronous 64-bit EL0 - HANDLER el0_irq_64 // IRQ 64-bit EL0 - HANDLER el0_fiq_64 // FIQ 64-bit EL0 - HANDLER el0_error_64 // Error 64-bit EL0 - - HANDLER el0_sync_32 // Synchronous 32-bit EL0 - HANDLER el0_irq_32 // IRQ 32-bit EL0 - HANDLER el0_fiq_32 // FIQ 32-bit EL0 - HANDLER el0_error_32 // Error 32-bit EL0 diff --git a/tools/testing/selftests/kvm/lib/aarch64/processor.c b/tools/testing/selftests/kvm/lib/aarch64/processor.c deleted file mode 100644 index 7ba3aa3755f3..000000000000 --- a/tools/testing/selftests/kvm/lib/aarch64/processor.c +++ /dev/null @@ -1,647 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * AArch64 code - * - * Copyright (C) 2018, Red Hat, Inc. 
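
Editor's note (illustrative aside, not part of the patch): the pgd/pud/pmd/pte index helpers deleted below derive each translation level's index purely from page_shift and va_bits shift arithmetic. The sketch that follows shows the same arithmetic standalone; the 4KiB-granule, 4-level, 48-bit-VA figures are chosen for illustration, and level_index() is my own condensed helper, not a function from this file.

#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT	12	/* 4KiB pages */
#define VA_BITS		48
#define LEVELS		4

/* level 0 = PGD ... level 3 = PTE; 4KiB pages give 9 index bits per level. */
static uint64_t level_index(uint64_t gva, int level)
{
	unsigned int bits_per_level = PAGE_SHIFT - 3;
	unsigned int shift = (LEVELS - 1 - level) * bits_per_level + PAGE_SHIFT;
	uint64_t mask = (level == 0) ?
			(1ULL << (VA_BITS - shift)) - 1 :
			(1ULL << bits_per_level) - 1;

	return (gva >> shift) & mask;
}

int main(void)
{
	uint64_t gva = 0x0000aabbccdde000ULL;

	for (int level = 0; level < LEVELS; level++)
		printf("level %d index: %llu\n", level,
		       (unsigned long long)level_index(gva, level));
	return 0;
}

(End of editor's note.)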
- */ - -#include -#include - -#include "guest_modes.h" -#include "kvm_util.h" -#include "processor.h" -#include "ucall_common.h" - -#include -#include - -#define DEFAULT_ARM64_GUEST_STACK_VADDR_MIN 0xac0000 - -static vm_vaddr_t exception_handlers; - -static uint64_t page_align(struct kvm_vm *vm, uint64_t v) -{ - return (v + vm->page_size) & ~(vm->page_size - 1); -} - -static uint64_t pgd_index(struct kvm_vm *vm, vm_vaddr_t gva) -{ - unsigned int shift = (vm->pgtable_levels - 1) * (vm->page_shift - 3) + vm->page_shift; - uint64_t mask = (1UL << (vm->va_bits - shift)) - 1; - - return (gva >> shift) & mask; -} - -static uint64_t pud_index(struct kvm_vm *vm, vm_vaddr_t gva) -{ - unsigned int shift = 2 * (vm->page_shift - 3) + vm->page_shift; - uint64_t mask = (1UL << (vm->page_shift - 3)) - 1; - - TEST_ASSERT(vm->pgtable_levels == 4, - "Mode %d does not have 4 page table levels", vm->mode); - - return (gva >> shift) & mask; -} - -static uint64_t pmd_index(struct kvm_vm *vm, vm_vaddr_t gva) -{ - unsigned int shift = (vm->page_shift - 3) + vm->page_shift; - uint64_t mask = (1UL << (vm->page_shift - 3)) - 1; - - TEST_ASSERT(vm->pgtable_levels >= 3, - "Mode %d does not have >= 3 page table levels", vm->mode); - - return (gva >> shift) & mask; -} - -static uint64_t pte_index(struct kvm_vm *vm, vm_vaddr_t gva) -{ - uint64_t mask = (1UL << (vm->page_shift - 3)) - 1; - return (gva >> vm->page_shift) & mask; -} - -static inline bool use_lpa2_pte_format(struct kvm_vm *vm) -{ - return (vm->page_size == SZ_4K || vm->page_size == SZ_16K) && - (vm->pa_bits > 48 || vm->va_bits > 48); -} - -static uint64_t addr_pte(struct kvm_vm *vm, uint64_t pa, uint64_t attrs) -{ - uint64_t pte; - - if (use_lpa2_pte_format(vm)) { - pte = pa & GENMASK(49, vm->page_shift); - pte |= FIELD_GET(GENMASK(51, 50), pa) << 8; - attrs &= ~GENMASK(9, 8); - } else { - pte = pa & GENMASK(47, vm->page_shift); - if (vm->page_shift == 16) - pte |= FIELD_GET(GENMASK(51, 48), pa) << 12; - } - pte |= attrs; - - return pte; -} - -static uint64_t pte_addr(struct kvm_vm *vm, uint64_t pte) -{ - uint64_t pa; - - if (use_lpa2_pte_format(vm)) { - pa = pte & GENMASK(49, vm->page_shift); - pa |= FIELD_GET(GENMASK(9, 8), pte) << 50; - } else { - pa = pte & GENMASK(47, vm->page_shift); - if (vm->page_shift == 16) - pa |= FIELD_GET(GENMASK(15, 12), pte) << 48; - } - - return pa; -} - -static uint64_t ptrs_per_pgd(struct kvm_vm *vm) -{ - unsigned int shift = (vm->pgtable_levels - 1) * (vm->page_shift - 3) + vm->page_shift; - return 1 << (vm->va_bits - shift); -} - -static uint64_t __maybe_unused ptrs_per_pte(struct kvm_vm *vm) -{ - return 1 << (vm->page_shift - 3); -} - -void virt_arch_pgd_alloc(struct kvm_vm *vm) -{ - size_t nr_pages = page_align(vm, ptrs_per_pgd(vm) * 8) / vm->page_size; - - if (vm->pgd_created) - return; - - vm->pgd = vm_phy_pages_alloc(vm, nr_pages, - KVM_GUEST_PAGE_TABLE_MIN_PADDR, - vm->memslots[MEM_REGION_PT]); - vm->pgd_created = true; -} - -static void _virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, - uint64_t flags) -{ - uint8_t attr_idx = flags & 7; - uint64_t *ptep; - - TEST_ASSERT((vaddr % vm->page_size) == 0, - "Virtual address not on page boundary,\n" - " vaddr: 0x%lx vm->page_size: 0x%x", vaddr, vm->page_size); - TEST_ASSERT(sparsebit_is_set(vm->vpages_valid, - (vaddr >> vm->page_shift)), - "Invalid virtual address, vaddr: 0x%lx", vaddr); - TEST_ASSERT((paddr % vm->page_size) == 0, - "Physical address not on page boundary,\n" - " paddr: 0x%lx vm->page_size: 0x%x", paddr, vm->page_size); - TEST_ASSERT((paddr 
>> vm->page_shift) <= vm->max_gfn, - "Physical address beyond beyond maximum supported,\n" - " paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x", - paddr, vm->max_gfn, vm->page_size); - - ptep = addr_gpa2hva(vm, vm->pgd) + pgd_index(vm, vaddr) * 8; - if (!*ptep) - *ptep = addr_pte(vm, vm_alloc_page_table(vm), 3); - - switch (vm->pgtable_levels) { - case 4: - ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pud_index(vm, vaddr) * 8; - if (!*ptep) - *ptep = addr_pte(vm, vm_alloc_page_table(vm), 3); - /* fall through */ - case 3: - ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pmd_index(vm, vaddr) * 8; - if (!*ptep) - *ptep = addr_pte(vm, vm_alloc_page_table(vm), 3); - /* fall through */ - case 2: - ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pte_index(vm, vaddr) * 8; - break; - default: - TEST_FAIL("Page table levels must be 2, 3, or 4"); - } - - *ptep = addr_pte(vm, paddr, (attr_idx << 2) | (1 << 10) | 3); /* AF */ -} - -void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr) -{ - uint64_t attr_idx = MT_NORMAL; - - _virt_pg_map(vm, vaddr, paddr, attr_idx); -} - -uint64_t *virt_get_pte_hva(struct kvm_vm *vm, vm_vaddr_t gva) -{ - uint64_t *ptep; - - if (!vm->pgd_created) - goto unmapped_gva; - - ptep = addr_gpa2hva(vm, vm->pgd) + pgd_index(vm, gva) * 8; - if (!ptep) - goto unmapped_gva; - - switch (vm->pgtable_levels) { - case 4: - ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pud_index(vm, gva) * 8; - if (!ptep) - goto unmapped_gva; - /* fall through */ - case 3: - ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pmd_index(vm, gva) * 8; - if (!ptep) - goto unmapped_gva; - /* fall through */ - case 2: - ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pte_index(vm, gva) * 8; - if (!ptep) - goto unmapped_gva; - break; - default: - TEST_FAIL("Page table levels must be 2, 3, or 4"); - } - - return ptep; - -unmapped_gva: - TEST_FAIL("No mapping for vm virtual address, gva: 0x%lx", gva); - exit(EXIT_FAILURE); -} - -vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva) -{ - uint64_t *ptep = virt_get_pte_hva(vm, gva); - - return pte_addr(vm, *ptep) + (gva & (vm->page_size - 1)); -} - -static void pte_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent, uint64_t page, int level) -{ -#ifdef DEBUG - static const char * const type[] = { "", "pud", "pmd", "pte" }; - uint64_t pte, *ptep; - - if (level == 4) - return; - - for (pte = page; pte < page + ptrs_per_pte(vm) * 8; pte += 8) { - ptep = addr_gpa2hva(vm, pte); - if (!*ptep) - continue; - fprintf(stream, "%*s%s: %lx: %lx at %p\n", indent, "", type[level], pte, *ptep, ptep); - pte_dump(stream, vm, indent + 1, pte_addr(vm, *ptep), level + 1); - } -#endif -} - -void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) -{ - int level = 4 - (vm->pgtable_levels - 1); - uint64_t pgd, *ptep; - - if (!vm->pgd_created) - return; - - for (pgd = vm->pgd; pgd < vm->pgd + ptrs_per_pgd(vm) * 8; pgd += 8) { - ptep = addr_gpa2hva(vm, pgd); - if (!*ptep) - continue; - fprintf(stream, "%*spgd: %lx: %lx at %p\n", indent, "", pgd, *ptep, ptep); - pte_dump(stream, vm, indent + 1, pte_addr(vm, *ptep), level); - } -} - -void aarch64_vcpu_setup(struct kvm_vcpu *vcpu, struct kvm_vcpu_init *init) -{ - struct kvm_vcpu_init default_init = { .target = -1, }; - struct kvm_vm *vm = vcpu->vm; - uint64_t sctlr_el1, tcr_el1, ttbr0_el1; - - if (!init) - init = &default_init; - - if (init->target == -1) { - struct kvm_vcpu_init preferred; - vm_ioctl(vm, KVM_ARM_PREFERRED_TARGET, &preferred); - init->target = preferred.target; - } - - 
vcpu_ioctl(vcpu, KVM_ARM_VCPU_INIT, init); - - /* - * Enable FP/ASIMD to avoid trapping when accessing Q0-Q15 - * registers, which the variable argument list macros do. - */ - vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_CPACR_EL1), 3 << 20); - - sctlr_el1 = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_SCTLR_EL1)); - tcr_el1 = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_TCR_EL1)); - - /* Configure base granule size */ - switch (vm->mode) { - case VM_MODE_PXXV48_4K: - TEST_FAIL("AArch64 does not support 4K sized pages " - "with ANY-bit physical address ranges"); - case VM_MODE_P52V48_64K: - case VM_MODE_P48V48_64K: - case VM_MODE_P40V48_64K: - case VM_MODE_P36V48_64K: - tcr_el1 |= 1ul << 14; /* TG0 = 64KB */ - break; - case VM_MODE_P52V48_16K: - case VM_MODE_P48V48_16K: - case VM_MODE_P40V48_16K: - case VM_MODE_P36V48_16K: - case VM_MODE_P36V47_16K: - tcr_el1 |= 2ul << 14; /* TG0 = 16KB */ - break; - case VM_MODE_P52V48_4K: - case VM_MODE_P48V48_4K: - case VM_MODE_P40V48_4K: - case VM_MODE_P36V48_4K: - tcr_el1 |= 0ul << 14; /* TG0 = 4KB */ - break; - default: - TEST_FAIL("Unknown guest mode, mode: 0x%x", vm->mode); - } - - ttbr0_el1 = vm->pgd & GENMASK(47, vm->page_shift); - - /* Configure output size */ - switch (vm->mode) { - case VM_MODE_P52V48_4K: - case VM_MODE_P52V48_16K: - case VM_MODE_P52V48_64K: - tcr_el1 |= 6ul << 32; /* IPS = 52 bits */ - ttbr0_el1 |= FIELD_GET(GENMASK(51, 48), vm->pgd) << 2; - break; - case VM_MODE_P48V48_4K: - case VM_MODE_P48V48_16K: - case VM_MODE_P48V48_64K: - tcr_el1 |= 5ul << 32; /* IPS = 48 bits */ - break; - case VM_MODE_P40V48_4K: - case VM_MODE_P40V48_16K: - case VM_MODE_P40V48_64K: - tcr_el1 |= 2ul << 32; /* IPS = 40 bits */ - break; - case VM_MODE_P36V48_4K: - case VM_MODE_P36V48_16K: - case VM_MODE_P36V48_64K: - case VM_MODE_P36V47_16K: - tcr_el1 |= 1ul << 32; /* IPS = 36 bits */ - break; - default: - TEST_FAIL("Unknown guest mode, mode: 0x%x", vm->mode); - } - - sctlr_el1 |= (1 << 0) | (1 << 2) | (1 << 12) /* M | C | I */; - /* TCR_EL1 |= IRGN0:WBWA | ORGN0:WBWA | SH0:Inner-Shareable */; - tcr_el1 |= (1 << 8) | (1 << 10) | (3 << 12); - tcr_el1 |= (64 - vm->va_bits) /* T0SZ */; - if (use_lpa2_pte_format(vm)) - tcr_el1 |= (1ul << 59) /* DS */; - - vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_SCTLR_EL1), sctlr_el1); - vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_TCR_EL1), tcr_el1); - vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_MAIR_EL1), DEFAULT_MAIR_EL1); - vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_TTBR0_EL1), ttbr0_el1); - vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_TPIDR_EL1), vcpu->id); -} - -void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, uint8_t indent) -{ - uint64_t pstate, pc; - - pstate = vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.pstate)); - pc = vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.pc)); - - fprintf(stream, "%*spstate: 0x%.16lx pc: 0x%.16lx\n", - indent, "", pstate, pc); -} - -void vcpu_arch_set_entry_point(struct kvm_vcpu *vcpu, void *guest_code) -{ - vcpu_set_reg(vcpu, ARM64_CORE_REG(regs.pc), (uint64_t)guest_code); -} - -static struct kvm_vcpu *__aarch64_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id, - struct kvm_vcpu_init *init) -{ - size_t stack_size; - uint64_t stack_vaddr; - struct kvm_vcpu *vcpu = __vm_vcpu_add(vm, vcpu_id); - - stack_size = vm->page_size == 4096 ? 
DEFAULT_STACK_PGS * vm->page_size : - vm->page_size; - stack_vaddr = __vm_vaddr_alloc(vm, stack_size, - DEFAULT_ARM64_GUEST_STACK_VADDR_MIN, - MEM_REGION_DATA); - - aarch64_vcpu_setup(vcpu, init); - - vcpu_set_reg(vcpu, ARM64_CORE_REG(sp_el1), stack_vaddr + stack_size); - return vcpu; -} - -struct kvm_vcpu *aarch64_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id, - struct kvm_vcpu_init *init, void *guest_code) -{ - struct kvm_vcpu *vcpu = __aarch64_vcpu_add(vm, vcpu_id, init); - - vcpu_arch_set_entry_point(vcpu, guest_code); - - return vcpu; -} - -struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id) -{ - return __aarch64_vcpu_add(vm, vcpu_id, NULL); -} - -void vcpu_args_set(struct kvm_vcpu *vcpu, unsigned int num, ...) -{ - va_list ap; - int i; - - TEST_ASSERT(num >= 1 && num <= 8, "Unsupported number of args,\n" - " num: %u", num); - - va_start(ap, num); - - for (i = 0; i < num; i++) { - vcpu_set_reg(vcpu, ARM64_CORE_REG(regs.regs[i]), - va_arg(ap, uint64_t)); - } - - va_end(ap); -} - -void kvm_exit_unexpected_exception(int vector, uint64_t ec, bool valid_ec) -{ - ucall(UCALL_UNHANDLED, 3, vector, ec, valid_ec); - while (1) - ; -} - -void assert_on_unhandled_exception(struct kvm_vcpu *vcpu) -{ - struct ucall uc; - - if (get_ucall(vcpu, &uc) != UCALL_UNHANDLED) - return; - - if (uc.args[2]) /* valid_ec */ { - assert(VECTOR_IS_SYNC(uc.args[0])); - TEST_FAIL("Unexpected exception (vector:0x%lx, ec:0x%lx)", - uc.args[0], uc.args[1]); - } else { - assert(!VECTOR_IS_SYNC(uc.args[0])); - TEST_FAIL("Unexpected exception (vector:0x%lx)", - uc.args[0]); - } -} - -struct handlers { - handler_fn exception_handlers[VECTOR_NUM][ESR_ELx_EC_MAX + 1]; -}; - -void vcpu_init_descriptor_tables(struct kvm_vcpu *vcpu) -{ - extern char vectors; - - vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_VBAR_EL1), (uint64_t)&vectors); -} - -void route_exception(struct ex_regs *regs, int vector) -{ - struct handlers *handlers = (struct handlers *)exception_handlers; - bool valid_ec; - int ec = 0; - - switch (vector) { - case VECTOR_SYNC_CURRENT: - case VECTOR_SYNC_LOWER_64: - ec = ESR_ELx_EC(read_sysreg(esr_el1)); - valid_ec = true; - break; - case VECTOR_IRQ_CURRENT: - case VECTOR_IRQ_LOWER_64: - case VECTOR_FIQ_CURRENT: - case VECTOR_FIQ_LOWER_64: - case VECTOR_ERROR_CURRENT: - case VECTOR_ERROR_LOWER_64: - ec = 0; - valid_ec = false; - break; - default: - valid_ec = false; - goto unexpected_exception; - } - - if (handlers && handlers->exception_handlers[vector][ec]) - return handlers->exception_handlers[vector][ec](regs); - -unexpected_exception: - kvm_exit_unexpected_exception(vector, ec, valid_ec); -} - -void vm_init_descriptor_tables(struct kvm_vm *vm) -{ - vm->handlers = __vm_vaddr_alloc(vm, sizeof(struct handlers), - vm->page_size, MEM_REGION_DATA); - - *(vm_vaddr_t *)addr_gva2hva(vm, (vm_vaddr_t)(&exception_handlers)) = vm->handlers; -} - -void vm_install_sync_handler(struct kvm_vm *vm, int vector, int ec, - void (*handler)(struct ex_regs *)) -{ - struct handlers *handlers = addr_gva2hva(vm, vm->handlers); - - assert(VECTOR_IS_SYNC(vector)); - assert(vector < VECTOR_NUM); - assert(ec <= ESR_ELx_EC_MAX); - handlers->exception_handlers[vector][ec] = handler; -} - -void vm_install_exception_handler(struct kvm_vm *vm, int vector, - void (*handler)(struct ex_regs *)) -{ - struct handlers *handlers = addr_gva2hva(vm, vm->handlers); - - assert(!VECTOR_IS_SYNC(vector)); - assert(vector < VECTOR_NUM); - handlers->exception_handlers[vector][0] = handler; -} - -uint32_t guest_get_vcpuid(void) -{ - return 
read_sysreg(tpidr_el1); -} - -static uint32_t max_ipa_for_page_size(uint32_t vm_ipa, uint32_t gran, - uint32_t not_sup_val, uint32_t ipa52_min_val) -{ - if (gran == not_sup_val) - return 0; - else if (gran >= ipa52_min_val && vm_ipa >= 52) - return 52; - else - return min(vm_ipa, 48U); -} - -void aarch64_get_supported_page_sizes(uint32_t ipa, uint32_t *ipa4k, - uint32_t *ipa16k, uint32_t *ipa64k) -{ - struct kvm_vcpu_init preferred_init; - int kvm_fd, vm_fd, vcpu_fd, err; - uint64_t val; - uint32_t gran; - struct kvm_one_reg reg = { - .id = KVM_ARM64_SYS_REG(SYS_ID_AA64MMFR0_EL1), - .addr = (uint64_t)&val, - }; - - kvm_fd = open_kvm_dev_path_or_exit(); - vm_fd = __kvm_ioctl(kvm_fd, KVM_CREATE_VM, (void *)(unsigned long)ipa); - TEST_ASSERT(vm_fd >= 0, KVM_IOCTL_ERROR(KVM_CREATE_VM, vm_fd)); - - vcpu_fd = ioctl(vm_fd, KVM_CREATE_VCPU, 0); - TEST_ASSERT(vcpu_fd >= 0, KVM_IOCTL_ERROR(KVM_CREATE_VCPU, vcpu_fd)); - - err = ioctl(vm_fd, KVM_ARM_PREFERRED_TARGET, &preferred_init); - TEST_ASSERT(err == 0, KVM_IOCTL_ERROR(KVM_ARM_PREFERRED_TARGET, err)); - err = ioctl(vcpu_fd, KVM_ARM_VCPU_INIT, &preferred_init); - TEST_ASSERT(err == 0, KVM_IOCTL_ERROR(KVM_ARM_VCPU_INIT, err)); - - err = ioctl(vcpu_fd, KVM_GET_ONE_REG, ®); - TEST_ASSERT(err == 0, KVM_IOCTL_ERROR(KVM_GET_ONE_REG, vcpu_fd)); - - gran = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_TGRAN4), val); - *ipa4k = max_ipa_for_page_size(ipa, gran, ID_AA64MMFR0_EL1_TGRAN4_NI, - ID_AA64MMFR0_EL1_TGRAN4_52_BIT); - - gran = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_TGRAN64), val); - *ipa64k = max_ipa_for_page_size(ipa, gran, ID_AA64MMFR0_EL1_TGRAN64_NI, - ID_AA64MMFR0_EL1_TGRAN64_IMP); - - gran = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_TGRAN16), val); - *ipa16k = max_ipa_for_page_size(ipa, gran, ID_AA64MMFR0_EL1_TGRAN16_NI, - ID_AA64MMFR0_EL1_TGRAN16_52_BIT); - - close(vcpu_fd); - close(vm_fd); - close(kvm_fd); -} - -#define __smccc_call(insn, function_id, arg0, arg1, arg2, arg3, arg4, arg5, \ - arg6, res) \ - asm volatile("mov w0, %w[function_id]\n" \ - "mov x1, %[arg0]\n" \ - "mov x2, %[arg1]\n" \ - "mov x3, %[arg2]\n" \ - "mov x4, %[arg3]\n" \ - "mov x5, %[arg4]\n" \ - "mov x6, %[arg5]\n" \ - "mov x7, %[arg6]\n" \ - #insn "#0\n" \ - "mov %[res0], x0\n" \ - "mov %[res1], x1\n" \ - "mov %[res2], x2\n" \ - "mov %[res3], x3\n" \ - : [res0] "=r"(res->a0), [res1] "=r"(res->a1), \ - [res2] "=r"(res->a2), [res3] "=r"(res->a3) \ - : [function_id] "r"(function_id), [arg0] "r"(arg0), \ - [arg1] "r"(arg1), [arg2] "r"(arg2), [arg3] "r"(arg3), \ - [arg4] "r"(arg4), [arg5] "r"(arg5), [arg6] "r"(arg6) \ - : "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7") - - -void smccc_hvc(uint32_t function_id, uint64_t arg0, uint64_t arg1, - uint64_t arg2, uint64_t arg3, uint64_t arg4, uint64_t arg5, - uint64_t arg6, struct arm_smccc_res *res) -{ - __smccc_call(hvc, function_id, arg0, arg1, arg2, arg3, arg4, arg5, - arg6, res); -} - -void smccc_smc(uint32_t function_id, uint64_t arg0, uint64_t arg1, - uint64_t arg2, uint64_t arg3, uint64_t arg4, uint64_t arg5, - uint64_t arg6, struct arm_smccc_res *res) -{ - __smccc_call(smc, function_id, arg0, arg1, arg2, arg3, arg4, arg5, - arg6, res); -} - -void kvm_selftest_arch_init(void) -{ - /* - * arm64 doesn't have a true default mode, so start by computing the - * available IPA space and page sizes early. - */ - guest_modes_append_default(); -} - -void vm_vaddr_populate_bitmap(struct kvm_vm *vm) -{ - /* - * arm64 selftests use only TTBR0_EL1, meaning that the valid VA space - * is [0, 2^(64 - TCR_EL1.T0SZ)). 
- */ - sparsebit_set_num(vm->vpages_valid, 0, - (1ULL << vm->va_bits) >> vm->page_shift); -} - -/* Helper to call wfi instruction. */ -void wfi(void) -{ - asm volatile("wfi"); -} diff --git a/tools/testing/selftests/kvm/lib/aarch64/spinlock.c b/tools/testing/selftests/kvm/lib/aarch64/spinlock.c deleted file mode 100644 index a076e780be5d..000000000000 --- a/tools/testing/selftests/kvm/lib/aarch64/spinlock.c +++ /dev/null @@ -1,27 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * ARM64 Spinlock support - */ -#include - -#include "spinlock.h" - -void spin_lock(struct spinlock *lock) -{ - int val, res; - - asm volatile( - "1: ldaxr %w0, [%2]\n" - " cbnz %w0, 1b\n" - " mov %w0, #1\n" - " stxr %w1, %w0, [%2]\n" - " cbnz %w1, 1b\n" - : "=&r" (val), "=&r" (res) - : "r" (&lock->v) - : "memory"); -} - -void spin_unlock(struct spinlock *lock) -{ - asm volatile("stlr wzr, [%0]\n" : : "r" (&lock->v) : "memory"); -} diff --git a/tools/testing/selftests/kvm/lib/aarch64/ucall.c b/tools/testing/selftests/kvm/lib/aarch64/ucall.c deleted file mode 100644 index ddab0ce89d4d..000000000000 --- a/tools/testing/selftests/kvm/lib/aarch64/ucall.c +++ /dev/null @@ -1,34 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * ucall support. A ucall is a "hypercall to userspace". - * - * Copyright (C) 2018, Red Hat, Inc. - */ -#include "kvm_util.h" - -vm_vaddr_t *ucall_exit_mmio_addr; - -void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa) -{ - vm_vaddr_t mmio_gva = vm_vaddr_unused_gap(vm, vm->page_size, KVM_UTIL_MIN_VADDR); - - virt_map(vm, mmio_gva, mmio_gpa, 1); - - vm->ucall_mmio_addr = mmio_gpa; - - write_guest_global(vm, ucall_exit_mmio_addr, (vm_vaddr_t *)mmio_gva); -} - -void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu) -{ - struct kvm_run *run = vcpu->run; - - if (run->exit_reason == KVM_EXIT_MMIO && - run->mmio.phys_addr == vcpu->vm->ucall_mmio_addr) { - TEST_ASSERT(run->mmio.is_write && run->mmio.len == sizeof(uint64_t), - "Unexpected ucall exit mmio address access"); - return (void *)(*((uint64_t *)run->mmio.data)); - } - - return NULL; -} diff --git a/tools/testing/selftests/kvm/lib/aarch64/vgic.c b/tools/testing/selftests/kvm/lib/aarch64/vgic.c deleted file mode 100644 index 4427f43f73ea..000000000000 --- a/tools/testing/selftests/kvm/lib/aarch64/vgic.c +++ /dev/null @@ -1,188 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * ARM Generic Interrupt Controller (GIC) v3 host support - */ - -#include -#include -#include -#include -#include -#include - -#include "kvm_util.h" -#include "vgic.h" -#include "gic.h" -#include "gic_v3.h" - -/* - * vGIC-v3 default host setup - * - * Input args: - * vm - KVM VM - * nr_vcpus - Number of vCPUs supported by this VM - * - * Output args: None - * - * Return: GIC file-descriptor or negative error code upon failure - * - * The function creates a vGIC-v3 device and maps the distributor and - * redistributor regions of the guest. Since it depends on the number of - * vCPUs for the VM, it must be called after all the vCPUs have been created. - */ -int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, uint32_t nr_irqs) -{ - int gic_fd; - uint64_t attr; - struct list_head *iter; - unsigned int nr_gic_pages, nr_vcpus_created = 0; - - TEST_ASSERT(nr_vcpus, "Number of vCPUs cannot be empty"); - - /* - * Make sure that the caller is infact calling this - * function after all the vCPUs are added. 
- */ - list_for_each(iter, &vm->vcpus) - nr_vcpus_created++; - TEST_ASSERT(nr_vcpus == nr_vcpus_created, - "Number of vCPUs requested (%u) doesn't match with the ones created for the VM (%u)", - nr_vcpus, nr_vcpus_created); - - /* Distributor setup */ - gic_fd = __kvm_create_device(vm, KVM_DEV_TYPE_ARM_VGIC_V3); - if (gic_fd < 0) - return gic_fd; - - kvm_device_attr_set(gic_fd, KVM_DEV_ARM_VGIC_GRP_NR_IRQS, 0, &nr_irqs); - - kvm_device_attr_set(gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL, - KVM_DEV_ARM_VGIC_CTRL_INIT, NULL); - - attr = GICD_BASE_GPA; - kvm_device_attr_set(gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, - KVM_VGIC_V3_ADDR_TYPE_DIST, &attr); - nr_gic_pages = vm_calc_num_guest_pages(vm->mode, KVM_VGIC_V3_DIST_SIZE); - virt_map(vm, GICD_BASE_GPA, GICD_BASE_GPA, nr_gic_pages); - - /* Redistributor setup */ - attr = REDIST_REGION_ATTR_ADDR(nr_vcpus, GICR_BASE_GPA, 0, 0); - kvm_device_attr_set(gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, - KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &attr); - nr_gic_pages = vm_calc_num_guest_pages(vm->mode, - KVM_VGIC_V3_REDIST_SIZE * nr_vcpus); - virt_map(vm, GICR_BASE_GPA, GICR_BASE_GPA, nr_gic_pages); - - kvm_device_attr_set(gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL, - KVM_DEV_ARM_VGIC_CTRL_INIT, NULL); - - return gic_fd; -} - -/* should only work for level sensitive interrupts */ -int _kvm_irq_set_level_info(int gic_fd, uint32_t intid, int level) -{ - uint64_t attr = 32 * (intid / 32); - uint64_t index = intid % 32; - uint64_t val; - int ret; - - ret = __kvm_device_attr_get(gic_fd, KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO, - attr, &val); - if (ret != 0) - return ret; - - val |= 1U << index; - ret = __kvm_device_attr_set(gic_fd, KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO, - attr, &val); - return ret; -} - -void kvm_irq_set_level_info(int gic_fd, uint32_t intid, int level) -{ - int ret = _kvm_irq_set_level_info(gic_fd, intid, level); - - TEST_ASSERT(!ret, KVM_IOCTL_ERROR(KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO, ret)); -} - -int _kvm_arm_irq_line(struct kvm_vm *vm, uint32_t intid, int level) -{ - uint32_t irq = intid & KVM_ARM_IRQ_NUM_MASK; - - TEST_ASSERT(!INTID_IS_SGI(intid), "KVM_IRQ_LINE's interface itself " - "doesn't allow injecting SGIs. There's no mask for it."); - - if (INTID_IS_PPI(intid)) - irq |= KVM_ARM_IRQ_TYPE_PPI << KVM_ARM_IRQ_TYPE_SHIFT; - else - irq |= KVM_ARM_IRQ_TYPE_SPI << KVM_ARM_IRQ_TYPE_SHIFT; - - return _kvm_irq_line(vm, irq, level); -} - -void kvm_arm_irq_line(struct kvm_vm *vm, uint32_t intid, int level) -{ - int ret = _kvm_arm_irq_line(vm, intid, level); - - TEST_ASSERT(!ret, KVM_IOCTL_ERROR(KVM_IRQ_LINE, ret)); -} - -static void vgic_poke_irq(int gic_fd, uint32_t intid, struct kvm_vcpu *vcpu, - uint64_t reg_off) -{ - uint64_t reg = intid / 32; - uint64_t index = intid % 32; - uint64_t attr = reg_off + reg * 4; - uint64_t val; - bool intid_is_private = INTID_IS_SGI(intid) || INTID_IS_PPI(intid); - - uint32_t group = intid_is_private ? KVM_DEV_ARM_VGIC_GRP_REDIST_REGS - : KVM_DEV_ARM_VGIC_GRP_DIST_REGS; - - if (intid_is_private) { - /* TODO: only vcpu 0 implemented for now. */ - assert(vcpu->id == 0); - attr += SZ_64K; - } - - /* Check that the addr part of the attr is within 32 bits. */ - assert((attr & ~KVM_DEV_ARM_VGIC_OFFSET_MASK) == 0); - - /* - * All calls will succeed, even with invalid intid's, as long as the - * addr part of the attr is within 32 bits (checked above). An invalid - * intid will just make the read/writes point to above the intended - * register space (i.e., ICPENDR after ISPENDR). 
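
Editor's note (illustrative aside, not part of the patch): vgic_poke_irq() above turns an intid into a 32-bit register offset and a bit position with intid / 32 and intid % 32. A worked example of that arithmetic follows; the 0x0200 base is GICD_ISPENDR's location in the GICv3 distributor map as I recall it from the architecture spec, quoted for illustration only.

#include <stdint.h>
#include <stdio.h>

#define GICD_ISPENDR	0x0200	/* set-pending registers, one bit per intid */

int main(void)
{
	uint32_t intid = 100;
	uint64_t reg   = intid / 32;			/* 32 interrupts per register */
	uint64_t index = intid % 32;			/* bit within that register */
	uint64_t attr  = GICD_ISPENDR + reg * 4;	/* byte offset of the register */

	printf("intid %u -> attr 0x%llx, bit %llu\n", intid,
	       (unsigned long long)attr, (unsigned long long)index);
	/* prints: intid 100 -> attr 0x20c, bit 4 */
	return 0;
}

(End of editor's note.)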
- */ - kvm_device_attr_get(gic_fd, group, attr, &val); - val |= 1ULL << index; - kvm_device_attr_set(gic_fd, group, attr, &val); -} - -void kvm_irq_write_ispendr(int gic_fd, uint32_t intid, struct kvm_vcpu *vcpu) -{ - vgic_poke_irq(gic_fd, intid, vcpu, GICD_ISPENDR); -} - -void kvm_irq_write_isactiver(int gic_fd, uint32_t intid, struct kvm_vcpu *vcpu) -{ - vgic_poke_irq(gic_fd, intid, vcpu, GICD_ISACTIVER); -} - -int vgic_its_setup(struct kvm_vm *vm) -{ - int its_fd = kvm_create_device(vm, KVM_DEV_TYPE_ARM_VGIC_ITS); - u64 attr; - - attr = GITS_BASE_GPA; - kvm_device_attr_set(its_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, - KVM_VGIC_ITS_ADDR_TYPE, &attr); - - kvm_device_attr_set(its_fd, KVM_DEV_ARM_VGIC_GRP_CTRL, - KVM_DEV_ARM_VGIC_CTRL_INIT, NULL); - - virt_map(vm, GITS_BASE_GPA, GITS_BASE_GPA, - vm_calc_num_guest_pages(vm->mode, KVM_VGIC_V3_ITS_SIZE)); - - return its_fd; -} diff --git a/tools/testing/selftests/kvm/lib/arm64/gic.c b/tools/testing/selftests/kvm/lib/arm64/gic.c new file mode 100644 index 000000000000..7abbf8866512 --- /dev/null +++ b/tools/testing/selftests/kvm/lib/arm64/gic.c @@ -0,0 +1,157 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * ARM Generic Interrupt Controller (GIC) support + */ + +#include +#include +#include + +#include "kvm_util.h" + +#include +#include "gic_private.h" +#include "processor.h" +#include "spinlock.h" + +static const struct gic_common_ops *gic_common_ops; +static struct spinlock gic_lock; + +static void gic_cpu_init(unsigned int cpu) +{ + gic_common_ops->gic_cpu_init(cpu); +} + +static void gic_dist_init(enum gic_type type, unsigned int nr_cpus) +{ + const struct gic_common_ops *gic_ops = NULL; + + spin_lock(&gic_lock); + + /* Distributor initialization is needed only once per VM */ + if (gic_common_ops) { + spin_unlock(&gic_lock); + return; + } + + if (type == GIC_V3) + gic_ops = &gicv3_ops; + + GUEST_ASSERT(gic_ops); + + gic_ops->gic_init(nr_cpus); + gic_common_ops = gic_ops; + + /* Make sure that the initialized data is visible to all the vCPUs */ + dsb(sy); + + spin_unlock(&gic_lock); +} + +void gic_init(enum gic_type type, unsigned int nr_cpus) +{ + uint32_t cpu = guest_get_vcpuid(); + + GUEST_ASSERT(type < GIC_TYPE_MAX); + GUEST_ASSERT(nr_cpus); + + gic_dist_init(type, nr_cpus); + gic_cpu_init(cpu); +} + +void gic_irq_enable(unsigned int intid) +{ + GUEST_ASSERT(gic_common_ops); + gic_common_ops->gic_irq_enable(intid); +} + +void gic_irq_disable(unsigned int intid) +{ + GUEST_ASSERT(gic_common_ops); + gic_common_ops->gic_irq_disable(intid); +} + +unsigned int gic_get_and_ack_irq(void) +{ + uint64_t irqstat; + unsigned int intid; + + GUEST_ASSERT(gic_common_ops); + + irqstat = gic_common_ops->gic_read_iar(); + intid = irqstat & GENMASK(23, 0); + + return intid; +} + +void gic_set_eoi(unsigned int intid) +{ + GUEST_ASSERT(gic_common_ops); + gic_common_ops->gic_write_eoir(intid); +} + +void gic_set_dir(unsigned int intid) +{ + GUEST_ASSERT(gic_common_ops); + gic_common_ops->gic_write_dir(intid); +} + +void gic_set_eoi_split(bool split) +{ + GUEST_ASSERT(gic_common_ops); + gic_common_ops->gic_set_eoi_split(split); +} + +void gic_set_priority_mask(uint64_t pmr) +{ + GUEST_ASSERT(gic_common_ops); + gic_common_ops->gic_set_priority_mask(pmr); +} + +void gic_set_priority(unsigned int intid, unsigned int prio) +{ + GUEST_ASSERT(gic_common_ops); + gic_common_ops->gic_set_priority(intid, prio); +} + +void gic_irq_set_active(unsigned int intid) +{ + GUEST_ASSERT(gic_common_ops); + gic_common_ops->gic_irq_set_active(intid); +} + +void gic_irq_clear_active(unsigned 
int intid) +{ + GUEST_ASSERT(gic_common_ops); + gic_common_ops->gic_irq_clear_active(intid); +} + +bool gic_irq_get_active(unsigned int intid) +{ + GUEST_ASSERT(gic_common_ops); + return gic_common_ops->gic_irq_get_active(intid); +} + +void gic_irq_set_pending(unsigned int intid) +{ + GUEST_ASSERT(gic_common_ops); + gic_common_ops->gic_irq_set_pending(intid); +} + +void gic_irq_clear_pending(unsigned int intid) +{ + GUEST_ASSERT(gic_common_ops); + gic_common_ops->gic_irq_clear_pending(intid); +} + +bool gic_irq_get_pending(unsigned int intid) +{ + GUEST_ASSERT(gic_common_ops); + return gic_common_ops->gic_irq_get_pending(intid); +} + +void gic_irq_set_config(unsigned int intid, bool is_edge) +{ + GUEST_ASSERT(gic_common_ops); + gic_common_ops->gic_irq_set_config(intid, is_edge); +} diff --git a/tools/testing/selftests/kvm/lib/arm64/gic_private.h b/tools/testing/selftests/kvm/lib/arm64/gic_private.h new file mode 100644 index 000000000000..d24e9ecc96c6 --- /dev/null +++ b/tools/testing/selftests/kvm/lib/arm64/gic_private.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * ARM Generic Interrupt Controller (GIC) private defines that's only + * shared among the GIC library code. + */ + +#ifndef SELFTEST_KVM_GIC_PRIVATE_H +#define SELFTEST_KVM_GIC_PRIVATE_H + +struct gic_common_ops { + void (*gic_init)(unsigned int nr_cpus); + void (*gic_cpu_init)(unsigned int cpu); + void (*gic_irq_enable)(unsigned int intid); + void (*gic_irq_disable)(unsigned int intid); + uint64_t (*gic_read_iar)(void); + void (*gic_write_eoir)(uint32_t irq); + void (*gic_write_dir)(uint32_t irq); + void (*gic_set_eoi_split)(bool split); + void (*gic_set_priority_mask)(uint64_t mask); + void (*gic_set_priority)(uint32_t intid, uint32_t prio); + void (*gic_irq_set_active)(uint32_t intid); + void (*gic_irq_clear_active)(uint32_t intid); + bool (*gic_irq_get_active)(uint32_t intid); + void (*gic_irq_set_pending)(uint32_t intid); + void (*gic_irq_clear_pending)(uint32_t intid); + bool (*gic_irq_get_pending)(uint32_t intid); + void (*gic_irq_set_config)(uint32_t intid, bool is_edge); +}; + +extern const struct gic_common_ops gicv3_ops; + +#endif /* SELFTEST_KVM_GIC_PRIVATE_H */ diff --git a/tools/testing/selftests/kvm/lib/arm64/gic_v3.c b/tools/testing/selftests/kvm/lib/arm64/gic_v3.c new file mode 100644 index 000000000000..66d05506f78b --- /dev/null +++ b/tools/testing/selftests/kvm/lib/arm64/gic_v3.c @@ -0,0 +1,427 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * ARM Generic Interrupt Controller (GIC) v3 support + */ + +#include + +#include "kvm_util.h" +#include "processor.h" +#include "delay.h" + +#include "gic.h" +#include "gic_v3.h" +#include "gic_private.h" + +#define GICV3_MAX_CPUS 512 + +#define GICD_INT_DEF_PRI 0xa0 +#define GICD_INT_DEF_PRI_X4 ((GICD_INT_DEF_PRI << 24) |\ + (GICD_INT_DEF_PRI << 16) |\ + (GICD_INT_DEF_PRI << 8) |\ + GICD_INT_DEF_PRI) + +#define ICC_PMR_DEF_PRIO 0xf0 + +struct gicv3_data { + unsigned int nr_cpus; + unsigned int nr_spis; +}; + +#define sgi_base_from_redist(redist_base) (redist_base + SZ_64K) +#define DIST_BIT (1U << 31) + +enum gicv3_intid_range { + SGI_RANGE, + PPI_RANGE, + SPI_RANGE, + INVALID_RANGE, +}; + +static struct gicv3_data gicv3_data; + +static void gicv3_gicd_wait_for_rwp(void) +{ + unsigned int count = 100000; /* 1s */ + + while (readl(GICD_BASE_GVA + GICD_CTLR) & GICD_CTLR_RWP) { + GUEST_ASSERT(count--); + udelay(10); + } +} + +static inline volatile void *gicr_base_cpu(uint32_t cpu) +{ + /* Align all the redistributors sequentially */ + return GICR_BASE_GVA 
+ cpu * SZ_64K * 2; +} + +static void gicv3_gicr_wait_for_rwp(uint32_t cpu) +{ + unsigned int count = 100000; /* 1s */ + + while (readl(gicr_base_cpu(cpu) + GICR_CTLR) & GICR_CTLR_RWP) { + GUEST_ASSERT(count--); + udelay(10); + } +} + +static void gicv3_wait_for_rwp(uint32_t cpu_or_dist) +{ + if (cpu_or_dist & DIST_BIT) + gicv3_gicd_wait_for_rwp(); + else + gicv3_gicr_wait_for_rwp(cpu_or_dist); +} + +static enum gicv3_intid_range get_intid_range(unsigned int intid) +{ + switch (intid) { + case 0 ... 15: + return SGI_RANGE; + case 16 ... 31: + return PPI_RANGE; + case 32 ... 1019: + return SPI_RANGE; + } + + /* We should not be reaching here */ + GUEST_ASSERT(0); + + return INVALID_RANGE; +} + +static uint64_t gicv3_read_iar(void) +{ + uint64_t irqstat = read_sysreg_s(SYS_ICC_IAR1_EL1); + + dsb(sy); + return irqstat; +} + +static void gicv3_write_eoir(uint32_t irq) +{ + write_sysreg_s(irq, SYS_ICC_EOIR1_EL1); + isb(); +} + +static void gicv3_write_dir(uint32_t irq) +{ + write_sysreg_s(irq, SYS_ICC_DIR_EL1); + isb(); +} + +static void gicv3_set_priority_mask(uint64_t mask) +{ + write_sysreg_s(mask, SYS_ICC_PMR_EL1); +} + +static void gicv3_set_eoi_split(bool split) +{ + uint32_t val; + + /* + * All other fields are read-only, so no need to read CTLR first. In + * fact, the kernel does the same. + */ + val = split ? (1U << 1) : 0; + write_sysreg_s(val, SYS_ICC_CTLR_EL1); + isb(); +} + +uint32_t gicv3_reg_readl(uint32_t cpu_or_dist, uint64_t offset) +{ + volatile void *base = cpu_or_dist & DIST_BIT ? GICD_BASE_GVA + : sgi_base_from_redist(gicr_base_cpu(cpu_or_dist)); + return readl(base + offset); +} + +void gicv3_reg_writel(uint32_t cpu_or_dist, uint64_t offset, uint32_t reg_val) +{ + volatile void *base = cpu_or_dist & DIST_BIT ? GICD_BASE_GVA + : sgi_base_from_redist(gicr_base_cpu(cpu_or_dist)); + writel(reg_val, base + offset); +} + +uint32_t gicv3_getl_fields(uint32_t cpu_or_dist, uint64_t offset, uint32_t mask) +{ + return gicv3_reg_readl(cpu_or_dist, offset) & mask; +} + +void gicv3_setl_fields(uint32_t cpu_or_dist, uint64_t offset, + uint32_t mask, uint32_t reg_val) +{ + uint32_t tmp = gicv3_reg_readl(cpu_or_dist, offset) & ~mask; + + tmp |= (reg_val & mask); + gicv3_reg_writel(cpu_or_dist, offset, tmp); +} + +/* + * We use a single offset for the distributor and redistributor maps as they + * have the same value in both. The only exceptions are registers that only + * exist in one and not the other, like GICR_WAKER that doesn't exist in the + * distributor map. Such registers are conveniently marked as reserved in the + * map that doesn't implement it; like GICR_WAKER's offset of 0x0014 being + * marked as "Reserved" in the Distributor map. + */ +static void gicv3_access_reg(uint32_t intid, uint64_t offset, + uint32_t reg_bits, uint32_t bits_per_field, + bool write, uint32_t *val) +{ + uint32_t cpu = guest_get_vcpuid(); + enum gicv3_intid_range intid_range = get_intid_range(intid); + uint32_t fields_per_reg, index, mask, shift; + uint32_t cpu_or_dist; + + GUEST_ASSERT(bits_per_field <= reg_bits); + GUEST_ASSERT(!write || *val < (1U << bits_per_field)); + /* + * This function does not support 64 bit accesses. Just asserting here + * until we implement readq/writeq. + */ + GUEST_ASSERT(reg_bits == 32); + + fields_per_reg = reg_bits / bits_per_field; + index = intid % fields_per_reg; + shift = index * bits_per_field; + mask = ((1U << bits_per_field) - 1) << shift; + + /* Set offset to the actual register holding intid's config. 
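
Editor's note (illustrative aside, not part of the patch): gicv3_access_reg() above computes which register and which bit lane hold an interrupt's configuration. The snippet below works that arithmetic through numerically for an 8-bit priority field in the 32-bit GICD_IPRIORITYR array; intid 42 is picked arbitrarily, and the code only mirrors the computation above rather than touching any hardware.

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t intid = 42, reg_bits = 32, bits_per_field = 8;
	uint32_t fields_per_reg = reg_bits / bits_per_field;		/* 4 */
	uint32_t index = intid % fields_per_reg;			/* 2 */
	uint32_t shift = index * bits_per_field;			/* 16 */
	uint32_t mask  = ((1U << bits_per_field) - 1) << shift;	/* 0x00ff0000 */
	uint32_t offset = (intid / fields_per_reg) * (reg_bits / 8);	/* +40 bytes */

	printf("intid %u: GICD_IPRIORITYR byte offset +%u, shift %u, mask 0x%08x\n",
	       intid, offset, shift, mask);
	return 0;
}

(End of editor's note.)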
*/ + offset += (intid / fields_per_reg) * (reg_bits / 8); + + cpu_or_dist = (intid_range == SPI_RANGE) ? DIST_BIT : cpu; + + if (write) + gicv3_setl_fields(cpu_or_dist, offset, mask, *val << shift); + *val = gicv3_getl_fields(cpu_or_dist, offset, mask) >> shift; +} + +static void gicv3_write_reg(uint32_t intid, uint64_t offset, + uint32_t reg_bits, uint32_t bits_per_field, uint32_t val) +{ + gicv3_access_reg(intid, offset, reg_bits, + bits_per_field, true, &val); +} + +static uint32_t gicv3_read_reg(uint32_t intid, uint64_t offset, + uint32_t reg_bits, uint32_t bits_per_field) +{ + uint32_t val; + + gicv3_access_reg(intid, offset, reg_bits, + bits_per_field, false, &val); + return val; +} + +static void gicv3_set_priority(uint32_t intid, uint32_t prio) +{ + gicv3_write_reg(intid, GICD_IPRIORITYR, 32, 8, prio); +} + +/* Sets the intid to be level-sensitive or edge-triggered. */ +static void gicv3_irq_set_config(uint32_t intid, bool is_edge) +{ + uint32_t val; + + /* N/A for private interrupts. */ + GUEST_ASSERT(get_intid_range(intid) == SPI_RANGE); + val = is_edge ? 2 : 0; + gicv3_write_reg(intid, GICD_ICFGR, 32, 2, val); +} + +static void gicv3_irq_enable(uint32_t intid) +{ + bool is_spi = get_intid_range(intid) == SPI_RANGE; + uint32_t cpu = guest_get_vcpuid(); + + gicv3_write_reg(intid, GICD_ISENABLER, 32, 1, 1); + gicv3_wait_for_rwp(is_spi ? DIST_BIT : cpu); +} + +static void gicv3_irq_disable(uint32_t intid) +{ + bool is_spi = get_intid_range(intid) == SPI_RANGE; + uint32_t cpu = guest_get_vcpuid(); + + gicv3_write_reg(intid, GICD_ICENABLER, 32, 1, 1); + gicv3_wait_for_rwp(is_spi ? DIST_BIT : cpu); +} + +static void gicv3_irq_set_active(uint32_t intid) +{ + gicv3_write_reg(intid, GICD_ISACTIVER, 32, 1, 1); +} + +static void gicv3_irq_clear_active(uint32_t intid) +{ + gicv3_write_reg(intid, GICD_ICACTIVER, 32, 1, 1); +} + +static bool gicv3_irq_get_active(uint32_t intid) +{ + return gicv3_read_reg(intid, GICD_ISACTIVER, 32, 1); +} + +static void gicv3_irq_set_pending(uint32_t intid) +{ + gicv3_write_reg(intid, GICD_ISPENDR, 32, 1, 1); +} + +static void gicv3_irq_clear_pending(uint32_t intid) +{ + gicv3_write_reg(intid, GICD_ICPENDR, 32, 1, 1); +} + +static bool gicv3_irq_get_pending(uint32_t intid) +{ + return gicv3_read_reg(intid, GICD_ISPENDR, 32, 1); +} + +static void gicv3_enable_redist(volatile void *redist_base) +{ + uint32_t val = readl(redist_base + GICR_WAKER); + unsigned int count = 100000; /* 1s */ + + val &= ~GICR_WAKER_ProcessorSleep; + writel(val, redist_base + GICR_WAKER); + + /* Wait until the processor is 'active' */ + while (readl(redist_base + GICR_WAKER) & GICR_WAKER_ChildrenAsleep) { + GUEST_ASSERT(count--); + udelay(10); + } +} + +static void gicv3_cpu_init(unsigned int cpu) +{ + volatile void *sgi_base; + unsigned int i; + volatile void *redist_base_cpu; + + GUEST_ASSERT(cpu < gicv3_data.nr_cpus); + + redist_base_cpu = gicr_base_cpu(cpu); + sgi_base = sgi_base_from_redist(redist_base_cpu); + + gicv3_enable_redist(redist_base_cpu); + + /* + * Mark all the SGI and PPI interrupts as non-secure Group-1. + * Also, deactivate and disable them. 
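+	 * SGIs (INTIDs 0-15) and PPIs (16-31) are private, per-CPU interrupts,
+	 * so they are programmed through this redistributor's SGI_base frame
+	 * rather than through the distributor.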
+ */ + writel(~0, sgi_base + GICR_IGROUPR0); + writel(~0, sgi_base + GICR_ICACTIVER0); + writel(~0, sgi_base + GICR_ICENABLER0); + + /* Set a default priority for all the SGIs and PPIs */ + for (i = 0; i < 32; i += 4) + writel(GICD_INT_DEF_PRI_X4, + sgi_base + GICR_IPRIORITYR0 + i); + + gicv3_gicr_wait_for_rwp(cpu); + + /* Enable the GIC system register (ICC_*) access */ + write_sysreg_s(read_sysreg_s(SYS_ICC_SRE_EL1) | ICC_SRE_EL1_SRE, + SYS_ICC_SRE_EL1); + + /* Set a default priority threshold */ + write_sysreg_s(ICC_PMR_DEF_PRIO, SYS_ICC_PMR_EL1); + + /* Enable non-secure Group-1 interrupts */ + write_sysreg_s(ICC_IGRPEN1_EL1_MASK, SYS_ICC_IGRPEN1_EL1); +} + +static void gicv3_dist_init(void) +{ + unsigned int i; + + /* Disable the distributor until we set things up */ + writel(0, GICD_BASE_GVA + GICD_CTLR); + gicv3_gicd_wait_for_rwp(); + + /* + * Mark all the SPI interrupts as non-secure Group-1. + * Also, deactivate and disable them. + */ + for (i = 32; i < gicv3_data.nr_spis; i += 32) { + writel(~0, GICD_BASE_GVA + GICD_IGROUPR + i / 8); + writel(~0, GICD_BASE_GVA + GICD_ICACTIVER + i / 8); + writel(~0, GICD_BASE_GVA + GICD_ICENABLER + i / 8); + } + + /* Set a default priority for all the SPIs */ + for (i = 32; i < gicv3_data.nr_spis; i += 4) + writel(GICD_INT_DEF_PRI_X4, + GICD_BASE_GVA + GICD_IPRIORITYR + i); + + /* Wait for the settings to sync-in */ + gicv3_gicd_wait_for_rwp(); + + /* Finally, enable the distributor globally with ARE */ + writel(GICD_CTLR_ARE_NS | GICD_CTLR_ENABLE_G1A | + GICD_CTLR_ENABLE_G1, GICD_BASE_GVA + GICD_CTLR); + gicv3_gicd_wait_for_rwp(); +} + +static void gicv3_init(unsigned int nr_cpus) +{ + GUEST_ASSERT(nr_cpus <= GICV3_MAX_CPUS); + + gicv3_data.nr_cpus = nr_cpus; + gicv3_data.nr_spis = GICD_TYPER_SPIS( + readl(GICD_BASE_GVA + GICD_TYPER)); + if (gicv3_data.nr_spis > 1020) + gicv3_data.nr_spis = 1020; + + /* + * Initialize only the distributor for now. + * The redistributor and CPU interfaces are initialized + * later for every PE. 
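+	 * Each PE brings up its own redistributor and CPU interface by calling
+	 * gicv3_cpu_init() before it starts taking interrupts.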
+ */ + gicv3_dist_init(); +} + +const struct gic_common_ops gicv3_ops = { + .gic_init = gicv3_init, + .gic_cpu_init = gicv3_cpu_init, + .gic_irq_enable = gicv3_irq_enable, + .gic_irq_disable = gicv3_irq_disable, + .gic_read_iar = gicv3_read_iar, + .gic_write_eoir = gicv3_write_eoir, + .gic_write_dir = gicv3_write_dir, + .gic_set_priority_mask = gicv3_set_priority_mask, + .gic_set_eoi_split = gicv3_set_eoi_split, + .gic_set_priority = gicv3_set_priority, + .gic_irq_set_active = gicv3_irq_set_active, + .gic_irq_clear_active = gicv3_irq_clear_active, + .gic_irq_get_active = gicv3_irq_get_active, + .gic_irq_set_pending = gicv3_irq_set_pending, + .gic_irq_clear_pending = gicv3_irq_clear_pending, + .gic_irq_get_pending = gicv3_irq_get_pending, + .gic_irq_set_config = gicv3_irq_set_config, +}; + +void gic_rdist_enable_lpis(vm_paddr_t cfg_table, size_t cfg_table_size, + vm_paddr_t pend_table) +{ + volatile void *rdist_base = gicr_base_cpu(guest_get_vcpuid()); + + u32 ctlr; + u64 val; + + val = (cfg_table | + GICR_PROPBASER_InnerShareable | + GICR_PROPBASER_RaWaWb | + ((ilog2(cfg_table_size) - 1) & GICR_PROPBASER_IDBITS_MASK)); + writeq_relaxed(val, rdist_base + GICR_PROPBASER); + + val = (pend_table | + GICR_PENDBASER_InnerShareable | + GICR_PENDBASER_RaWaWb); + writeq_relaxed(val, rdist_base + GICR_PENDBASER); + + ctlr = readl_relaxed(rdist_base + GICR_CTLR); + ctlr |= GICR_CTLR_ENABLE_LPIS; + writel_relaxed(ctlr, rdist_base + GICR_CTLR); +} diff --git a/tools/testing/selftests/kvm/lib/arm64/gic_v3_its.c b/tools/testing/selftests/kvm/lib/arm64/gic_v3_its.c new file mode 100644 index 000000000000..09f270545646 --- /dev/null +++ b/tools/testing/selftests/kvm/lib/arm64/gic_v3_its.c @@ -0,0 +1,248 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Guest ITS library, generously donated by drivers/irqchip/irq-gic-v3-its.c + * over in the kernel tree. 
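+ * It carries just enough of the command queue handling (MAPD, MAPC, MAPTI
+ * and INVALL) for the selftests to map device/event pairs to LPIs.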
+ */ + +#include +#include +#include +#include + +#include "kvm_util.h" +#include "vgic.h" +#include "gic.h" +#include "gic_v3.h" +#include "processor.h" + +static u64 its_read_u64(unsigned long offset) +{ + return readq_relaxed(GITS_BASE_GVA + offset); +} + +static void its_write_u64(unsigned long offset, u64 val) +{ + writeq_relaxed(val, GITS_BASE_GVA + offset); +} + +static u32 its_read_u32(unsigned long offset) +{ + return readl_relaxed(GITS_BASE_GVA + offset); +} + +static void its_write_u32(unsigned long offset, u32 val) +{ + writel_relaxed(val, GITS_BASE_GVA + offset); +} + +static unsigned long its_find_baser(unsigned int type) +{ + int i; + + for (i = 0; i < GITS_BASER_NR_REGS; i++) { + u64 baser; + unsigned long offset = GITS_BASER + (i * sizeof(baser)); + + baser = its_read_u64(offset); + if (GITS_BASER_TYPE(baser) == type) + return offset; + } + + GUEST_FAIL("Couldn't find an ITS BASER of type %u", type); + return -1; +} + +static void its_install_table(unsigned int type, vm_paddr_t base, size_t size) +{ + unsigned long offset = its_find_baser(type); + u64 baser; + + baser = ((size / SZ_64K) - 1) | + GITS_BASER_PAGE_SIZE_64K | + GITS_BASER_InnerShareable | + base | + GITS_BASER_RaWaWb | + GITS_BASER_VALID; + + its_write_u64(offset, baser); +} + +static void its_install_cmdq(vm_paddr_t base, size_t size) +{ + u64 cbaser; + + cbaser = ((size / SZ_4K) - 1) | + GITS_CBASER_InnerShareable | + base | + GITS_CBASER_RaWaWb | + GITS_CBASER_VALID; + + its_write_u64(GITS_CBASER, cbaser); +} + +void its_init(vm_paddr_t coll_tbl, size_t coll_tbl_sz, + vm_paddr_t device_tbl, size_t device_tbl_sz, + vm_paddr_t cmdq, size_t cmdq_size) +{ + u32 ctlr; + + its_install_table(GITS_BASER_TYPE_COLLECTION, coll_tbl, coll_tbl_sz); + its_install_table(GITS_BASER_TYPE_DEVICE, device_tbl, device_tbl_sz); + its_install_cmdq(cmdq, cmdq_size); + + ctlr = its_read_u32(GITS_CTLR); + ctlr |= GITS_CTLR_ENABLE; + its_write_u32(GITS_CTLR, ctlr); +} + +struct its_cmd_block { + union { + u64 raw_cmd[4]; + __le64 raw_cmd_le[4]; + }; +}; + +static inline void its_fixup_cmd(struct its_cmd_block *cmd) +{ + /* Let's fixup BE commands */ + cmd->raw_cmd_le[0] = cpu_to_le64(cmd->raw_cmd[0]); + cmd->raw_cmd_le[1] = cpu_to_le64(cmd->raw_cmd[1]); + cmd->raw_cmd_le[2] = cpu_to_le64(cmd->raw_cmd[2]); + cmd->raw_cmd_le[3] = cpu_to_le64(cmd->raw_cmd[3]); +} + +static void its_mask_encode(u64 *raw_cmd, u64 val, int h, int l) +{ + u64 mask = GENMASK_ULL(h, l); + *raw_cmd &= ~mask; + *raw_cmd |= (val << l) & mask; +} + +static void its_encode_cmd(struct its_cmd_block *cmd, u8 cmd_nr) +{ + its_mask_encode(&cmd->raw_cmd[0], cmd_nr, 7, 0); +} + +static void its_encode_devid(struct its_cmd_block *cmd, u32 devid) +{ + its_mask_encode(&cmd->raw_cmd[0], devid, 63, 32); +} + +static void its_encode_event_id(struct its_cmd_block *cmd, u32 id) +{ + its_mask_encode(&cmd->raw_cmd[1], id, 31, 0); +} + +static void its_encode_phys_id(struct its_cmd_block *cmd, u32 phys_id) +{ + its_mask_encode(&cmd->raw_cmd[1], phys_id, 63, 32); +} + +static void its_encode_size(struct its_cmd_block *cmd, u8 size) +{ + its_mask_encode(&cmd->raw_cmd[1], size, 4, 0); +} + +static void its_encode_itt(struct its_cmd_block *cmd, u64 itt_addr) +{ + its_mask_encode(&cmd->raw_cmd[2], itt_addr >> 8, 51, 8); +} + +static void its_encode_valid(struct its_cmd_block *cmd, int valid) +{ + its_mask_encode(&cmd->raw_cmd[2], !!valid, 63, 63); +} + +static void its_encode_target(struct its_cmd_block *cmd, u64 target_addr) +{ + its_mask_encode(&cmd->raw_cmd[2], target_addr >> 16, 
51, 16); +} + +static void its_encode_collection(struct its_cmd_block *cmd, u16 col) +{ + its_mask_encode(&cmd->raw_cmd[2], col, 15, 0); +} + +#define GITS_CMDQ_POLL_ITERATIONS 0 + +static void its_send_cmd(void *cmdq_base, struct its_cmd_block *cmd) +{ + u64 cwriter = its_read_u64(GITS_CWRITER); + struct its_cmd_block *dst = cmdq_base + cwriter; + u64 cbaser = its_read_u64(GITS_CBASER); + size_t cmdq_size; + u64 next; + int i; + + cmdq_size = ((cbaser & 0xFF) + 1) * SZ_4K; + + its_fixup_cmd(cmd); + + WRITE_ONCE(*dst, *cmd); + dsb(ishst); + next = (cwriter + sizeof(*cmd)) % cmdq_size; + its_write_u64(GITS_CWRITER, next); + + /* + * Polling isn't necessary considering KVM's ITS emulation at the time + * of writing this, as the CMDQ is processed synchronously after a write + * to CWRITER. + */ + for (i = 0; its_read_u64(GITS_CREADR) != next; i++) { + __GUEST_ASSERT(i < GITS_CMDQ_POLL_ITERATIONS, + "ITS didn't process command at offset %lu after %d iterations\n", + cwriter, i); + + cpu_relax(); + } +} + +void its_send_mapd_cmd(void *cmdq_base, u32 device_id, vm_paddr_t itt_base, + size_t itt_size, bool valid) +{ + struct its_cmd_block cmd = {}; + + its_encode_cmd(&cmd, GITS_CMD_MAPD); + its_encode_devid(&cmd, device_id); + its_encode_size(&cmd, ilog2(itt_size) - 1); + its_encode_itt(&cmd, itt_base); + its_encode_valid(&cmd, valid); + + its_send_cmd(cmdq_base, &cmd); +} + +void its_send_mapc_cmd(void *cmdq_base, u32 vcpu_id, u32 collection_id, bool valid) +{ + struct its_cmd_block cmd = {}; + + its_encode_cmd(&cmd, GITS_CMD_MAPC); + its_encode_collection(&cmd, collection_id); + its_encode_target(&cmd, vcpu_id); + its_encode_valid(&cmd, valid); + + its_send_cmd(cmdq_base, &cmd); +} + +void its_send_mapti_cmd(void *cmdq_base, u32 device_id, u32 event_id, + u32 collection_id, u32 intid) +{ + struct its_cmd_block cmd = {}; + + its_encode_cmd(&cmd, GITS_CMD_MAPTI); + its_encode_devid(&cmd, device_id); + its_encode_event_id(&cmd, event_id); + its_encode_phys_id(&cmd, intid); + its_encode_collection(&cmd, collection_id); + + its_send_cmd(cmdq_base, &cmd); +} + +void its_send_invall_cmd(void *cmdq_base, u32 collection_id) +{ + struct its_cmd_block cmd = {}; + + its_encode_cmd(&cmd, GITS_CMD_INVALL); + its_encode_collection(&cmd, collection_id); + + its_send_cmd(cmdq_base, &cmd); +} diff --git a/tools/testing/selftests/kvm/lib/arm64/handlers.S b/tools/testing/selftests/kvm/lib/arm64/handlers.S new file mode 100644 index 000000000000..0e443eadfac6 --- /dev/null +++ b/tools/testing/selftests/kvm/lib/arm64/handlers.S @@ -0,0 +1,126 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +.macro save_registers + add sp, sp, #-16 * 17 + + stp x0, x1, [sp, #16 * 0] + stp x2, x3, [sp, #16 * 1] + stp x4, x5, [sp, #16 * 2] + stp x6, x7, [sp, #16 * 3] + stp x8, x9, [sp, #16 * 4] + stp x10, x11, [sp, #16 * 5] + stp x12, x13, [sp, #16 * 6] + stp x14, x15, [sp, #16 * 7] + stp x16, x17, [sp, #16 * 8] + stp x18, x19, [sp, #16 * 9] + stp x20, x21, [sp, #16 * 10] + stp x22, x23, [sp, #16 * 11] + stp x24, x25, [sp, #16 * 12] + stp x26, x27, [sp, #16 * 13] + stp x28, x29, [sp, #16 * 14] + + /* + * This stores sp_el1 into ex_regs.sp so exception handlers can "look" + * at it. It will _not_ be used to restore the sp on return from the + * exception so handlers can not update it. 
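+	 * The value saved here is the SP as it was on exception entry, i.e.
+	 * before the 16 * 17 bytes of this frame were pushed.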
+ */ + add x1, sp, #16 * 17 + stp x30, x1, [sp, #16 * 15] /* x30, SP */ + + mrs x1, elr_el1 + mrs x2, spsr_el1 + stp x1, x2, [sp, #16 * 16] /* PC, PSTATE */ +.endm + +.macro restore_registers + ldp x1, x2, [sp, #16 * 16] /* PC, PSTATE */ + msr elr_el1, x1 + msr spsr_el1, x2 + + /* sp is not restored */ + ldp x30, xzr, [sp, #16 * 15] /* x30, SP */ + + ldp x28, x29, [sp, #16 * 14] + ldp x26, x27, [sp, #16 * 13] + ldp x24, x25, [sp, #16 * 12] + ldp x22, x23, [sp, #16 * 11] + ldp x20, x21, [sp, #16 * 10] + ldp x18, x19, [sp, #16 * 9] + ldp x16, x17, [sp, #16 * 8] + ldp x14, x15, [sp, #16 * 7] + ldp x12, x13, [sp, #16 * 6] + ldp x10, x11, [sp, #16 * 5] + ldp x8, x9, [sp, #16 * 4] + ldp x6, x7, [sp, #16 * 3] + ldp x4, x5, [sp, #16 * 2] + ldp x2, x3, [sp, #16 * 1] + ldp x0, x1, [sp, #16 * 0] + + add sp, sp, #16 * 17 + + eret +.endm + +.pushsection ".entry.text", "ax" +.balign 0x800 +.global vectors +vectors: +.popsection + +.set vector, 0 + +/* + * Build an exception handler for vector and append a jump to it into + * vectors (while making sure that it's 0x80 aligned). + */ +.macro HANDLER, label +handler_\label: + save_registers + mov x0, sp + mov x1, #vector + bl route_exception + restore_registers + +.pushsection ".entry.text", "ax" +.balign 0x80 + b handler_\label +.popsection + +.set vector, vector + 1 +.endm + +.macro HANDLER_INVALID +.pushsection ".entry.text", "ax" +.balign 0x80 +/* This will abort so no need to save and restore registers. */ + mov x0, #vector + mov x1, #0 /* ec */ + mov x2, #0 /* valid_ec */ + b kvm_exit_unexpected_exception +.popsection + +.set vector, vector + 1 +.endm + +/* + * Caution: be sure to not add anything between the declaration of vectors + * above and these macro calls that will build the vectors table below it. + */ + HANDLER_INVALID // Synchronous EL1t + HANDLER_INVALID // IRQ EL1t + HANDLER_INVALID // FIQ EL1t + HANDLER_INVALID // Error EL1t + + HANDLER el1h_sync // Synchronous EL1h + HANDLER el1h_irq // IRQ EL1h + HANDLER el1h_fiq // FIQ EL1h + HANDLER el1h_error // Error EL1h + + HANDLER el0_sync_64 // Synchronous 64-bit EL0 + HANDLER el0_irq_64 // IRQ 64-bit EL0 + HANDLER el0_fiq_64 // FIQ 64-bit EL0 + HANDLER el0_error_64 // Error 64-bit EL0 + + HANDLER el0_sync_32 // Synchronous 32-bit EL0 + HANDLER el0_irq_32 // IRQ 32-bit EL0 + HANDLER el0_fiq_32 // FIQ 32-bit EL0 + HANDLER el0_error_32 // Error 32-bit EL0 diff --git a/tools/testing/selftests/kvm/lib/arm64/processor.c b/tools/testing/selftests/kvm/lib/arm64/processor.c new file mode 100644 index 000000000000..7ba3aa3755f3 --- /dev/null +++ b/tools/testing/selftests/kvm/lib/arm64/processor.c @@ -0,0 +1,647 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * AArch64 code + * + * Copyright (C) 2018, Red Hat, Inc. 
+ */ + +#include +#include + +#include "guest_modes.h" +#include "kvm_util.h" +#include "processor.h" +#include "ucall_common.h" + +#include +#include + +#define DEFAULT_ARM64_GUEST_STACK_VADDR_MIN 0xac0000 + +static vm_vaddr_t exception_handlers; + +static uint64_t page_align(struct kvm_vm *vm, uint64_t v) +{ + return (v + vm->page_size) & ~(vm->page_size - 1); +} + +static uint64_t pgd_index(struct kvm_vm *vm, vm_vaddr_t gva) +{ + unsigned int shift = (vm->pgtable_levels - 1) * (vm->page_shift - 3) + vm->page_shift; + uint64_t mask = (1UL << (vm->va_bits - shift)) - 1; + + return (gva >> shift) & mask; +} + +static uint64_t pud_index(struct kvm_vm *vm, vm_vaddr_t gva) +{ + unsigned int shift = 2 * (vm->page_shift - 3) + vm->page_shift; + uint64_t mask = (1UL << (vm->page_shift - 3)) - 1; + + TEST_ASSERT(vm->pgtable_levels == 4, + "Mode %d does not have 4 page table levels", vm->mode); + + return (gva >> shift) & mask; +} + +static uint64_t pmd_index(struct kvm_vm *vm, vm_vaddr_t gva) +{ + unsigned int shift = (vm->page_shift - 3) + vm->page_shift; + uint64_t mask = (1UL << (vm->page_shift - 3)) - 1; + + TEST_ASSERT(vm->pgtable_levels >= 3, + "Mode %d does not have >= 3 page table levels", vm->mode); + + return (gva >> shift) & mask; +} + +static uint64_t pte_index(struct kvm_vm *vm, vm_vaddr_t gva) +{ + uint64_t mask = (1UL << (vm->page_shift - 3)) - 1; + return (gva >> vm->page_shift) & mask; +} + +static inline bool use_lpa2_pte_format(struct kvm_vm *vm) +{ + return (vm->page_size == SZ_4K || vm->page_size == SZ_16K) && + (vm->pa_bits > 48 || vm->va_bits > 48); +} + +static uint64_t addr_pte(struct kvm_vm *vm, uint64_t pa, uint64_t attrs) +{ + uint64_t pte; + + if (use_lpa2_pte_format(vm)) { + pte = pa & GENMASK(49, vm->page_shift); + pte |= FIELD_GET(GENMASK(51, 50), pa) << 8; + attrs &= ~GENMASK(9, 8); + } else { + pte = pa & GENMASK(47, vm->page_shift); + if (vm->page_shift == 16) + pte |= FIELD_GET(GENMASK(51, 48), pa) << 12; + } + pte |= attrs; + + return pte; +} + +static uint64_t pte_addr(struct kvm_vm *vm, uint64_t pte) +{ + uint64_t pa; + + if (use_lpa2_pte_format(vm)) { + pa = pte & GENMASK(49, vm->page_shift); + pa |= FIELD_GET(GENMASK(9, 8), pte) << 50; + } else { + pa = pte & GENMASK(47, vm->page_shift); + if (vm->page_shift == 16) + pa |= FIELD_GET(GENMASK(15, 12), pte) << 48; + } + + return pa; +} + +static uint64_t ptrs_per_pgd(struct kvm_vm *vm) +{ + unsigned int shift = (vm->pgtable_levels - 1) * (vm->page_shift - 3) + vm->page_shift; + return 1 << (vm->va_bits - shift); +} + +static uint64_t __maybe_unused ptrs_per_pte(struct kvm_vm *vm) +{ + return 1 << (vm->page_shift - 3); +} + +void virt_arch_pgd_alloc(struct kvm_vm *vm) +{ + size_t nr_pages = page_align(vm, ptrs_per_pgd(vm) * 8) / vm->page_size; + + if (vm->pgd_created) + return; + + vm->pgd = vm_phy_pages_alloc(vm, nr_pages, + KVM_GUEST_PAGE_TABLE_MIN_PADDR, + vm->memslots[MEM_REGION_PT]); + vm->pgd_created = true; +} + +static void _virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, + uint64_t flags) +{ + uint8_t attr_idx = flags & 7; + uint64_t *ptep; + + TEST_ASSERT((vaddr % vm->page_size) == 0, + "Virtual address not on page boundary,\n" + " vaddr: 0x%lx vm->page_size: 0x%x", vaddr, vm->page_size); + TEST_ASSERT(sparsebit_is_set(vm->vpages_valid, + (vaddr >> vm->page_shift)), + "Invalid virtual address, vaddr: 0x%lx", vaddr); + TEST_ASSERT((paddr % vm->page_size) == 0, + "Physical address not on page boundary,\n" + " paddr: 0x%lx vm->page_size: 0x%x", paddr, vm->page_size); + TEST_ASSERT((paddr 
>> vm->page_shift) <= vm->max_gfn, + "Physical address beyond beyond maximum supported,\n" + " paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x", + paddr, vm->max_gfn, vm->page_size); + + ptep = addr_gpa2hva(vm, vm->pgd) + pgd_index(vm, vaddr) * 8; + if (!*ptep) + *ptep = addr_pte(vm, vm_alloc_page_table(vm), 3); + + switch (vm->pgtable_levels) { + case 4: + ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pud_index(vm, vaddr) * 8; + if (!*ptep) + *ptep = addr_pte(vm, vm_alloc_page_table(vm), 3); + /* fall through */ + case 3: + ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pmd_index(vm, vaddr) * 8; + if (!*ptep) + *ptep = addr_pte(vm, vm_alloc_page_table(vm), 3); + /* fall through */ + case 2: + ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pte_index(vm, vaddr) * 8; + break; + default: + TEST_FAIL("Page table levels must be 2, 3, or 4"); + } + + *ptep = addr_pte(vm, paddr, (attr_idx << 2) | (1 << 10) | 3); /* AF */ +} + +void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr) +{ + uint64_t attr_idx = MT_NORMAL; + + _virt_pg_map(vm, vaddr, paddr, attr_idx); +} + +uint64_t *virt_get_pte_hva(struct kvm_vm *vm, vm_vaddr_t gva) +{ + uint64_t *ptep; + + if (!vm->pgd_created) + goto unmapped_gva; + + ptep = addr_gpa2hva(vm, vm->pgd) + pgd_index(vm, gva) * 8; + if (!ptep) + goto unmapped_gva; + + switch (vm->pgtable_levels) { + case 4: + ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pud_index(vm, gva) * 8; + if (!ptep) + goto unmapped_gva; + /* fall through */ + case 3: + ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pmd_index(vm, gva) * 8; + if (!ptep) + goto unmapped_gva; + /* fall through */ + case 2: + ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pte_index(vm, gva) * 8; + if (!ptep) + goto unmapped_gva; + break; + default: + TEST_FAIL("Page table levels must be 2, 3, or 4"); + } + + return ptep; + +unmapped_gva: + TEST_FAIL("No mapping for vm virtual address, gva: 0x%lx", gva); + exit(EXIT_FAILURE); +} + +vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva) +{ + uint64_t *ptep = virt_get_pte_hva(vm, gva); + + return pte_addr(vm, *ptep) + (gva & (vm->page_size - 1)); +} + +static void pte_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent, uint64_t page, int level) +{ +#ifdef DEBUG + static const char * const type[] = { "", "pud", "pmd", "pte" }; + uint64_t pte, *ptep; + + if (level == 4) + return; + + for (pte = page; pte < page + ptrs_per_pte(vm) * 8; pte += 8) { + ptep = addr_gpa2hva(vm, pte); + if (!*ptep) + continue; + fprintf(stream, "%*s%s: %lx: %lx at %p\n", indent, "", type[level], pte, *ptep, ptep); + pte_dump(stream, vm, indent + 1, pte_addr(vm, *ptep), level + 1); + } +#endif +} + +void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) +{ + int level = 4 - (vm->pgtable_levels - 1); + uint64_t pgd, *ptep; + + if (!vm->pgd_created) + return; + + for (pgd = vm->pgd; pgd < vm->pgd + ptrs_per_pgd(vm) * 8; pgd += 8) { + ptep = addr_gpa2hva(vm, pgd); + if (!*ptep) + continue; + fprintf(stream, "%*spgd: %lx: %lx at %p\n", indent, "", pgd, *ptep, ptep); + pte_dump(stream, vm, indent + 1, pte_addr(vm, *ptep), level); + } +} + +void aarch64_vcpu_setup(struct kvm_vcpu *vcpu, struct kvm_vcpu_init *init) +{ + struct kvm_vcpu_init default_init = { .target = -1, }; + struct kvm_vm *vm = vcpu->vm; + uint64_t sctlr_el1, tcr_el1, ttbr0_el1; + + if (!init) + init = &default_init; + + if (init->target == -1) { + struct kvm_vcpu_init preferred; + vm_ioctl(vm, KVM_ARM_PREFERRED_TARGET, &preferred); + init->target = preferred.target; + } + + 
vcpu_ioctl(vcpu, KVM_ARM_VCPU_INIT, init); + + /* + * Enable FP/ASIMD to avoid trapping when accessing Q0-Q15 + * registers, which the variable argument list macros do. + */ + vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_CPACR_EL1), 3 << 20); + + sctlr_el1 = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_SCTLR_EL1)); + tcr_el1 = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_TCR_EL1)); + + /* Configure base granule size */ + switch (vm->mode) { + case VM_MODE_PXXV48_4K: + TEST_FAIL("AArch64 does not support 4K sized pages " + "with ANY-bit physical address ranges"); + case VM_MODE_P52V48_64K: + case VM_MODE_P48V48_64K: + case VM_MODE_P40V48_64K: + case VM_MODE_P36V48_64K: + tcr_el1 |= 1ul << 14; /* TG0 = 64KB */ + break; + case VM_MODE_P52V48_16K: + case VM_MODE_P48V48_16K: + case VM_MODE_P40V48_16K: + case VM_MODE_P36V48_16K: + case VM_MODE_P36V47_16K: + tcr_el1 |= 2ul << 14; /* TG0 = 16KB */ + break; + case VM_MODE_P52V48_4K: + case VM_MODE_P48V48_4K: + case VM_MODE_P40V48_4K: + case VM_MODE_P36V48_4K: + tcr_el1 |= 0ul << 14; /* TG0 = 4KB */ + break; + default: + TEST_FAIL("Unknown guest mode, mode: 0x%x", vm->mode); + } + + ttbr0_el1 = vm->pgd & GENMASK(47, vm->page_shift); + + /* Configure output size */ + switch (vm->mode) { + case VM_MODE_P52V48_4K: + case VM_MODE_P52V48_16K: + case VM_MODE_P52V48_64K: + tcr_el1 |= 6ul << 32; /* IPS = 52 bits */ + ttbr0_el1 |= FIELD_GET(GENMASK(51, 48), vm->pgd) << 2; + break; + case VM_MODE_P48V48_4K: + case VM_MODE_P48V48_16K: + case VM_MODE_P48V48_64K: + tcr_el1 |= 5ul << 32; /* IPS = 48 bits */ + break; + case VM_MODE_P40V48_4K: + case VM_MODE_P40V48_16K: + case VM_MODE_P40V48_64K: + tcr_el1 |= 2ul << 32; /* IPS = 40 bits */ + break; + case VM_MODE_P36V48_4K: + case VM_MODE_P36V48_16K: + case VM_MODE_P36V48_64K: + case VM_MODE_P36V47_16K: + tcr_el1 |= 1ul << 32; /* IPS = 36 bits */ + break; + default: + TEST_FAIL("Unknown guest mode, mode: 0x%x", vm->mode); + } + + sctlr_el1 |= (1 << 0) | (1 << 2) | (1 << 12) /* M | C | I */; + /* TCR_EL1 |= IRGN0:WBWA | ORGN0:WBWA | SH0:Inner-Shareable */; + tcr_el1 |= (1 << 8) | (1 << 10) | (3 << 12); + tcr_el1 |= (64 - vm->va_bits) /* T0SZ */; + if (use_lpa2_pte_format(vm)) + tcr_el1 |= (1ul << 59) /* DS */; + + vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_SCTLR_EL1), sctlr_el1); + vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_TCR_EL1), tcr_el1); + vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_MAIR_EL1), DEFAULT_MAIR_EL1); + vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_TTBR0_EL1), ttbr0_el1); + vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_TPIDR_EL1), vcpu->id); +} + +void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, uint8_t indent) +{ + uint64_t pstate, pc; + + pstate = vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.pstate)); + pc = vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.pc)); + + fprintf(stream, "%*spstate: 0x%.16lx pc: 0x%.16lx\n", + indent, "", pstate, pc); +} + +void vcpu_arch_set_entry_point(struct kvm_vcpu *vcpu, void *guest_code) +{ + vcpu_set_reg(vcpu, ARM64_CORE_REG(regs.pc), (uint64_t)guest_code); +} + +static struct kvm_vcpu *__aarch64_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id, + struct kvm_vcpu_init *init) +{ + size_t stack_size; + uint64_t stack_vaddr; + struct kvm_vcpu *vcpu = __vm_vcpu_add(vm, vcpu_id); + + stack_size = vm->page_size == 4096 ? 
DEFAULT_STACK_PGS * vm->page_size : + vm->page_size; + stack_vaddr = __vm_vaddr_alloc(vm, stack_size, + DEFAULT_ARM64_GUEST_STACK_VADDR_MIN, + MEM_REGION_DATA); + + aarch64_vcpu_setup(vcpu, init); + + vcpu_set_reg(vcpu, ARM64_CORE_REG(sp_el1), stack_vaddr + stack_size); + return vcpu; +} + +struct kvm_vcpu *aarch64_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id, + struct kvm_vcpu_init *init, void *guest_code) +{ + struct kvm_vcpu *vcpu = __aarch64_vcpu_add(vm, vcpu_id, init); + + vcpu_arch_set_entry_point(vcpu, guest_code); + + return vcpu; +} + +struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id) +{ + return __aarch64_vcpu_add(vm, vcpu_id, NULL); +} + +void vcpu_args_set(struct kvm_vcpu *vcpu, unsigned int num, ...) +{ + va_list ap; + int i; + + TEST_ASSERT(num >= 1 && num <= 8, "Unsupported number of args,\n" + " num: %u", num); + + va_start(ap, num); + + for (i = 0; i < num; i++) { + vcpu_set_reg(vcpu, ARM64_CORE_REG(regs.regs[i]), + va_arg(ap, uint64_t)); + } + + va_end(ap); +} + +void kvm_exit_unexpected_exception(int vector, uint64_t ec, bool valid_ec) +{ + ucall(UCALL_UNHANDLED, 3, vector, ec, valid_ec); + while (1) + ; +} + +void assert_on_unhandled_exception(struct kvm_vcpu *vcpu) +{ + struct ucall uc; + + if (get_ucall(vcpu, &uc) != UCALL_UNHANDLED) + return; + + if (uc.args[2]) /* valid_ec */ { + assert(VECTOR_IS_SYNC(uc.args[0])); + TEST_FAIL("Unexpected exception (vector:0x%lx, ec:0x%lx)", + uc.args[0], uc.args[1]); + } else { + assert(!VECTOR_IS_SYNC(uc.args[0])); + TEST_FAIL("Unexpected exception (vector:0x%lx)", + uc.args[0]); + } +} + +struct handlers { + handler_fn exception_handlers[VECTOR_NUM][ESR_ELx_EC_MAX + 1]; +}; + +void vcpu_init_descriptor_tables(struct kvm_vcpu *vcpu) +{ + extern char vectors; + + vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_VBAR_EL1), (uint64_t)&vectors); +} + +void route_exception(struct ex_regs *regs, int vector) +{ + struct handlers *handlers = (struct handlers *)exception_handlers; + bool valid_ec; + int ec = 0; + + switch (vector) { + case VECTOR_SYNC_CURRENT: + case VECTOR_SYNC_LOWER_64: + ec = ESR_ELx_EC(read_sysreg(esr_el1)); + valid_ec = true; + break; + case VECTOR_IRQ_CURRENT: + case VECTOR_IRQ_LOWER_64: + case VECTOR_FIQ_CURRENT: + case VECTOR_FIQ_LOWER_64: + case VECTOR_ERROR_CURRENT: + case VECTOR_ERROR_LOWER_64: + ec = 0; + valid_ec = false; + break; + default: + valid_ec = false; + goto unexpected_exception; + } + + if (handlers && handlers->exception_handlers[vector][ec]) + return handlers->exception_handlers[vector][ec](regs); + +unexpected_exception: + kvm_exit_unexpected_exception(vector, ec, valid_ec); +} + +void vm_init_descriptor_tables(struct kvm_vm *vm) +{ + vm->handlers = __vm_vaddr_alloc(vm, sizeof(struct handlers), + vm->page_size, MEM_REGION_DATA); + + *(vm_vaddr_t *)addr_gva2hva(vm, (vm_vaddr_t)(&exception_handlers)) = vm->handlers; +} + +void vm_install_sync_handler(struct kvm_vm *vm, int vector, int ec, + void (*handler)(struct ex_regs *)) +{ + struct handlers *handlers = addr_gva2hva(vm, vm->handlers); + + assert(VECTOR_IS_SYNC(vector)); + assert(vector < VECTOR_NUM); + assert(ec <= ESR_ELx_EC_MAX); + handlers->exception_handlers[vector][ec] = handler; +} + +void vm_install_exception_handler(struct kvm_vm *vm, int vector, + void (*handler)(struct ex_regs *)) +{ + struct handlers *handlers = addr_gva2hva(vm, vm->handlers); + + assert(!VECTOR_IS_SYNC(vector)); + assert(vector < VECTOR_NUM); + handlers->exception_handlers[vector][0] = handler; +} + +uint32_t guest_get_vcpuid(void) +{ + return 
read_sysreg(tpidr_el1); +} + +static uint32_t max_ipa_for_page_size(uint32_t vm_ipa, uint32_t gran, + uint32_t not_sup_val, uint32_t ipa52_min_val) +{ + if (gran == not_sup_val) + return 0; + else if (gran >= ipa52_min_val && vm_ipa >= 52) + return 52; + else + return min(vm_ipa, 48U); +} + +void aarch64_get_supported_page_sizes(uint32_t ipa, uint32_t *ipa4k, + uint32_t *ipa16k, uint32_t *ipa64k) +{ + struct kvm_vcpu_init preferred_init; + int kvm_fd, vm_fd, vcpu_fd, err; + uint64_t val; + uint32_t gran; + struct kvm_one_reg reg = { + .id = KVM_ARM64_SYS_REG(SYS_ID_AA64MMFR0_EL1), + .addr = (uint64_t)&val, + }; + + kvm_fd = open_kvm_dev_path_or_exit(); + vm_fd = __kvm_ioctl(kvm_fd, KVM_CREATE_VM, (void *)(unsigned long)ipa); + TEST_ASSERT(vm_fd >= 0, KVM_IOCTL_ERROR(KVM_CREATE_VM, vm_fd)); + + vcpu_fd = ioctl(vm_fd, KVM_CREATE_VCPU, 0); + TEST_ASSERT(vcpu_fd >= 0, KVM_IOCTL_ERROR(KVM_CREATE_VCPU, vcpu_fd)); + + err = ioctl(vm_fd, KVM_ARM_PREFERRED_TARGET, &preferred_init); + TEST_ASSERT(err == 0, KVM_IOCTL_ERROR(KVM_ARM_PREFERRED_TARGET, err)); + err = ioctl(vcpu_fd, KVM_ARM_VCPU_INIT, &preferred_init); + TEST_ASSERT(err == 0, KVM_IOCTL_ERROR(KVM_ARM_VCPU_INIT, err)); + + err = ioctl(vcpu_fd, KVM_GET_ONE_REG, ®); + TEST_ASSERT(err == 0, KVM_IOCTL_ERROR(KVM_GET_ONE_REG, vcpu_fd)); + + gran = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_TGRAN4), val); + *ipa4k = max_ipa_for_page_size(ipa, gran, ID_AA64MMFR0_EL1_TGRAN4_NI, + ID_AA64MMFR0_EL1_TGRAN4_52_BIT); + + gran = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_TGRAN64), val); + *ipa64k = max_ipa_for_page_size(ipa, gran, ID_AA64MMFR0_EL1_TGRAN64_NI, + ID_AA64MMFR0_EL1_TGRAN64_IMP); + + gran = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_TGRAN16), val); + *ipa16k = max_ipa_for_page_size(ipa, gran, ID_AA64MMFR0_EL1_TGRAN16_NI, + ID_AA64MMFR0_EL1_TGRAN16_52_BIT); + + close(vcpu_fd); + close(vm_fd); + close(kvm_fd); +} + +#define __smccc_call(insn, function_id, arg0, arg1, arg2, arg3, arg4, arg5, \ + arg6, res) \ + asm volatile("mov w0, %w[function_id]\n" \ + "mov x1, %[arg0]\n" \ + "mov x2, %[arg1]\n" \ + "mov x3, %[arg2]\n" \ + "mov x4, %[arg3]\n" \ + "mov x5, %[arg4]\n" \ + "mov x6, %[arg5]\n" \ + "mov x7, %[arg6]\n" \ + #insn "#0\n" \ + "mov %[res0], x0\n" \ + "mov %[res1], x1\n" \ + "mov %[res2], x2\n" \ + "mov %[res3], x3\n" \ + : [res0] "=r"(res->a0), [res1] "=r"(res->a1), \ + [res2] "=r"(res->a2), [res3] "=r"(res->a3) \ + : [function_id] "r"(function_id), [arg0] "r"(arg0), \ + [arg1] "r"(arg1), [arg2] "r"(arg2), [arg3] "r"(arg3), \ + [arg4] "r"(arg4), [arg5] "r"(arg5), [arg6] "r"(arg6) \ + : "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7") + + +void smccc_hvc(uint32_t function_id, uint64_t arg0, uint64_t arg1, + uint64_t arg2, uint64_t arg3, uint64_t arg4, uint64_t arg5, + uint64_t arg6, struct arm_smccc_res *res) +{ + __smccc_call(hvc, function_id, arg0, arg1, arg2, arg3, arg4, arg5, + arg6, res); +} + +void smccc_smc(uint32_t function_id, uint64_t arg0, uint64_t arg1, + uint64_t arg2, uint64_t arg3, uint64_t arg4, uint64_t arg5, + uint64_t arg6, struct arm_smccc_res *res) +{ + __smccc_call(smc, function_id, arg0, arg1, arg2, arg3, arg4, arg5, + arg6, res); +} + +void kvm_selftest_arch_init(void) +{ + /* + * arm64 doesn't have a true default mode, so start by computing the + * available IPA space and page sizes early. + */ + guest_modes_append_default(); +} + +void vm_vaddr_populate_bitmap(struct kvm_vm *vm) +{ + /* + * arm64 selftests use only TTBR0_EL1, meaning that the valid VA space + * is [0, 2^(64 - TCR_EL1.T0SZ)). 
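+	 * aarch64_vcpu_setup() programs T0SZ as 64 - vm->va_bits, so this is
+	 * simply [0, 2^va_bits).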
+ */ + sparsebit_set_num(vm->vpages_valid, 0, + (1ULL << vm->va_bits) >> vm->page_shift); +} + +/* Helper to call wfi instruction. */ +void wfi(void) +{ + asm volatile("wfi"); +} diff --git a/tools/testing/selftests/kvm/lib/arm64/spinlock.c b/tools/testing/selftests/kvm/lib/arm64/spinlock.c new file mode 100644 index 000000000000..a076e780be5d --- /dev/null +++ b/tools/testing/selftests/kvm/lib/arm64/spinlock.c @@ -0,0 +1,27 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * ARM64 Spinlock support + */ +#include + +#include "spinlock.h" + +void spin_lock(struct spinlock *lock) +{ + int val, res; + + asm volatile( + "1: ldaxr %w0, [%2]\n" + " cbnz %w0, 1b\n" + " mov %w0, #1\n" + " stxr %w1, %w0, [%2]\n" + " cbnz %w1, 1b\n" + : "=&r" (val), "=&r" (res) + : "r" (&lock->v) + : "memory"); +} + +void spin_unlock(struct spinlock *lock) +{ + asm volatile("stlr wzr, [%0]\n" : : "r" (&lock->v) : "memory"); +} diff --git a/tools/testing/selftests/kvm/lib/arm64/ucall.c b/tools/testing/selftests/kvm/lib/arm64/ucall.c new file mode 100644 index 000000000000..ddab0ce89d4d --- /dev/null +++ b/tools/testing/selftests/kvm/lib/arm64/ucall.c @@ -0,0 +1,34 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * ucall support. A ucall is a "hypercall to userspace". + * + * Copyright (C) 2018, Red Hat, Inc. + */ +#include "kvm_util.h" + +vm_vaddr_t *ucall_exit_mmio_addr; + +void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa) +{ + vm_vaddr_t mmio_gva = vm_vaddr_unused_gap(vm, vm->page_size, KVM_UTIL_MIN_VADDR); + + virt_map(vm, mmio_gva, mmio_gpa, 1); + + vm->ucall_mmio_addr = mmio_gpa; + + write_guest_global(vm, ucall_exit_mmio_addr, (vm_vaddr_t *)mmio_gva); +} + +void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu) +{ + struct kvm_run *run = vcpu->run; + + if (run->exit_reason == KVM_EXIT_MMIO && + run->mmio.phys_addr == vcpu->vm->ucall_mmio_addr) { + TEST_ASSERT(run->mmio.is_write && run->mmio.len == sizeof(uint64_t), + "Unexpected ucall exit mmio address access"); + return (void *)(*((uint64_t *)run->mmio.data)); + } + + return NULL; +} diff --git a/tools/testing/selftests/kvm/lib/arm64/vgic.c b/tools/testing/selftests/kvm/lib/arm64/vgic.c new file mode 100644 index 000000000000..4427f43f73ea --- /dev/null +++ b/tools/testing/selftests/kvm/lib/arm64/vgic.c @@ -0,0 +1,188 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * ARM Generic Interrupt Controller (GIC) v3 host support + */ + +#include +#include +#include +#include +#include +#include + +#include "kvm_util.h" +#include "vgic.h" +#include "gic.h" +#include "gic_v3.h" + +/* + * vGIC-v3 default host setup + * + * Input args: + * vm - KVM VM + * nr_vcpus - Number of vCPUs supported by this VM + * + * Output args: None + * + * Return: GIC file-descriptor or negative error code upon failure + * + * The function creates a vGIC-v3 device and maps the distributor and + * redistributor regions of the guest. Since it depends on the number of + * vCPUs for the VM, it must be called after all the vCPUs have been created. + */ +int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, uint32_t nr_irqs) +{ + int gic_fd; + uint64_t attr; + struct list_head *iter; + unsigned int nr_gic_pages, nr_vcpus_created = 0; + + TEST_ASSERT(nr_vcpus, "Number of vCPUs cannot be empty"); + + /* + * Make sure that the caller is infact calling this + * function after all the vCPUs are added. 
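+	 * The redistributor region registered below is sized for exactly
+	 * nr_vcpus, so a vCPU added afterwards would be left without a
+	 * redistributor.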
+ */ + list_for_each(iter, &vm->vcpus) + nr_vcpus_created++; + TEST_ASSERT(nr_vcpus == nr_vcpus_created, + "Number of vCPUs requested (%u) doesn't match with the ones created for the VM (%u)", + nr_vcpus, nr_vcpus_created); + + /* Distributor setup */ + gic_fd = __kvm_create_device(vm, KVM_DEV_TYPE_ARM_VGIC_V3); + if (gic_fd < 0) + return gic_fd; + + kvm_device_attr_set(gic_fd, KVM_DEV_ARM_VGIC_GRP_NR_IRQS, 0, &nr_irqs); + + kvm_device_attr_set(gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL, + KVM_DEV_ARM_VGIC_CTRL_INIT, NULL); + + attr = GICD_BASE_GPA; + kvm_device_attr_set(gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_V3_ADDR_TYPE_DIST, &attr); + nr_gic_pages = vm_calc_num_guest_pages(vm->mode, KVM_VGIC_V3_DIST_SIZE); + virt_map(vm, GICD_BASE_GPA, GICD_BASE_GPA, nr_gic_pages); + + /* Redistributor setup */ + attr = REDIST_REGION_ATTR_ADDR(nr_vcpus, GICR_BASE_GPA, 0, 0); + kvm_device_attr_set(gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &attr); + nr_gic_pages = vm_calc_num_guest_pages(vm->mode, + KVM_VGIC_V3_REDIST_SIZE * nr_vcpus); + virt_map(vm, GICR_BASE_GPA, GICR_BASE_GPA, nr_gic_pages); + + kvm_device_attr_set(gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL, + KVM_DEV_ARM_VGIC_CTRL_INIT, NULL); + + return gic_fd; +} + +/* should only work for level sensitive interrupts */ +int _kvm_irq_set_level_info(int gic_fd, uint32_t intid, int level) +{ + uint64_t attr = 32 * (intid / 32); + uint64_t index = intid % 32; + uint64_t val; + int ret; + + ret = __kvm_device_attr_get(gic_fd, KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO, + attr, &val); + if (ret != 0) + return ret; + + val |= 1U << index; + ret = __kvm_device_attr_set(gic_fd, KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO, + attr, &val); + return ret; +} + +void kvm_irq_set_level_info(int gic_fd, uint32_t intid, int level) +{ + int ret = _kvm_irq_set_level_info(gic_fd, intid, level); + + TEST_ASSERT(!ret, KVM_IOCTL_ERROR(KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO, ret)); +} + +int _kvm_arm_irq_line(struct kvm_vm *vm, uint32_t intid, int level) +{ + uint32_t irq = intid & KVM_ARM_IRQ_NUM_MASK; + + TEST_ASSERT(!INTID_IS_SGI(intid), "KVM_IRQ_LINE's interface itself " + "doesn't allow injecting SGIs. There's no mask for it."); + + if (INTID_IS_PPI(intid)) + irq |= KVM_ARM_IRQ_TYPE_PPI << KVM_ARM_IRQ_TYPE_SHIFT; + else + irq |= KVM_ARM_IRQ_TYPE_SPI << KVM_ARM_IRQ_TYPE_SHIFT; + + return _kvm_irq_line(vm, irq, level); +} + +void kvm_arm_irq_line(struct kvm_vm *vm, uint32_t intid, int level) +{ + int ret = _kvm_arm_irq_line(vm, intid, level); + + TEST_ASSERT(!ret, KVM_IOCTL_ERROR(KVM_IRQ_LINE, ret)); +} + +static void vgic_poke_irq(int gic_fd, uint32_t intid, struct kvm_vcpu *vcpu, + uint64_t reg_off) +{ + uint64_t reg = intid / 32; + uint64_t index = intid % 32; + uint64_t attr = reg_off + reg * 4; + uint64_t val; + bool intid_is_private = INTID_IS_SGI(intid) || INTID_IS_PPI(intid); + + uint32_t group = intid_is_private ? KVM_DEV_ARM_VGIC_GRP_REDIST_REGS + : KVM_DEV_ARM_VGIC_GRP_DIST_REGS; + + if (intid_is_private) { + /* TODO: only vcpu 0 implemented for now. */ + assert(vcpu->id == 0); + attr += SZ_64K; + } + + /* Check that the addr part of the attr is within 32 bits. */ + assert((attr & ~KVM_DEV_ARM_VGIC_OFFSET_MASK) == 0); + + /* + * All calls will succeed, even with invalid intid's, as long as the + * addr part of the attr is within 32 bits (checked above). An invalid + * intid will just make the read/writes point to above the intended + * register space (i.e., ICPENDR after ISPENDR). 
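+	 * Each of these registers is a 32-bit bank covering 32 INTIDs, which
+	 * is why reg and index are derived from intid / 32 and intid % 32
+	 * above.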
+ */ + kvm_device_attr_get(gic_fd, group, attr, &val); + val |= 1ULL << index; + kvm_device_attr_set(gic_fd, group, attr, &val); +} + +void kvm_irq_write_ispendr(int gic_fd, uint32_t intid, struct kvm_vcpu *vcpu) +{ + vgic_poke_irq(gic_fd, intid, vcpu, GICD_ISPENDR); +} + +void kvm_irq_write_isactiver(int gic_fd, uint32_t intid, struct kvm_vcpu *vcpu) +{ + vgic_poke_irq(gic_fd, intid, vcpu, GICD_ISACTIVER); +} + +int vgic_its_setup(struct kvm_vm *vm) +{ + int its_fd = kvm_create_device(vm, KVM_DEV_TYPE_ARM_VGIC_ITS); + u64 attr; + + attr = GITS_BASE_GPA; + kvm_device_attr_set(its_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_ITS_ADDR_TYPE, &attr); + + kvm_device_attr_set(its_fd, KVM_DEV_ARM_VGIC_GRP_CTRL, + KVM_DEV_ARM_VGIC_CTRL_INIT, NULL); + + virt_map(vm, GITS_BASE_GPA, GITS_BASE_GPA, + vm_calc_num_guest_pages(vm->mode, KVM_VGIC_V3_ITS_SIZE)); + + return its_fd; +} diff --git a/tools/testing/selftests/kvm/lib/s390/diag318_test_handler.c b/tools/testing/selftests/kvm/lib/s390/diag318_test_handler.c new file mode 100644 index 000000000000..2c432fa164f1 --- /dev/null +++ b/tools/testing/selftests/kvm/lib/s390/diag318_test_handler.c @@ -0,0 +1,80 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Test handler for the s390x DIAGNOSE 0x0318 instruction. + * + * Copyright (C) 2020, IBM + */ + +#include "test_util.h" +#include "kvm_util.h" + +#define ICPT_INSTRUCTION 0x04 +#define IPA0_DIAG 0x8300 + +static void guest_code(void) +{ + uint64_t diag318_info = 0x12345678; + + asm volatile ("diag %0,0,0x318\n" : : "d" (diag318_info)); +} + +/* + * The DIAGNOSE 0x0318 instruction call must be handled via userspace. As such, + * we create an ad-hoc VM here to handle the instruction then extract the + * necessary data. It is up to the caller to decide what to do with that data. + */ +static uint64_t diag318_handler(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + struct kvm_run *run; + uint64_t reg; + uint64_t diag318_info; + + vm = vm_create_with_one_vcpu(&vcpu, guest_code); + vcpu_run(vcpu); + run = vcpu->run; + + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_S390_SIEIC); + TEST_ASSERT(run->s390_sieic.icptcode == ICPT_INSTRUCTION, + "Unexpected intercept code: 0x%x", run->s390_sieic.icptcode); + TEST_ASSERT((run->s390_sieic.ipa & 0xff00) == IPA0_DIAG, + "Unexpected IPA0 code: 0x%x", (run->s390_sieic.ipa & 0xff00)); + + reg = (run->s390_sieic.ipa & 0x00f0) >> 4; + diag318_info = run->s.regs.gprs[reg]; + + TEST_ASSERT(diag318_info != 0, "DIAGNOSE 0x0318 info not set"); + + kvm_vm_free(vm); + + return diag318_info; +} + +uint64_t get_diag318_info(void) +{ + static uint64_t diag318_info; + static bool printed_skip; + + /* + * If KVM does not support diag318, then return 0 to + * ensure tests do not break. + */ + if (!kvm_has_cap(KVM_CAP_S390_DIAG318)) { + if (!printed_skip) { + fprintf(stdout, "KVM_CAP_S390_DIAG318 not supported. " + "Skipping diag318 test.\n"); + printed_skip = true; + } + return 0; + } + + /* + * If a test has previously requested the diag318 info, + * then don't bother spinning up a temporary VM again. + */ + if (!diag318_info) + diag318_info = diag318_handler(); + + return diag318_info; +} diff --git a/tools/testing/selftests/kvm/lib/s390/facility.c b/tools/testing/selftests/kvm/lib/s390/facility.c new file mode 100644 index 000000000000..d540812d911a --- /dev/null +++ b/tools/testing/selftests/kvm/lib/s390/facility.c @@ -0,0 +1,14 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright IBM Corp. 
2024 + * + * Authors: + * Hariharan Mari + * + * Contains the definition for the global variables to have the test facitlity feature. + */ + +#include "facility.h" + +uint64_t stfl_doublewords[NB_STFL_DOUBLEWORDS]; +bool stfle_flag; diff --git a/tools/testing/selftests/kvm/lib/s390/processor.c b/tools/testing/selftests/kvm/lib/s390/processor.c new file mode 100644 index 000000000000..20cfe970e3e3 --- /dev/null +++ b/tools/testing/selftests/kvm/lib/s390/processor.c @@ -0,0 +1,223 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * KVM selftest s390x library code - CPU-related functions (page tables...) + * + * Copyright (C) 2019, Red Hat, Inc. + */ + +#include "processor.h" +#include "kvm_util.h" + +#define PAGES_PER_REGION 4 + +void virt_arch_pgd_alloc(struct kvm_vm *vm) +{ + vm_paddr_t paddr; + + TEST_ASSERT(vm->page_size == PAGE_SIZE, "Unsupported page size: 0x%x", + vm->page_size); + + if (vm->pgd_created) + return; + + paddr = vm_phy_pages_alloc(vm, PAGES_PER_REGION, + KVM_GUEST_PAGE_TABLE_MIN_PADDR, + vm->memslots[MEM_REGION_PT]); + memset(addr_gpa2hva(vm, paddr), 0xff, PAGES_PER_REGION * vm->page_size); + + vm->pgd = paddr; + vm->pgd_created = true; +} + +/* + * Allocate 4 pages for a region/segment table (ri < 4), or one page for + * a page table (ri == 4). Returns a suitable region/segment table entry + * which points to the freshly allocated pages. + */ +static uint64_t virt_alloc_region(struct kvm_vm *vm, int ri) +{ + uint64_t taddr; + + taddr = vm_phy_pages_alloc(vm, ri < 4 ? PAGES_PER_REGION : 1, + KVM_GUEST_PAGE_TABLE_MIN_PADDR, 0); + memset(addr_gpa2hva(vm, taddr), 0xff, PAGES_PER_REGION * vm->page_size); + + return (taddr & REGION_ENTRY_ORIGIN) + | (((4 - ri) << 2) & REGION_ENTRY_TYPE) + | ((ri < 4 ? (PAGES_PER_REGION - 1) : 0) & REGION_ENTRY_LENGTH); +} + +void virt_arch_pg_map(struct kvm_vm *vm, uint64_t gva, uint64_t gpa) +{ + int ri, idx; + uint64_t *entry; + + TEST_ASSERT((gva % vm->page_size) == 0, + "Virtual address not on page boundary,\n" + " vaddr: 0x%lx vm->page_size: 0x%x", + gva, vm->page_size); + TEST_ASSERT(sparsebit_is_set(vm->vpages_valid, + (gva >> vm->page_shift)), + "Invalid virtual address, vaddr: 0x%lx", + gva); + TEST_ASSERT((gpa % vm->page_size) == 0, + "Physical address not on page boundary,\n" + " paddr: 0x%lx vm->page_size: 0x%x", + gva, vm->page_size); + TEST_ASSERT((gpa >> vm->page_shift) <= vm->max_gfn, + "Physical address beyond beyond maximum supported,\n" + " paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x", + gva, vm->max_gfn, vm->page_size); + + /* Walk through region and segment tables */ + entry = addr_gpa2hva(vm, vm->pgd); + for (ri = 1; ri <= 4; ri++) { + idx = (gva >> (64 - 11 * ri)) & 0x7ffu; + if (entry[idx] & REGION_ENTRY_INVALID) + entry[idx] = virt_alloc_region(vm, ri); + entry = addr_gpa2hva(vm, entry[idx] & REGION_ENTRY_ORIGIN); + } + + /* Fill in page table entry */ + idx = (gva >> PAGE_SHIFT) & 0x0ffu; /* page index */ + if (!(entry[idx] & PAGE_INVALID)) + fprintf(stderr, + "WARNING: PTE for gpa=0x%"PRIx64" already set!\n", gpa); + entry[idx] = gpa; +} + +vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva) +{ + int ri, idx; + uint64_t *entry; + + TEST_ASSERT(vm->page_size == PAGE_SIZE, "Unsupported page size: 0x%x", + vm->page_size); + + entry = addr_gpa2hva(vm, vm->pgd); + for (ri = 1; ri <= 4; ri++) { + idx = (gva >> (64 - 11 * ri)) & 0x7ffu; + TEST_ASSERT(!(entry[idx] & REGION_ENTRY_INVALID), + "No region mapping for vm virtual address 0x%lx", + gva); + entry = addr_gpa2hva(vm, entry[idx] & 
REGION_ENTRY_ORIGIN); + } + + idx = (gva >> PAGE_SHIFT) & 0x0ffu; /* page index */ + + TEST_ASSERT(!(entry[idx] & PAGE_INVALID), + "No page mapping for vm virtual address 0x%lx", gva); + + return (entry[idx] & ~0xffful) + (gva & 0xffful); +} + +static void virt_dump_ptes(FILE *stream, struct kvm_vm *vm, uint8_t indent, + uint64_t ptea_start) +{ + uint64_t *pte, ptea; + + for (ptea = ptea_start; ptea < ptea_start + 0x100 * 8; ptea += 8) { + pte = addr_gpa2hva(vm, ptea); + if (*pte & PAGE_INVALID) + continue; + fprintf(stream, "%*spte @ 0x%lx: 0x%016lx\n", + indent, "", ptea, *pte); + } +} + +static void virt_dump_region(FILE *stream, struct kvm_vm *vm, uint8_t indent, + uint64_t reg_tab_addr) +{ + uint64_t addr, *entry; + + for (addr = reg_tab_addr; addr < reg_tab_addr + 0x400 * 8; addr += 8) { + entry = addr_gpa2hva(vm, addr); + if (*entry & REGION_ENTRY_INVALID) + continue; + fprintf(stream, "%*srt%lde @ 0x%lx: 0x%016lx\n", + indent, "", 4 - ((*entry & REGION_ENTRY_TYPE) >> 2), + addr, *entry); + if (*entry & REGION_ENTRY_TYPE) { + virt_dump_region(stream, vm, indent + 2, + *entry & REGION_ENTRY_ORIGIN); + } else { + virt_dump_ptes(stream, vm, indent + 2, + *entry & REGION_ENTRY_ORIGIN); + } + } +} + +void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) +{ + if (!vm->pgd_created) + return; + + virt_dump_region(stream, vm, indent, vm->pgd); +} + +void vcpu_arch_set_entry_point(struct kvm_vcpu *vcpu, void *guest_code) +{ + vcpu->run->psw_addr = (uintptr_t)guest_code; +} + +struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id) +{ + size_t stack_size = DEFAULT_STACK_PGS * getpagesize(); + uint64_t stack_vaddr; + struct kvm_regs regs; + struct kvm_sregs sregs; + struct kvm_vcpu *vcpu; + + TEST_ASSERT(vm->page_size == PAGE_SIZE, "Unsupported page size: 0x%x", + vm->page_size); + + stack_vaddr = __vm_vaddr_alloc(vm, stack_size, + DEFAULT_GUEST_STACK_VADDR_MIN, + MEM_REGION_DATA); + + vcpu = __vm_vcpu_add(vm, vcpu_id); + + /* Setup guest registers */ + vcpu_regs_get(vcpu, ®s); + regs.gprs[15] = stack_vaddr + (DEFAULT_STACK_PGS * getpagesize()) - 160; + vcpu_regs_set(vcpu, ®s); + + vcpu_sregs_get(vcpu, &sregs); + sregs.crs[0] |= 0x00040000; /* Enable floating point regs */ + sregs.crs[1] = vm->pgd | 0xf; /* Primary region table */ + vcpu_sregs_set(vcpu, &sregs); + + vcpu->run->psw_mask = 0x0400000180000000ULL; /* DAT enabled + 64 bit mode */ + + return vcpu; +} + +void vcpu_args_set(struct kvm_vcpu *vcpu, unsigned int num, ...) +{ + va_list ap; + struct kvm_regs regs; + int i; + + TEST_ASSERT(num >= 1 && num <= 5, "Unsupported number of args,\n" + " num: %u", + num); + + va_start(ap, num); + vcpu_regs_get(vcpu, ®s); + + for (i = 0; i < num; i++) + regs.gprs[i + 2] = va_arg(ap, uint64_t); + + vcpu_regs_set(vcpu, ®s); + va_end(ap); +} + +void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, uint8_t indent) +{ + fprintf(stream, "%*spstate: psw: 0x%.16llx:0x%.16llx\n", + indent, "", vcpu->run->psw_mask, vcpu->run->psw_addr); +} + +void assert_on_unhandled_exception(struct kvm_vcpu *vcpu) +{ +} diff --git a/tools/testing/selftests/kvm/lib/s390/ucall.c b/tools/testing/selftests/kvm/lib/s390/ucall.c new file mode 100644 index 000000000000..cca98734653d --- /dev/null +++ b/tools/testing/selftests/kvm/lib/s390/ucall.c @@ -0,0 +1,22 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * ucall support. A ucall is a "hypercall to userspace". + * + * Copyright (C) 2019 Red Hat, Inc. 
+ */ +#include "kvm_util.h" + +void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu) +{ + struct kvm_run *run = vcpu->run; + + if (run->exit_reason == KVM_EXIT_S390_SIEIC && + run->s390_sieic.icptcode == 4 && + (run->s390_sieic.ipa >> 8) == 0x83 && /* 0x83 means DIAGNOSE */ + (run->s390_sieic.ipb >> 16) == 0x501) { + int reg = run->s390_sieic.ipa & 0xf; + + return (void *)run->s.regs.gprs[reg]; + } + return NULL; +} diff --git a/tools/testing/selftests/kvm/lib/s390x/diag318_test_handler.c b/tools/testing/selftests/kvm/lib/s390x/diag318_test_handler.c deleted file mode 100644 index 2c432fa164f1..000000000000 --- a/tools/testing/selftests/kvm/lib/s390x/diag318_test_handler.c +++ /dev/null @@ -1,80 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Test handler for the s390x DIAGNOSE 0x0318 instruction. - * - * Copyright (C) 2020, IBM - */ - -#include "test_util.h" -#include "kvm_util.h" - -#define ICPT_INSTRUCTION 0x04 -#define IPA0_DIAG 0x8300 - -static void guest_code(void) -{ - uint64_t diag318_info = 0x12345678; - - asm volatile ("diag %0,0,0x318\n" : : "d" (diag318_info)); -} - -/* - * The DIAGNOSE 0x0318 instruction call must be handled via userspace. As such, - * we create an ad-hoc VM here to handle the instruction then extract the - * necessary data. It is up to the caller to decide what to do with that data. - */ -static uint64_t diag318_handler(void) -{ - struct kvm_vcpu *vcpu; - struct kvm_vm *vm; - struct kvm_run *run; - uint64_t reg; - uint64_t diag318_info; - - vm = vm_create_with_one_vcpu(&vcpu, guest_code); - vcpu_run(vcpu); - run = vcpu->run; - - TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_S390_SIEIC); - TEST_ASSERT(run->s390_sieic.icptcode == ICPT_INSTRUCTION, - "Unexpected intercept code: 0x%x", run->s390_sieic.icptcode); - TEST_ASSERT((run->s390_sieic.ipa & 0xff00) == IPA0_DIAG, - "Unexpected IPA0 code: 0x%x", (run->s390_sieic.ipa & 0xff00)); - - reg = (run->s390_sieic.ipa & 0x00f0) >> 4; - diag318_info = run->s.regs.gprs[reg]; - - TEST_ASSERT(diag318_info != 0, "DIAGNOSE 0x0318 info not set"); - - kvm_vm_free(vm); - - return diag318_info; -} - -uint64_t get_diag318_info(void) -{ - static uint64_t diag318_info; - static bool printed_skip; - - /* - * If KVM does not support diag318, then return 0 to - * ensure tests do not break. - */ - if (!kvm_has_cap(KVM_CAP_S390_DIAG318)) { - if (!printed_skip) { - fprintf(stdout, "KVM_CAP_S390_DIAG318 not supported. " - "Skipping diag318 test.\n"); - printed_skip = true; - } - return 0; - } - - /* - * If a test has previously requested the diag318 info, - * then don't bother spinning up a temporary VM again. - */ - if (!diag318_info) - diag318_info = diag318_handler(); - - return diag318_info; -} diff --git a/tools/testing/selftests/kvm/lib/s390x/facility.c b/tools/testing/selftests/kvm/lib/s390x/facility.c deleted file mode 100644 index d540812d911a..000000000000 --- a/tools/testing/selftests/kvm/lib/s390x/facility.c +++ /dev/null @@ -1,14 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Copyright IBM Corp. 2024 - * - * Authors: - * Hariharan Mari - * - * Contains the definition for the global variables to have the test facitlity feature. 
- */ - -#include "facility.h" - -uint64_t stfl_doublewords[NB_STFL_DOUBLEWORDS]; -bool stfle_flag; diff --git a/tools/testing/selftests/kvm/lib/s390x/processor.c b/tools/testing/selftests/kvm/lib/s390x/processor.c deleted file mode 100644 index 20cfe970e3e3..000000000000 --- a/tools/testing/selftests/kvm/lib/s390x/processor.c +++ /dev/null @@ -1,223 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * KVM selftest s390x library code - CPU-related functions (page tables...) - * - * Copyright (C) 2019, Red Hat, Inc. - */ - -#include "processor.h" -#include "kvm_util.h" - -#define PAGES_PER_REGION 4 - -void virt_arch_pgd_alloc(struct kvm_vm *vm) -{ - vm_paddr_t paddr; - - TEST_ASSERT(vm->page_size == PAGE_SIZE, "Unsupported page size: 0x%x", - vm->page_size); - - if (vm->pgd_created) - return; - - paddr = vm_phy_pages_alloc(vm, PAGES_PER_REGION, - KVM_GUEST_PAGE_TABLE_MIN_PADDR, - vm->memslots[MEM_REGION_PT]); - memset(addr_gpa2hva(vm, paddr), 0xff, PAGES_PER_REGION * vm->page_size); - - vm->pgd = paddr; - vm->pgd_created = true; -} - -/* - * Allocate 4 pages for a region/segment table (ri < 4), or one page for - * a page table (ri == 4). Returns a suitable region/segment table entry - * which points to the freshly allocated pages. - */ -static uint64_t virt_alloc_region(struct kvm_vm *vm, int ri) -{ - uint64_t taddr; - - taddr = vm_phy_pages_alloc(vm, ri < 4 ? PAGES_PER_REGION : 1, - KVM_GUEST_PAGE_TABLE_MIN_PADDR, 0); - memset(addr_gpa2hva(vm, taddr), 0xff, PAGES_PER_REGION * vm->page_size); - - return (taddr & REGION_ENTRY_ORIGIN) - | (((4 - ri) << 2) & REGION_ENTRY_TYPE) - | ((ri < 4 ? (PAGES_PER_REGION - 1) : 0) & REGION_ENTRY_LENGTH); -} - -void virt_arch_pg_map(struct kvm_vm *vm, uint64_t gva, uint64_t gpa) -{ - int ri, idx; - uint64_t *entry; - - TEST_ASSERT((gva % vm->page_size) == 0, - "Virtual address not on page boundary,\n" - " vaddr: 0x%lx vm->page_size: 0x%x", - gva, vm->page_size); - TEST_ASSERT(sparsebit_is_set(vm->vpages_valid, - (gva >> vm->page_shift)), - "Invalid virtual address, vaddr: 0x%lx", - gva); - TEST_ASSERT((gpa % vm->page_size) == 0, - "Physical address not on page boundary,\n" - " paddr: 0x%lx vm->page_size: 0x%x", - gva, vm->page_size); - TEST_ASSERT((gpa >> vm->page_shift) <= vm->max_gfn, - "Physical address beyond beyond maximum supported,\n" - " paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x", - gva, vm->max_gfn, vm->page_size); - - /* Walk through region and segment tables */ - entry = addr_gpa2hva(vm, vm->pgd); - for (ri = 1; ri <= 4; ri++) { - idx = (gva >> (64 - 11 * ri)) & 0x7ffu; - if (entry[idx] & REGION_ENTRY_INVALID) - entry[idx] = virt_alloc_region(vm, ri); - entry = addr_gpa2hva(vm, entry[idx] & REGION_ENTRY_ORIGIN); - } - - /* Fill in page table entry */ - idx = (gva >> PAGE_SHIFT) & 0x0ffu; /* page index */ - if (!(entry[idx] & PAGE_INVALID)) - fprintf(stderr, - "WARNING: PTE for gpa=0x%"PRIx64" already set!\n", gpa); - entry[idx] = gpa; -} - -vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva) -{ - int ri, idx; - uint64_t *entry; - - TEST_ASSERT(vm->page_size == PAGE_SIZE, "Unsupported page size: 0x%x", - vm->page_size); - - entry = addr_gpa2hva(vm, vm->pgd); - for (ri = 1; ri <= 4; ri++) { - idx = (gva >> (64 - 11 * ri)) & 0x7ffu; - TEST_ASSERT(!(entry[idx] & REGION_ENTRY_INVALID), - "No region mapping for vm virtual address 0x%lx", - gva); - entry = addr_gpa2hva(vm, entry[idx] & REGION_ENTRY_ORIGIN); - } - - idx = (gva >> PAGE_SHIFT) & 0x0ffu; /* page index */ - - TEST_ASSERT(!(entry[idx] & PAGE_INVALID), - "No page 
mapping for vm virtual address 0x%lx", gva); - - return (entry[idx] & ~0xffful) + (gva & 0xffful); -} - -static void virt_dump_ptes(FILE *stream, struct kvm_vm *vm, uint8_t indent, - uint64_t ptea_start) -{ - uint64_t *pte, ptea; - - for (ptea = ptea_start; ptea < ptea_start + 0x100 * 8; ptea += 8) { - pte = addr_gpa2hva(vm, ptea); - if (*pte & PAGE_INVALID) - continue; - fprintf(stream, "%*spte @ 0x%lx: 0x%016lx\n", - indent, "", ptea, *pte); - } -} - -static void virt_dump_region(FILE *stream, struct kvm_vm *vm, uint8_t indent, - uint64_t reg_tab_addr) -{ - uint64_t addr, *entry; - - for (addr = reg_tab_addr; addr < reg_tab_addr + 0x400 * 8; addr += 8) { - entry = addr_gpa2hva(vm, addr); - if (*entry & REGION_ENTRY_INVALID) - continue; - fprintf(stream, "%*srt%lde @ 0x%lx: 0x%016lx\n", - indent, "", 4 - ((*entry & REGION_ENTRY_TYPE) >> 2), - addr, *entry); - if (*entry & REGION_ENTRY_TYPE) { - virt_dump_region(stream, vm, indent + 2, - *entry & REGION_ENTRY_ORIGIN); - } else { - virt_dump_ptes(stream, vm, indent + 2, - *entry & REGION_ENTRY_ORIGIN); - } - } -} - -void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) -{ - if (!vm->pgd_created) - return; - - virt_dump_region(stream, vm, indent, vm->pgd); -} - -void vcpu_arch_set_entry_point(struct kvm_vcpu *vcpu, void *guest_code) -{ - vcpu->run->psw_addr = (uintptr_t)guest_code; -} - -struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id) -{ - size_t stack_size = DEFAULT_STACK_PGS * getpagesize(); - uint64_t stack_vaddr; - struct kvm_regs regs; - struct kvm_sregs sregs; - struct kvm_vcpu *vcpu; - - TEST_ASSERT(vm->page_size == PAGE_SIZE, "Unsupported page size: 0x%x", - vm->page_size); - - stack_vaddr = __vm_vaddr_alloc(vm, stack_size, - DEFAULT_GUEST_STACK_VADDR_MIN, - MEM_REGION_DATA); - - vcpu = __vm_vcpu_add(vm, vcpu_id); - - /* Setup guest registers */ - vcpu_regs_get(vcpu, ®s); - regs.gprs[15] = stack_vaddr + (DEFAULT_STACK_PGS * getpagesize()) - 160; - vcpu_regs_set(vcpu, ®s); - - vcpu_sregs_get(vcpu, &sregs); - sregs.crs[0] |= 0x00040000; /* Enable floating point regs */ - sregs.crs[1] = vm->pgd | 0xf; /* Primary region table */ - vcpu_sregs_set(vcpu, &sregs); - - vcpu->run->psw_mask = 0x0400000180000000ULL; /* DAT enabled + 64 bit mode */ - - return vcpu; -} - -void vcpu_args_set(struct kvm_vcpu *vcpu, unsigned int num, ...) -{ - va_list ap; - struct kvm_regs regs; - int i; - - TEST_ASSERT(num >= 1 && num <= 5, "Unsupported number of args,\n" - " num: %u", - num); - - va_start(ap, num); - vcpu_regs_get(vcpu, ®s); - - for (i = 0; i < num; i++) - regs.gprs[i + 2] = va_arg(ap, uint64_t); - - vcpu_regs_set(vcpu, ®s); - va_end(ap); -} - -void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, uint8_t indent) -{ - fprintf(stream, "%*spstate: psw: 0x%.16llx:0x%.16llx\n", - indent, "", vcpu->run->psw_mask, vcpu->run->psw_addr); -} - -void assert_on_unhandled_exception(struct kvm_vcpu *vcpu) -{ -} diff --git a/tools/testing/selftests/kvm/lib/s390x/ucall.c b/tools/testing/selftests/kvm/lib/s390x/ucall.c deleted file mode 100644 index cca98734653d..000000000000 --- a/tools/testing/selftests/kvm/lib/s390x/ucall.c +++ /dev/null @@ -1,22 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * ucall support. A ucall is a "hypercall to userspace". - * - * Copyright (C) 2019 Red Hat, Inc. 
- */ -#include "kvm_util.h" - -void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu) -{ - struct kvm_run *run = vcpu->run; - - if (run->exit_reason == KVM_EXIT_S390_SIEIC && - run->s390_sieic.icptcode == 4 && - (run->s390_sieic.ipa >> 8) == 0x83 && /* 0x83 means DIAGNOSE */ - (run->s390_sieic.ipb >> 16) == 0x501) { - int reg = run->s390_sieic.ipa & 0xf; - - return (void *)run->s.regs.gprs[reg]; - } - return NULL; -} diff --git a/tools/testing/selftests/kvm/lib/x86/apic.c b/tools/testing/selftests/kvm/lib/x86/apic.c new file mode 100644 index 000000000000..89153a333e83 --- /dev/null +++ b/tools/testing/selftests/kvm/lib/x86/apic.c @@ -0,0 +1,43 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2021, Google LLC. + */ + +#include "apic.h" + +void apic_disable(void) +{ + wrmsr(MSR_IA32_APICBASE, + rdmsr(MSR_IA32_APICBASE) & + ~(MSR_IA32_APICBASE_ENABLE | MSR_IA32_APICBASE_EXTD)); +} + +void xapic_enable(void) +{ + uint64_t val = rdmsr(MSR_IA32_APICBASE); + + /* Per SDM: to enable xAPIC when in x2APIC must first disable APIC */ + if (val & MSR_IA32_APICBASE_EXTD) { + apic_disable(); + wrmsr(MSR_IA32_APICBASE, + rdmsr(MSR_IA32_APICBASE) | MSR_IA32_APICBASE_ENABLE); + } else if (!(val & MSR_IA32_APICBASE_ENABLE)) { + wrmsr(MSR_IA32_APICBASE, val | MSR_IA32_APICBASE_ENABLE); + } + + /* + * Per SDM: reset value of spurious interrupt vector register has the + * APIC software enabled bit=0. It must be enabled in addition to the + * enable bit in the MSR. + */ + val = xapic_read_reg(APIC_SPIV) | APIC_SPIV_APIC_ENABLED; + xapic_write_reg(APIC_SPIV, val); +} + +void x2apic_enable(void) +{ + wrmsr(MSR_IA32_APICBASE, rdmsr(MSR_IA32_APICBASE) | + MSR_IA32_APICBASE_ENABLE | MSR_IA32_APICBASE_EXTD); + x2apic_write_reg(APIC_SPIV, + x2apic_read_reg(APIC_SPIV) | APIC_SPIV_APIC_ENABLED); +} diff --git a/tools/testing/selftests/kvm/lib/x86/handlers.S b/tools/testing/selftests/kvm/lib/x86/handlers.S new file mode 100644 index 000000000000..7629819734af --- /dev/null +++ b/tools/testing/selftests/kvm/lib/x86/handlers.S @@ -0,0 +1,81 @@ +handle_exception: + push %r15 + push %r14 + push %r13 + push %r12 + push %r11 + push %r10 + push %r9 + push %r8 + + push %rdi + push %rsi + push %rbp + push %rbx + push %rdx + push %rcx + push %rax + mov %rsp, %rdi + + call route_exception + + pop %rax + pop %rcx + pop %rdx + pop %rbx + pop %rbp + pop %rsi + pop %rdi + pop %r8 + pop %r9 + pop %r10 + pop %r11 + pop %r12 + pop %r13 + pop %r14 + pop %r15 + + /* Discard vector and error code. */ + add $16, %rsp + iretq + +/* + * Build the handle_exception wrappers which push the vector/error code on the + * stack and an array of pointers to those wrappers. + */ +.pushsection .rodata +.globl idt_handlers +idt_handlers: +.popsection + +.macro HANDLERS has_error from to + vector = \from + .rept \to - \from + 1 + .align 8 + + /* Fetch current address and append it to idt_handlers. */ +666 : +.pushsection .rodata + .quad 666b +.popsection + + .if ! 
\has_error + pushq $0 + .endif + pushq $vector + jmp handle_exception + vector = vector + 1 + .endr +.endm + +.global idt_handler_code +idt_handler_code: + HANDLERS has_error=0 from=0 to=7 + HANDLERS has_error=1 from=8 to=8 + HANDLERS has_error=0 from=9 to=9 + HANDLERS has_error=1 from=10 to=14 + HANDLERS has_error=0 from=15 to=16 + HANDLERS has_error=1 from=17 to=17 + HANDLERS has_error=0 from=18 to=255 + +.section .note.GNU-stack, "", %progbits diff --git a/tools/testing/selftests/kvm/lib/x86/hyperv.c b/tools/testing/selftests/kvm/lib/x86/hyperv.c new file mode 100644 index 000000000000..15bc8cd583aa --- /dev/null +++ b/tools/testing/selftests/kvm/lib/x86/hyperv.c @@ -0,0 +1,113 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Hyper-V specific functions. + * + * Copyright (C) 2021, Red Hat Inc. + */ +#include +#include "processor.h" +#include "hyperv.h" + +const struct kvm_cpuid2 *kvm_get_supported_hv_cpuid(void) +{ + static struct kvm_cpuid2 *cpuid; + int kvm_fd; + + if (cpuid) + return cpuid; + + cpuid = allocate_kvm_cpuid2(MAX_NR_CPUID_ENTRIES); + kvm_fd = open_kvm_dev_path_or_exit(); + + kvm_ioctl(kvm_fd, KVM_GET_SUPPORTED_HV_CPUID, cpuid); + + close(kvm_fd); + return cpuid; +} + +void vcpu_set_hv_cpuid(struct kvm_vcpu *vcpu) +{ + static struct kvm_cpuid2 *cpuid_full; + const struct kvm_cpuid2 *cpuid_sys, *cpuid_hv; + int i, nent = 0; + + if (!cpuid_full) { + cpuid_sys = kvm_get_supported_cpuid(); + cpuid_hv = kvm_get_supported_hv_cpuid(); + + cpuid_full = allocate_kvm_cpuid2(cpuid_sys->nent + cpuid_hv->nent); + if (!cpuid_full) { + perror("malloc"); + abort(); + } + + /* Need to skip KVM CPUID leaves 0x400000xx */ + for (i = 0; i < cpuid_sys->nent; i++) { + if (cpuid_sys->entries[i].function >= 0x40000000 && + cpuid_sys->entries[i].function < 0x40000100) + continue; + cpuid_full->entries[nent] = cpuid_sys->entries[i]; + nent++; + } + + memcpy(&cpuid_full->entries[nent], cpuid_hv->entries, + cpuid_hv->nent * sizeof(struct kvm_cpuid_entry2)); + cpuid_full->nent = nent + cpuid_hv->nent; + } + + vcpu_init_cpuid(vcpu, cpuid_full); +} + +const struct kvm_cpuid2 *vcpu_get_supported_hv_cpuid(struct kvm_vcpu *vcpu) +{ + struct kvm_cpuid2 *cpuid = allocate_kvm_cpuid2(MAX_NR_CPUID_ENTRIES); + + vcpu_ioctl(vcpu, KVM_GET_SUPPORTED_HV_CPUID, cpuid); + + return cpuid; +} + +bool kvm_hv_cpu_has(struct kvm_x86_cpu_feature feature) +{ + if (!kvm_has_cap(KVM_CAP_SYS_HYPERV_CPUID)) + return false; + + return kvm_cpuid_has(kvm_get_supported_hv_cpuid(), feature); +} + +struct hyperv_test_pages *vcpu_alloc_hyperv_test_pages(struct kvm_vm *vm, + vm_vaddr_t *p_hv_pages_gva) +{ + vm_vaddr_t hv_pages_gva = vm_vaddr_alloc_page(vm); + struct hyperv_test_pages *hv = addr_gva2hva(vm, hv_pages_gva); + + /* Setup of a region of guest memory for the VP Assist page. */ + hv->vp_assist = (void *)vm_vaddr_alloc_page(vm); + hv->vp_assist_hva = addr_gva2hva(vm, (uintptr_t)hv->vp_assist); + hv->vp_assist_gpa = addr_gva2gpa(vm, (uintptr_t)hv->vp_assist); + + /* Setup of a region of guest memory for the partition assist page. */ + hv->partition_assist = (void *)vm_vaddr_alloc_page(vm); + hv->partition_assist_hva = addr_gva2hva(vm, (uintptr_t)hv->partition_assist); + hv->partition_assist_gpa = addr_gva2gpa(vm, (uintptr_t)hv->partition_assist); + + /* Setup of a region of guest memory for the enlightened VMCS. 
*/ + hv->enlightened_vmcs = (void *)vm_vaddr_alloc_page(vm); + hv->enlightened_vmcs_hva = addr_gva2hva(vm, (uintptr_t)hv->enlightened_vmcs); + hv->enlightened_vmcs_gpa = addr_gva2gpa(vm, (uintptr_t)hv->enlightened_vmcs); + + *p_hv_pages_gva = hv_pages_gva; + return hv; +} + +int enable_vp_assist(uint64_t vp_assist_pa, void *vp_assist) +{ + uint64_t val = (vp_assist_pa & HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_MASK) | + HV_X64_MSR_VP_ASSIST_PAGE_ENABLE; + + wrmsr(HV_X64_MSR_VP_ASSIST_PAGE, val); + + current_vp_assist = vp_assist; + + return 0; +} diff --git a/tools/testing/selftests/kvm/lib/x86/memstress.c b/tools/testing/selftests/kvm/lib/x86/memstress.c new file mode 100644 index 000000000000..7f5d62a65c68 --- /dev/null +++ b/tools/testing/selftests/kvm/lib/x86/memstress.c @@ -0,0 +1,112 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * x86-specific extensions to memstress.c. + * + * Copyright (C) 2022, Google, Inc. + */ +#include +#include +#include +#include + +#include "test_util.h" +#include "kvm_util.h" +#include "memstress.h" +#include "processor.h" +#include "vmx.h" + +void memstress_l2_guest_code(uint64_t vcpu_id) +{ + memstress_guest_code(vcpu_id); + vmcall(); +} + +extern char memstress_l2_guest_entry[]; +__asm__( +"memstress_l2_guest_entry:" +" mov (%rsp), %rdi;" +" call memstress_l2_guest_code;" +" ud2;" +); + +static void memstress_l1_guest_code(struct vmx_pages *vmx, uint64_t vcpu_id) +{ +#define L2_GUEST_STACK_SIZE 64 + unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; + unsigned long *rsp; + + GUEST_ASSERT(vmx->vmcs_gpa); + GUEST_ASSERT(prepare_for_vmx_operation(vmx)); + GUEST_ASSERT(load_vmcs(vmx)); + GUEST_ASSERT(ept_1g_pages_supported()); + + rsp = &l2_guest_stack[L2_GUEST_STACK_SIZE - 1]; + *rsp = vcpu_id; + prepare_vmcs(vmx, memstress_l2_guest_entry, rsp); + + GUEST_ASSERT(!vmlaunch()); + GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL); + GUEST_DONE(); +} + +uint64_t memstress_nested_pages(int nr_vcpus) +{ + /* + * 513 page tables is enough to identity-map 256 TiB of L2 with 1G + * pages and 4-level paging, plus a few pages per-vCPU for data + * structures such as the VMCS. + */ + return 513 + 10 * nr_vcpus; +} + +void memstress_setup_ept(struct vmx_pages *vmx, struct kvm_vm *vm) +{ + uint64_t start, end; + + prepare_eptp(vmx, vm, 0); + + /* + * Identity map the first 4G and the test region with 1G pages so that + * KVM can shadow the EPT12 with the maximum huge page size supported + * by the backing source. + */ + nested_identity_map_1g(vmx, vm, 0, 0x100000000ULL); + + start = align_down(memstress_args.gpa, PG_SIZE_1G); + end = align_up(memstress_args.gpa + memstress_args.size, PG_SIZE_1G); + nested_identity_map_1g(vmx, vm, start, end - start); +} + +void memstress_setup_nested(struct kvm_vm *vm, int nr_vcpus, struct kvm_vcpu *vcpus[]) +{ + struct vmx_pages *vmx, *vmx0 = NULL; + struct kvm_regs regs; + vm_vaddr_t vmx_gva; + int vcpu_id; + + TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX)); + TEST_REQUIRE(kvm_cpu_has_ept()); + + for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) { + vmx = vcpu_alloc_vmx(vm, &vmx_gva); + + if (vcpu_id == 0) { + memstress_setup_ept(vmx, vm); + vmx0 = vmx; + } else { + /* Share the same EPT table across all vCPUs. */ + vmx->eptp = vmx0->eptp; + vmx->eptp_hva = vmx0->eptp_hva; + vmx->eptp_gpa = vmx0->eptp_gpa; + } + + /* + * Override the vCPU to run memstress_l1_guest_code() which will + * bounce it into L2 before calling memstress_guest_code(). 
+ */ + vcpu_regs_get(vcpus[vcpu_id], ®s); + regs.rip = (unsigned long) memstress_l1_guest_code; + vcpu_regs_set(vcpus[vcpu_id], ®s); + vcpu_args_set(vcpus[vcpu_id], 2, vmx_gva, vcpu_id); + } +} diff --git a/tools/testing/selftests/kvm/lib/x86/pmu.c b/tools/testing/selftests/kvm/lib/x86/pmu.c new file mode 100644 index 000000000000..f31f0427c17c --- /dev/null +++ b/tools/testing/selftests/kvm/lib/x86/pmu.c @@ -0,0 +1,31 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2023, Tencent, Inc. + */ + +#include + +#include + +#include "kvm_util.h" +#include "pmu.h" + +const uint64_t intel_pmu_arch_events[] = { + INTEL_ARCH_CPU_CYCLES, + INTEL_ARCH_INSTRUCTIONS_RETIRED, + INTEL_ARCH_REFERENCE_CYCLES, + INTEL_ARCH_LLC_REFERENCES, + INTEL_ARCH_LLC_MISSES, + INTEL_ARCH_BRANCHES_RETIRED, + INTEL_ARCH_BRANCHES_MISPREDICTED, + INTEL_ARCH_TOPDOWN_SLOTS, +}; +kvm_static_assert(ARRAY_SIZE(intel_pmu_arch_events) == NR_INTEL_ARCH_EVENTS); + +const uint64_t amd_pmu_zen_events[] = { + AMD_ZEN_CORE_CYCLES, + AMD_ZEN_INSTRUCTIONS_RETIRED, + AMD_ZEN_BRANCHES_RETIRED, + AMD_ZEN_BRANCHES_MISPREDICTED, +}; +kvm_static_assert(ARRAY_SIZE(amd_pmu_zen_events) == NR_AMD_ZEN_EVENTS); diff --git a/tools/testing/selftests/kvm/lib/x86/processor.c b/tools/testing/selftests/kvm/lib/x86/processor.c new file mode 100644 index 000000000000..bd5a802fa7a5 --- /dev/null +++ b/tools/testing/selftests/kvm/lib/x86/processor.c @@ -0,0 +1,1293 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2018, Google LLC. + */ + +#include "linux/bitmap.h" +#include "test_util.h" +#include "kvm_util.h" +#include "processor.h" +#include "sev.h" + +#ifndef NUM_INTERRUPTS +#define NUM_INTERRUPTS 256 +#endif + +#define KERNEL_CS 0x8 +#define KERNEL_DS 0x10 +#define KERNEL_TSS 0x18 + +vm_vaddr_t exception_handlers; +bool host_cpu_is_amd; +bool host_cpu_is_intel; +bool is_forced_emulation_enabled; +uint64_t guest_tsc_khz; + +static void regs_dump(FILE *stream, struct kvm_regs *regs, uint8_t indent) +{ + fprintf(stream, "%*srax: 0x%.16llx rbx: 0x%.16llx " + "rcx: 0x%.16llx rdx: 0x%.16llx\n", + indent, "", + regs->rax, regs->rbx, regs->rcx, regs->rdx); + fprintf(stream, "%*srsi: 0x%.16llx rdi: 0x%.16llx " + "rsp: 0x%.16llx rbp: 0x%.16llx\n", + indent, "", + regs->rsi, regs->rdi, regs->rsp, regs->rbp); + fprintf(stream, "%*sr8: 0x%.16llx r9: 0x%.16llx " + "r10: 0x%.16llx r11: 0x%.16llx\n", + indent, "", + regs->r8, regs->r9, regs->r10, regs->r11); + fprintf(stream, "%*sr12: 0x%.16llx r13: 0x%.16llx " + "r14: 0x%.16llx r15: 0x%.16llx\n", + indent, "", + regs->r12, regs->r13, regs->r14, regs->r15); + fprintf(stream, "%*srip: 0x%.16llx rfl: 0x%.16llx\n", + indent, "", + regs->rip, regs->rflags); +} + +static void segment_dump(FILE *stream, struct kvm_segment *segment, + uint8_t indent) +{ + fprintf(stream, "%*sbase: 0x%.16llx limit: 0x%.8x " + "selector: 0x%.4x type: 0x%.2x\n", + indent, "", segment->base, segment->limit, + segment->selector, segment->type); + fprintf(stream, "%*spresent: 0x%.2x dpl: 0x%.2x " + "db: 0x%.2x s: 0x%.2x l: 0x%.2x\n", + indent, "", segment->present, segment->dpl, + segment->db, segment->s, segment->l); + fprintf(stream, "%*sg: 0x%.2x avl: 0x%.2x " + "unusable: 0x%.2x padding: 0x%.2x\n", + indent, "", segment->g, segment->avl, + segment->unusable, segment->padding); +} + +static void dtable_dump(FILE *stream, struct kvm_dtable *dtable, + uint8_t indent) +{ + fprintf(stream, "%*sbase: 0x%.16llx limit: 0x%.4x " + "padding: 0x%.4x 0x%.4x 0x%.4x\n", + indent, "", dtable->base, dtable->limit, + 
dtable->padding[0], dtable->padding[1], dtable->padding[2]); +} + +static void sregs_dump(FILE *stream, struct kvm_sregs *sregs, uint8_t indent) +{ + unsigned int i; + + fprintf(stream, "%*scs:\n", indent, ""); + segment_dump(stream, &sregs->cs, indent + 2); + fprintf(stream, "%*sds:\n", indent, ""); + segment_dump(stream, &sregs->ds, indent + 2); + fprintf(stream, "%*ses:\n", indent, ""); + segment_dump(stream, &sregs->es, indent + 2); + fprintf(stream, "%*sfs:\n", indent, ""); + segment_dump(stream, &sregs->fs, indent + 2); + fprintf(stream, "%*sgs:\n", indent, ""); + segment_dump(stream, &sregs->gs, indent + 2); + fprintf(stream, "%*sss:\n", indent, ""); + segment_dump(stream, &sregs->ss, indent + 2); + fprintf(stream, "%*str:\n", indent, ""); + segment_dump(stream, &sregs->tr, indent + 2); + fprintf(stream, "%*sldt:\n", indent, ""); + segment_dump(stream, &sregs->ldt, indent + 2); + + fprintf(stream, "%*sgdt:\n", indent, ""); + dtable_dump(stream, &sregs->gdt, indent + 2); + fprintf(stream, "%*sidt:\n", indent, ""); + dtable_dump(stream, &sregs->idt, indent + 2); + + fprintf(stream, "%*scr0: 0x%.16llx cr2: 0x%.16llx " + "cr3: 0x%.16llx cr4: 0x%.16llx\n", + indent, "", + sregs->cr0, sregs->cr2, sregs->cr3, sregs->cr4); + fprintf(stream, "%*scr8: 0x%.16llx efer: 0x%.16llx " + "apic_base: 0x%.16llx\n", + indent, "", + sregs->cr8, sregs->efer, sregs->apic_base); + + fprintf(stream, "%*sinterrupt_bitmap:\n", indent, ""); + for (i = 0; i < (KVM_NR_INTERRUPTS + 63) / 64; i++) { + fprintf(stream, "%*s%.16llx\n", indent + 2, "", + sregs->interrupt_bitmap[i]); + } +} + +bool kvm_is_tdp_enabled(void) +{ + if (host_cpu_is_intel) + return get_kvm_intel_param_bool("ept"); + else + return get_kvm_amd_param_bool("npt"); +} + +void virt_arch_pgd_alloc(struct kvm_vm *vm) +{ + TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use " + "unknown or unsupported guest mode, mode: 0x%x", vm->mode); + + /* If needed, create page map l4 table. */ + if (!vm->pgd_created) { + vm->pgd = vm_alloc_page_table(vm); + vm->pgd_created = true; + } +} + +static void *virt_get_pte(struct kvm_vm *vm, uint64_t *parent_pte, + uint64_t vaddr, int level) +{ + uint64_t pt_gpa = PTE_GET_PA(*parent_pte); + uint64_t *page_table = addr_gpa2hva(vm, pt_gpa); + int index = (vaddr >> PG_LEVEL_SHIFT(level)) & 0x1ffu; + + TEST_ASSERT((*parent_pte & PTE_PRESENT_MASK) || parent_pte == &vm->pgd, + "Parent PTE (level %d) not PRESENT for gva: 0x%08lx", + level + 1, vaddr); + + return &page_table[index]; +} + +static uint64_t *virt_create_upper_pte(struct kvm_vm *vm, + uint64_t *parent_pte, + uint64_t vaddr, + uint64_t paddr, + int current_level, + int target_level) +{ + uint64_t *pte = virt_get_pte(vm, parent_pte, vaddr, current_level); + + paddr = vm_untag_gpa(vm, paddr); + + if (!(*pte & PTE_PRESENT_MASK)) { + *pte = PTE_PRESENT_MASK | PTE_WRITABLE_MASK; + if (current_level == target_level) + *pte |= PTE_LARGE_MASK | (paddr & PHYSICAL_PAGE_MASK); + else + *pte |= vm_alloc_page_table(vm) & PHYSICAL_PAGE_MASK; + } else { + /* + * Entry already present. Assert that the caller doesn't want + * a hugepage at this level, and that there isn't a hugepage at + * this level. 
+ */ + TEST_ASSERT(current_level != target_level, + "Cannot create hugepage at level: %u, vaddr: 0x%lx", + current_level, vaddr); + TEST_ASSERT(!(*pte & PTE_LARGE_MASK), + "Cannot create page table at level: %u, vaddr: 0x%lx", + current_level, vaddr); + } + return pte; +} + +void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, int level) +{ + const uint64_t pg_size = PG_LEVEL_SIZE(level); + uint64_t *pml4e, *pdpe, *pde; + uint64_t *pte; + + TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, + "Unknown or unsupported guest mode, mode: 0x%x", vm->mode); + + TEST_ASSERT((vaddr % pg_size) == 0, + "Virtual address not aligned,\n" + "vaddr: 0x%lx page size: 0x%lx", vaddr, pg_size); + TEST_ASSERT(sparsebit_is_set(vm->vpages_valid, (vaddr >> vm->page_shift)), + "Invalid virtual address, vaddr: 0x%lx", vaddr); + TEST_ASSERT((paddr % pg_size) == 0, + "Physical address not aligned,\n" + " paddr: 0x%lx page size: 0x%lx", paddr, pg_size); + TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn, + "Physical address beyond maximum supported,\n" + " paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x", + paddr, vm->max_gfn, vm->page_size); + TEST_ASSERT(vm_untag_gpa(vm, paddr) == paddr, + "Unexpected bits in paddr: %lx", paddr); + + /* + * Allocate upper level page tables, if not already present. Return + * early if a hugepage was created. + */ + pml4e = virt_create_upper_pte(vm, &vm->pgd, vaddr, paddr, PG_LEVEL_512G, level); + if (*pml4e & PTE_LARGE_MASK) + return; + + pdpe = virt_create_upper_pte(vm, pml4e, vaddr, paddr, PG_LEVEL_1G, level); + if (*pdpe & PTE_LARGE_MASK) + return; + + pde = virt_create_upper_pte(vm, pdpe, vaddr, paddr, PG_LEVEL_2M, level); + if (*pde & PTE_LARGE_MASK) + return; + + /* Fill in page table entry. */ + pte = virt_get_pte(vm, pde, vaddr, PG_LEVEL_4K); + TEST_ASSERT(!(*pte & PTE_PRESENT_MASK), + "PTE already present for 4k page at vaddr: 0x%lx", vaddr); + *pte = PTE_PRESENT_MASK | PTE_WRITABLE_MASK | (paddr & PHYSICAL_PAGE_MASK); + + /* + * Neither SEV nor TDX supports shared page tables, so only the final + * leaf PTE needs manually set the C/S-bit. 
+ */ + if (vm_is_gpa_protected(vm, paddr)) + *pte |= vm->arch.c_bit; + else + *pte |= vm->arch.s_bit; +} + +void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr) +{ + __virt_pg_map(vm, vaddr, paddr, PG_LEVEL_4K); +} + +void virt_map_level(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, + uint64_t nr_bytes, int level) +{ + uint64_t pg_size = PG_LEVEL_SIZE(level); + uint64_t nr_pages = nr_bytes / pg_size; + int i; + + TEST_ASSERT(nr_bytes % pg_size == 0, + "Region size not aligned: nr_bytes: 0x%lx, page size: 0x%lx", + nr_bytes, pg_size); + + for (i = 0; i < nr_pages; i++) { + __virt_pg_map(vm, vaddr, paddr, level); + + vaddr += pg_size; + paddr += pg_size; + } +} + +static bool vm_is_target_pte(uint64_t *pte, int *level, int current_level) +{ + if (*pte & PTE_LARGE_MASK) { + TEST_ASSERT(*level == PG_LEVEL_NONE || + *level == current_level, + "Unexpected hugepage at level %d", current_level); + *level = current_level; + } + + return *level == current_level; +} + +uint64_t *__vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr, + int *level) +{ + uint64_t *pml4e, *pdpe, *pde; + + TEST_ASSERT(!vm->arch.is_pt_protected, + "Walking page tables of protected guests is impossible"); + + TEST_ASSERT(*level >= PG_LEVEL_NONE && *level < PG_LEVEL_NUM, + "Invalid PG_LEVEL_* '%d'", *level); + + TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use " + "unknown or unsupported guest mode, mode: 0x%x", vm->mode); + TEST_ASSERT(sparsebit_is_set(vm->vpages_valid, + (vaddr >> vm->page_shift)), + "Invalid virtual address, vaddr: 0x%lx", + vaddr); + /* + * Based on the mode check above there are 48 bits in the vaddr, so + * shift 16 to sign extend the last bit (bit-47), + */ + TEST_ASSERT(vaddr == (((int64_t)vaddr << 16) >> 16), + "Canonical check failed. 
The virtual address is invalid."); + + pml4e = virt_get_pte(vm, &vm->pgd, vaddr, PG_LEVEL_512G); + if (vm_is_target_pte(pml4e, level, PG_LEVEL_512G)) + return pml4e; + + pdpe = virt_get_pte(vm, pml4e, vaddr, PG_LEVEL_1G); + if (vm_is_target_pte(pdpe, level, PG_LEVEL_1G)) + return pdpe; + + pde = virt_get_pte(vm, pdpe, vaddr, PG_LEVEL_2M); + if (vm_is_target_pte(pde, level, PG_LEVEL_2M)) + return pde; + + return virt_get_pte(vm, pde, vaddr, PG_LEVEL_4K); +} + +uint64_t *vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr) +{ + int level = PG_LEVEL_4K; + + return __vm_get_page_table_entry(vm, vaddr, &level); +} + +void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) +{ + uint64_t *pml4e, *pml4e_start; + uint64_t *pdpe, *pdpe_start; + uint64_t *pde, *pde_start; + uint64_t *pte, *pte_start; + + if (!vm->pgd_created) + return; + + fprintf(stream, "%*s " + " no\n", indent, ""); + fprintf(stream, "%*s index hvaddr gpaddr " + "addr w exec dirty\n", + indent, ""); + pml4e_start = (uint64_t *) addr_gpa2hva(vm, vm->pgd); + for (uint16_t n1 = 0; n1 <= 0x1ffu; n1++) { + pml4e = &pml4e_start[n1]; + if (!(*pml4e & PTE_PRESENT_MASK)) + continue; + fprintf(stream, "%*spml4e 0x%-3zx %p 0x%-12lx 0x%-10llx %u " + " %u\n", + indent, "", + pml4e - pml4e_start, pml4e, + addr_hva2gpa(vm, pml4e), PTE_GET_PFN(*pml4e), + !!(*pml4e & PTE_WRITABLE_MASK), !!(*pml4e & PTE_NX_MASK)); + + pdpe_start = addr_gpa2hva(vm, *pml4e & PHYSICAL_PAGE_MASK); + for (uint16_t n2 = 0; n2 <= 0x1ffu; n2++) { + pdpe = &pdpe_start[n2]; + if (!(*pdpe & PTE_PRESENT_MASK)) + continue; + fprintf(stream, "%*spdpe 0x%-3zx %p 0x%-12lx 0x%-10llx " + "%u %u\n", + indent, "", + pdpe - pdpe_start, pdpe, + addr_hva2gpa(vm, pdpe), + PTE_GET_PFN(*pdpe), !!(*pdpe & PTE_WRITABLE_MASK), + !!(*pdpe & PTE_NX_MASK)); + + pde_start = addr_gpa2hva(vm, *pdpe & PHYSICAL_PAGE_MASK); + for (uint16_t n3 = 0; n3 <= 0x1ffu; n3++) { + pde = &pde_start[n3]; + if (!(*pde & PTE_PRESENT_MASK)) + continue; + fprintf(stream, "%*spde 0x%-3zx %p " + "0x%-12lx 0x%-10llx %u %u\n", + indent, "", pde - pde_start, pde, + addr_hva2gpa(vm, pde), + PTE_GET_PFN(*pde), !!(*pde & PTE_WRITABLE_MASK), + !!(*pde & PTE_NX_MASK)); + + pte_start = addr_gpa2hva(vm, *pde & PHYSICAL_PAGE_MASK); + for (uint16_t n4 = 0; n4 <= 0x1ffu; n4++) { + pte = &pte_start[n4]; + if (!(*pte & PTE_PRESENT_MASK)) + continue; + fprintf(stream, "%*spte 0x%-3zx %p " + "0x%-12lx 0x%-10llx %u %u " + " %u 0x%-10lx\n", + indent, "", + pte - pte_start, pte, + addr_hva2gpa(vm, pte), + PTE_GET_PFN(*pte), + !!(*pte & PTE_WRITABLE_MASK), + !!(*pte & PTE_NX_MASK), + !!(*pte & PTE_DIRTY_MASK), + ((uint64_t) n1 << 27) + | ((uint64_t) n2 << 18) + | ((uint64_t) n3 << 9) + | ((uint64_t) n4)); + } + } + } + } +} + +/* + * Set Unusable Segment + * + * Input Args: None + * + * Output Args: + * segp - Pointer to segment register + * + * Return: None + * + * Sets the segment register pointed to by @segp to an unusable state. 
+ */ +static void kvm_seg_set_unusable(struct kvm_segment *segp) +{ + memset(segp, 0, sizeof(*segp)); + segp->unusable = true; +} + +static void kvm_seg_fill_gdt_64bit(struct kvm_vm *vm, struct kvm_segment *segp) +{ + void *gdt = addr_gva2hva(vm, vm->arch.gdt); + struct desc64 *desc = gdt + (segp->selector >> 3) * 8; + + desc->limit0 = segp->limit & 0xFFFF; + desc->base0 = segp->base & 0xFFFF; + desc->base1 = segp->base >> 16; + desc->type = segp->type; + desc->s = segp->s; + desc->dpl = segp->dpl; + desc->p = segp->present; + desc->limit1 = segp->limit >> 16; + desc->avl = segp->avl; + desc->l = segp->l; + desc->db = segp->db; + desc->g = segp->g; + desc->base2 = segp->base >> 24; + if (!segp->s) + desc->base3 = segp->base >> 32; +} + +static void kvm_seg_set_kernel_code_64bit(struct kvm_segment *segp) +{ + memset(segp, 0, sizeof(*segp)); + segp->selector = KERNEL_CS; + segp->limit = 0xFFFFFFFFu; + segp->s = 0x1; /* kTypeCodeData */ + segp->type = 0x08 | 0x01 | 0x02; /* kFlagCode | kFlagCodeAccessed + * | kFlagCodeReadable + */ + segp->g = true; + segp->l = true; + segp->present = 1; +} + +static void kvm_seg_set_kernel_data_64bit(struct kvm_segment *segp) +{ + memset(segp, 0, sizeof(*segp)); + segp->selector = KERNEL_DS; + segp->limit = 0xFFFFFFFFu; + segp->s = 0x1; /* kTypeCodeData */ + segp->type = 0x00 | 0x01 | 0x02; /* kFlagData | kFlagDataAccessed + * | kFlagDataWritable + */ + segp->g = true; + segp->present = true; +} + +vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva) +{ + int level = PG_LEVEL_NONE; + uint64_t *pte = __vm_get_page_table_entry(vm, gva, &level); + + TEST_ASSERT(*pte & PTE_PRESENT_MASK, + "Leaf PTE not PRESENT for gva: 0x%08lx", gva); + + /* + * No need for a hugepage mask on the PTE, x86-64 requires the "unused" + * address bits to be zero. + */ + return vm_untag_gpa(vm, PTE_GET_PA(*pte)) | (gva & ~HUGEPAGE_MASK(level)); +} + +static void kvm_seg_set_tss_64bit(vm_vaddr_t base, struct kvm_segment *segp) +{ + memset(segp, 0, sizeof(*segp)); + segp->base = base; + segp->limit = 0x67; + segp->selector = KERNEL_TSS; + segp->type = 0xb; + segp->present = 1; +} + +static void vcpu_init_sregs(struct kvm_vm *vm, struct kvm_vcpu *vcpu) +{ + struct kvm_sregs sregs; + + TEST_ASSERT_EQ(vm->mode, VM_MODE_PXXV48_4K); + + /* Set mode specific system register values. 
*/ + vcpu_sregs_get(vcpu, &sregs); + + sregs.idt.base = vm->arch.idt; + sregs.idt.limit = NUM_INTERRUPTS * sizeof(struct idt_entry) - 1; + sregs.gdt.base = vm->arch.gdt; + sregs.gdt.limit = getpagesize() - 1; + + sregs.cr0 = X86_CR0_PE | X86_CR0_NE | X86_CR0_PG; + sregs.cr4 |= X86_CR4_PAE | X86_CR4_OSFXSR; + if (kvm_cpu_has(X86_FEATURE_XSAVE)) + sregs.cr4 |= X86_CR4_OSXSAVE; + sregs.efer |= (EFER_LME | EFER_LMA | EFER_NX); + + kvm_seg_set_unusable(&sregs.ldt); + kvm_seg_set_kernel_code_64bit(&sregs.cs); + kvm_seg_set_kernel_data_64bit(&sregs.ds); + kvm_seg_set_kernel_data_64bit(&sregs.es); + kvm_seg_set_kernel_data_64bit(&sregs.gs); + kvm_seg_set_tss_64bit(vm->arch.tss, &sregs.tr); + + sregs.cr3 = vm->pgd; + vcpu_sregs_set(vcpu, &sregs); +} + +static void vcpu_init_xcrs(struct kvm_vm *vm, struct kvm_vcpu *vcpu) +{ + struct kvm_xcrs xcrs = { + .nr_xcrs = 1, + .xcrs[0].xcr = 0, + .xcrs[0].value = kvm_cpu_supported_xcr0(), + }; + + if (!kvm_cpu_has(X86_FEATURE_XSAVE)) + return; + + vcpu_xcrs_set(vcpu, &xcrs); +} + +static void set_idt_entry(struct kvm_vm *vm, int vector, unsigned long addr, + int dpl, unsigned short selector) +{ + struct idt_entry *base = + (struct idt_entry *)addr_gva2hva(vm, vm->arch.idt); + struct idt_entry *e = &base[vector]; + + memset(e, 0, sizeof(*e)); + e->offset0 = addr; + e->selector = selector; + e->ist = 0; + e->type = 14; + e->dpl = dpl; + e->p = 1; + e->offset1 = addr >> 16; + e->offset2 = addr >> 32; +} + +static bool kvm_fixup_exception(struct ex_regs *regs) +{ + if (regs->r9 != KVM_EXCEPTION_MAGIC || regs->rip != regs->r10) + return false; + + if (regs->vector == DE_VECTOR) + return false; + + regs->rip = regs->r11; + regs->r9 = regs->vector; + regs->r10 = regs->error_code; + return true; +} + +void route_exception(struct ex_regs *regs) +{ + typedef void(*handler)(struct ex_regs *); + handler *handlers = (handler *)exception_handlers; + + if (handlers && handlers[regs->vector]) { + handlers[regs->vector](regs); + return; + } + + if (kvm_fixup_exception(regs)) + return; + + GUEST_FAIL("Unhandled exception '0x%lx' at guest RIP '0x%lx'", + regs->vector, regs->rip); +} + +static void vm_init_descriptor_tables(struct kvm_vm *vm) +{ + extern void *idt_handlers; + struct kvm_segment seg; + int i; + + vm->arch.gdt = __vm_vaddr_alloc_page(vm, MEM_REGION_DATA); + vm->arch.idt = __vm_vaddr_alloc_page(vm, MEM_REGION_DATA); + vm->handlers = __vm_vaddr_alloc_page(vm, MEM_REGION_DATA); + vm->arch.tss = __vm_vaddr_alloc_page(vm, MEM_REGION_DATA); + + /* Handlers have the same address in both address spaces.*/ + for (i = 0; i < NUM_INTERRUPTS; i++) + set_idt_entry(vm, i, (unsigned long)(&idt_handlers)[i], 0, KERNEL_CS); + + *(vm_vaddr_t *)addr_gva2hva(vm, (vm_vaddr_t)(&exception_handlers)) = vm->handlers; + + kvm_seg_set_kernel_code_64bit(&seg); + kvm_seg_fill_gdt_64bit(vm, &seg); + + kvm_seg_set_kernel_data_64bit(&seg); + kvm_seg_fill_gdt_64bit(vm, &seg); + + kvm_seg_set_tss_64bit(vm->arch.tss, &seg); + kvm_seg_fill_gdt_64bit(vm, &seg); +} + +void vm_install_exception_handler(struct kvm_vm *vm, int vector, + void (*handler)(struct ex_regs *)) +{ + vm_vaddr_t *handlers = (vm_vaddr_t *)addr_gva2hva(vm, vm->handlers); + + handlers[vector] = (vm_vaddr_t)handler; +} + +void assert_on_unhandled_exception(struct kvm_vcpu *vcpu) +{ + struct ucall uc; + + if (get_ucall(vcpu, &uc) == UCALL_ABORT) + REPORT_GUEST_ASSERT(uc); +} + +void kvm_arch_vm_post_create(struct kvm_vm *vm) +{ + int r; + + TEST_ASSERT(kvm_has_cap(KVM_CAP_GET_TSC_KHZ), + "Require KVM_GET_TSC_KHZ to provide udelay() 
to guest."); + + vm_create_irqchip(vm); + vm_init_descriptor_tables(vm); + + sync_global_to_guest(vm, host_cpu_is_intel); + sync_global_to_guest(vm, host_cpu_is_amd); + sync_global_to_guest(vm, is_forced_emulation_enabled); + + if (vm->type == KVM_X86_SEV_VM || vm->type == KVM_X86_SEV_ES_VM) { + struct kvm_sev_init init = { 0 }; + + vm_sev_ioctl(vm, KVM_SEV_INIT2, &init); + } + + r = __vm_ioctl(vm, KVM_GET_TSC_KHZ, NULL); + TEST_ASSERT(r > 0, "KVM_GET_TSC_KHZ did not provide a valid TSC frequency."); + guest_tsc_khz = r; + sync_global_to_guest(vm, guest_tsc_khz); +} + +void vcpu_arch_set_entry_point(struct kvm_vcpu *vcpu, void *guest_code) +{ + struct kvm_regs regs; + + vcpu_regs_get(vcpu, ®s); + regs.rip = (unsigned long) guest_code; + vcpu_regs_set(vcpu, ®s); +} + +struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id) +{ + struct kvm_mp_state mp_state; + struct kvm_regs regs; + vm_vaddr_t stack_vaddr; + struct kvm_vcpu *vcpu; + + stack_vaddr = __vm_vaddr_alloc(vm, DEFAULT_STACK_PGS * getpagesize(), + DEFAULT_GUEST_STACK_VADDR_MIN, + MEM_REGION_DATA); + + stack_vaddr += DEFAULT_STACK_PGS * getpagesize(); + + /* + * Align stack to match calling sequence requirements in section "The + * Stack Frame" of the System V ABI AMD64 Architecture Processor + * Supplement, which requires the value (%rsp + 8) to be a multiple of + * 16 when control is transferred to the function entry point. + * + * If this code is ever used to launch a vCPU with 32-bit entry point it + * may need to subtract 4 bytes instead of 8 bytes. + */ + TEST_ASSERT(IS_ALIGNED(stack_vaddr, PAGE_SIZE), + "__vm_vaddr_alloc() did not provide a page-aligned address"); + stack_vaddr -= 8; + + vcpu = __vm_vcpu_add(vm, vcpu_id); + vcpu_init_cpuid(vcpu, kvm_get_supported_cpuid()); + vcpu_init_sregs(vm, vcpu); + vcpu_init_xcrs(vm, vcpu); + + /* Setup guest general purpose registers */ + vcpu_regs_get(vcpu, ®s); + regs.rflags = regs.rflags | 0x2; + regs.rsp = stack_vaddr; + vcpu_regs_set(vcpu, ®s); + + /* Setup the MP state */ + mp_state.mp_state = 0; + vcpu_mp_state_set(vcpu, &mp_state); + + /* + * Refresh CPUID after setting SREGS and XCR0, so that KVM's "runtime" + * updates to guest CPUID, e.g. for OSXSAVE and XSAVE state size, are + * reflected into selftests' vCPU CPUID cache, i.e. so that the cache + * is consistent with vCPU state. + */ + vcpu_get_cpuid(vcpu); + return vcpu; +} + +struct kvm_vcpu *vm_arch_vcpu_recreate(struct kvm_vm *vm, uint32_t vcpu_id) +{ + struct kvm_vcpu *vcpu = __vm_vcpu_add(vm, vcpu_id); + + vcpu_init_cpuid(vcpu, kvm_get_supported_cpuid()); + + return vcpu; +} + +void vcpu_arch_free(struct kvm_vcpu *vcpu) +{ + if (vcpu->cpuid) + free(vcpu->cpuid); +} + +/* Do not use kvm_supported_cpuid directly except for validity checks. 
*/ +static void *kvm_supported_cpuid; + +const struct kvm_cpuid2 *kvm_get_supported_cpuid(void) +{ + int kvm_fd; + + if (kvm_supported_cpuid) + return kvm_supported_cpuid; + + kvm_supported_cpuid = allocate_kvm_cpuid2(MAX_NR_CPUID_ENTRIES); + kvm_fd = open_kvm_dev_path_or_exit(); + + kvm_ioctl(kvm_fd, KVM_GET_SUPPORTED_CPUID, + (struct kvm_cpuid2 *)kvm_supported_cpuid); + + close(kvm_fd); + return kvm_supported_cpuid; +} + +static uint32_t __kvm_cpu_has(const struct kvm_cpuid2 *cpuid, + uint32_t function, uint32_t index, + uint8_t reg, uint8_t lo, uint8_t hi) +{ + const struct kvm_cpuid_entry2 *entry; + int i; + + for (i = 0; i < cpuid->nent; i++) { + entry = &cpuid->entries[i]; + + /* + * The output registers in kvm_cpuid_entry2 are in alphabetical + * order, but kvm_x86_cpu_feature matches that mess, so yay + * pointer shenanigans! + */ + if (entry->function == function && entry->index == index) + return ((&entry->eax)[reg] & GENMASK(hi, lo)) >> lo; + } + + return 0; +} + +bool kvm_cpuid_has(const struct kvm_cpuid2 *cpuid, + struct kvm_x86_cpu_feature feature) +{ + return __kvm_cpu_has(cpuid, feature.function, feature.index, + feature.reg, feature.bit, feature.bit); +} + +uint32_t kvm_cpuid_property(const struct kvm_cpuid2 *cpuid, + struct kvm_x86_cpu_property property) +{ + return __kvm_cpu_has(cpuid, property.function, property.index, + property.reg, property.lo_bit, property.hi_bit); +} + +uint64_t kvm_get_feature_msr(uint64_t msr_index) +{ + struct { + struct kvm_msrs header; + struct kvm_msr_entry entry; + } buffer = {}; + int r, kvm_fd; + + buffer.header.nmsrs = 1; + buffer.entry.index = msr_index; + kvm_fd = open_kvm_dev_path_or_exit(); + + r = __kvm_ioctl(kvm_fd, KVM_GET_MSRS, &buffer.header); + TEST_ASSERT(r == 1, KVM_IOCTL_ERROR(KVM_GET_MSRS, r)); + + close(kvm_fd); + return buffer.entry.data; +} + +void __vm_xsave_require_permission(uint64_t xfeature, const char *name) +{ + int kvm_fd; + u64 bitmask; + long rc; + struct kvm_device_attr attr = { + .group = 0, + .attr = KVM_X86_XCOMP_GUEST_SUPP, + .addr = (unsigned long) &bitmask, + }; + + TEST_ASSERT(!kvm_supported_cpuid, + "kvm_get_supported_cpuid() cannot be used before ARCH_REQ_XCOMP_GUEST_PERM"); + + TEST_ASSERT(is_power_of_2(xfeature), + "Dynamic XFeatures must be enabled one at a time"); + + kvm_fd = open_kvm_dev_path_or_exit(); + rc = __kvm_ioctl(kvm_fd, KVM_GET_DEVICE_ATTR, &attr); + close(kvm_fd); + + if (rc == -1 && (errno == ENXIO || errno == EINVAL)) + __TEST_REQUIRE(0, "KVM_X86_XCOMP_GUEST_SUPP not supported"); + + TEST_ASSERT(rc == 0, "KVM_GET_DEVICE_ATTR(0, KVM_X86_XCOMP_GUEST_SUPP) error: %ld", rc); + + __TEST_REQUIRE(bitmask & xfeature, + "Required XSAVE feature '%s' not supported", name); + + TEST_REQUIRE(!syscall(SYS_arch_prctl, ARCH_REQ_XCOMP_GUEST_PERM, ilog2(xfeature))); + + rc = syscall(SYS_arch_prctl, ARCH_GET_XCOMP_GUEST_PERM, &bitmask); + TEST_ASSERT(rc == 0, "prctl(ARCH_GET_XCOMP_GUEST_PERM) error: %ld", rc); + TEST_ASSERT(bitmask & xfeature, + "'%s' (0x%lx) not permitted after prctl(ARCH_REQ_XCOMP_GUEST_PERM) permitted=0x%lx", + name, xfeature, bitmask); +} + +void vcpu_init_cpuid(struct kvm_vcpu *vcpu, const struct kvm_cpuid2 *cpuid) +{ + TEST_ASSERT(cpuid != vcpu->cpuid, "@cpuid can't be the vCPU's CPUID"); + + /* Allow overriding the default CPUID. 
*/ + if (vcpu->cpuid && vcpu->cpuid->nent < cpuid->nent) { + free(vcpu->cpuid); + vcpu->cpuid = NULL; + } + + if (!vcpu->cpuid) + vcpu->cpuid = allocate_kvm_cpuid2(cpuid->nent); + + memcpy(vcpu->cpuid, cpuid, kvm_cpuid2_size(cpuid->nent)); + vcpu_set_cpuid(vcpu); +} + +void vcpu_set_cpuid_property(struct kvm_vcpu *vcpu, + struct kvm_x86_cpu_property property, + uint32_t value) +{ + struct kvm_cpuid_entry2 *entry; + + entry = __vcpu_get_cpuid_entry(vcpu, property.function, property.index); + + (&entry->eax)[property.reg] &= ~GENMASK(property.hi_bit, property.lo_bit); + (&entry->eax)[property.reg] |= value << property.lo_bit; + + vcpu_set_cpuid(vcpu); + + /* Sanity check that @value doesn't exceed the bounds in any way. */ + TEST_ASSERT_EQ(kvm_cpuid_property(vcpu->cpuid, property), value); +} + +void vcpu_clear_cpuid_entry(struct kvm_vcpu *vcpu, uint32_t function) +{ + struct kvm_cpuid_entry2 *entry = vcpu_get_cpuid_entry(vcpu, function); + + entry->eax = 0; + entry->ebx = 0; + entry->ecx = 0; + entry->edx = 0; + vcpu_set_cpuid(vcpu); +} + +void vcpu_set_or_clear_cpuid_feature(struct kvm_vcpu *vcpu, + struct kvm_x86_cpu_feature feature, + bool set) +{ + struct kvm_cpuid_entry2 *entry; + u32 *reg; + + entry = __vcpu_get_cpuid_entry(vcpu, feature.function, feature.index); + reg = (&entry->eax) + feature.reg; + + if (set) + *reg |= BIT(feature.bit); + else + *reg &= ~BIT(feature.bit); + + vcpu_set_cpuid(vcpu); +} + +uint64_t vcpu_get_msr(struct kvm_vcpu *vcpu, uint64_t msr_index) +{ + struct { + struct kvm_msrs header; + struct kvm_msr_entry entry; + } buffer = {}; + + buffer.header.nmsrs = 1; + buffer.entry.index = msr_index; + + vcpu_msrs_get(vcpu, &buffer.header); + + return buffer.entry.data; +} + +int _vcpu_set_msr(struct kvm_vcpu *vcpu, uint64_t msr_index, uint64_t msr_value) +{ + struct { + struct kvm_msrs header; + struct kvm_msr_entry entry; + } buffer = {}; + + memset(&buffer, 0, sizeof(buffer)); + buffer.header.nmsrs = 1; + buffer.entry.index = msr_index; + buffer.entry.data = msr_value; + + return __vcpu_ioctl(vcpu, KVM_SET_MSRS, &buffer.header); +} + +void vcpu_args_set(struct kvm_vcpu *vcpu, unsigned int num, ...) 
+{ + va_list ap; + struct kvm_regs regs; + + TEST_ASSERT(num >= 1 && num <= 6, "Unsupported number of args,\n" + " num: %u", + num); + + va_start(ap, num); + vcpu_regs_get(vcpu, ®s); + + if (num >= 1) + regs.rdi = va_arg(ap, uint64_t); + + if (num >= 2) + regs.rsi = va_arg(ap, uint64_t); + + if (num >= 3) + regs.rdx = va_arg(ap, uint64_t); + + if (num >= 4) + regs.rcx = va_arg(ap, uint64_t); + + if (num >= 5) + regs.r8 = va_arg(ap, uint64_t); + + if (num >= 6) + regs.r9 = va_arg(ap, uint64_t); + + vcpu_regs_set(vcpu, ®s); + va_end(ap); +} + +void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, uint8_t indent) +{ + struct kvm_regs regs; + struct kvm_sregs sregs; + + fprintf(stream, "%*svCPU ID: %u\n", indent, "", vcpu->id); + + fprintf(stream, "%*sregs:\n", indent + 2, ""); + vcpu_regs_get(vcpu, ®s); + regs_dump(stream, ®s, indent + 4); + + fprintf(stream, "%*ssregs:\n", indent + 2, ""); + vcpu_sregs_get(vcpu, &sregs); + sregs_dump(stream, &sregs, indent + 4); +} + +static struct kvm_msr_list *__kvm_get_msr_index_list(bool feature_msrs) +{ + struct kvm_msr_list *list; + struct kvm_msr_list nmsrs; + int kvm_fd, r; + + kvm_fd = open_kvm_dev_path_or_exit(); + + nmsrs.nmsrs = 0; + if (!feature_msrs) + r = __kvm_ioctl(kvm_fd, KVM_GET_MSR_INDEX_LIST, &nmsrs); + else + r = __kvm_ioctl(kvm_fd, KVM_GET_MSR_FEATURE_INDEX_LIST, &nmsrs); + + TEST_ASSERT(r == -1 && errno == E2BIG, + "Expected -E2BIG, got rc: %i errno: %i (%s)", + r, errno, strerror(errno)); + + list = malloc(sizeof(*list) + nmsrs.nmsrs * sizeof(list->indices[0])); + TEST_ASSERT(list, "-ENOMEM when allocating MSR index list"); + list->nmsrs = nmsrs.nmsrs; + + if (!feature_msrs) + kvm_ioctl(kvm_fd, KVM_GET_MSR_INDEX_LIST, list); + else + kvm_ioctl(kvm_fd, KVM_GET_MSR_FEATURE_INDEX_LIST, list); + close(kvm_fd); + + TEST_ASSERT(list->nmsrs == nmsrs.nmsrs, + "Number of MSRs in list changed, was %d, now %d", + nmsrs.nmsrs, list->nmsrs); + return list; +} + +const struct kvm_msr_list *kvm_get_msr_index_list(void) +{ + static const struct kvm_msr_list *list; + + if (!list) + list = __kvm_get_msr_index_list(false); + return list; +} + + +const struct kvm_msr_list *kvm_get_feature_msr_index_list(void) +{ + static const struct kvm_msr_list *list; + + if (!list) + list = __kvm_get_msr_index_list(true); + return list; +} + +bool kvm_msr_is_in_save_restore_list(uint32_t msr_index) +{ + const struct kvm_msr_list *list = kvm_get_msr_index_list(); + int i; + + for (i = 0; i < list->nmsrs; ++i) { + if (list->indices[i] == msr_index) + return true; + } + + return false; +} + +static void vcpu_save_xsave_state(struct kvm_vcpu *vcpu, + struct kvm_x86_state *state) +{ + int size = vm_check_cap(vcpu->vm, KVM_CAP_XSAVE2); + + if (size) { + state->xsave = malloc(size); + vcpu_xsave2_get(vcpu, state->xsave); + } else { + state->xsave = malloc(sizeof(struct kvm_xsave)); + vcpu_xsave_get(vcpu, state->xsave); + } +} + +struct kvm_x86_state *vcpu_save_state(struct kvm_vcpu *vcpu) +{ + const struct kvm_msr_list *msr_list = kvm_get_msr_index_list(); + struct kvm_x86_state *state; + int i; + + static int nested_size = -1; + + if (nested_size == -1) { + nested_size = kvm_check_cap(KVM_CAP_NESTED_STATE); + TEST_ASSERT(nested_size <= sizeof(state->nested_), + "Nested state size too big, %i > %zi", + nested_size, sizeof(state->nested_)); + } + + /* + * When KVM exits to userspace with KVM_EXIT_IO, KVM guarantees + * guest state is consistent only after userspace re-enters the + * kernel with KVM_RUN. Complete IO prior to migrating state + * to a new VM. 
+ */ + vcpu_run_complete_io(vcpu); + + state = malloc(sizeof(*state) + msr_list->nmsrs * sizeof(state->msrs.entries[0])); + TEST_ASSERT(state, "-ENOMEM when allocating kvm state"); + + vcpu_events_get(vcpu, &state->events); + vcpu_mp_state_get(vcpu, &state->mp_state); + vcpu_regs_get(vcpu, &state->regs); + vcpu_save_xsave_state(vcpu, state); + + if (kvm_has_cap(KVM_CAP_XCRS)) + vcpu_xcrs_get(vcpu, &state->xcrs); + + vcpu_sregs_get(vcpu, &state->sregs); + + if (nested_size) { + state->nested.size = sizeof(state->nested_); + + vcpu_nested_state_get(vcpu, &state->nested); + TEST_ASSERT(state->nested.size <= nested_size, + "Nested state size too big, %i (KVM_CHECK_CAP gave %i)", + state->nested.size, nested_size); + } else { + state->nested.size = 0; + } + + state->msrs.nmsrs = msr_list->nmsrs; + for (i = 0; i < msr_list->nmsrs; i++) + state->msrs.entries[i].index = msr_list->indices[i]; + vcpu_msrs_get(vcpu, &state->msrs); + + vcpu_debugregs_get(vcpu, &state->debugregs); + + return state; +} + +void vcpu_load_state(struct kvm_vcpu *vcpu, struct kvm_x86_state *state) +{ + vcpu_sregs_set(vcpu, &state->sregs); + vcpu_msrs_set(vcpu, &state->msrs); + + if (kvm_has_cap(KVM_CAP_XCRS)) + vcpu_xcrs_set(vcpu, &state->xcrs); + + vcpu_xsave_set(vcpu, state->xsave); + vcpu_events_set(vcpu, &state->events); + vcpu_mp_state_set(vcpu, &state->mp_state); + vcpu_debugregs_set(vcpu, &state->debugregs); + vcpu_regs_set(vcpu, &state->regs); + + if (state->nested.size) + vcpu_nested_state_set(vcpu, &state->nested); +} + +void kvm_x86_state_cleanup(struct kvm_x86_state *state) +{ + free(state->xsave); + free(state); +} + +void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits) +{ + if (!kvm_cpu_has_p(X86_PROPERTY_MAX_PHY_ADDR)) { + *pa_bits = kvm_cpu_has(X86_FEATURE_PAE) ? 36 : 32; + *va_bits = 32; + } else { + *pa_bits = kvm_cpu_property(X86_PROPERTY_MAX_PHY_ADDR); + *va_bits = kvm_cpu_property(X86_PROPERTY_MAX_VIRT_ADDR); + } +} + +void kvm_init_vm_address_properties(struct kvm_vm *vm) +{ + if (vm->type == KVM_X86_SEV_VM || vm->type == KVM_X86_SEV_ES_VM) { + vm->arch.sev_fd = open_sev_dev_path_or_exit(); + vm->arch.c_bit = BIT_ULL(this_cpu_property(X86_PROPERTY_SEV_C_BIT)); + vm->gpa_tag_mask = vm->arch.c_bit; + } else { + vm->arch.sev_fd = -1; + } +} + +const struct kvm_cpuid_entry2 *get_cpuid_entry(const struct kvm_cpuid2 *cpuid, + uint32_t function, uint32_t index) +{ + int i; + + for (i = 0; i < cpuid->nent; i++) { + if (cpuid->entries[i].function == function && + cpuid->entries[i].index == index) + return &cpuid->entries[i]; + } + + TEST_FAIL("CPUID function 0x%x index 0x%x not found ", function, index); + + return NULL; +} + +#define X86_HYPERCALL(inputs...) 
\ +({ \ + uint64_t r; \ + \ + asm volatile("test %[use_vmmcall], %[use_vmmcall]\n\t" \ + "jnz 1f\n\t" \ + "vmcall\n\t" \ + "jmp 2f\n\t" \ + "1: vmmcall\n\t" \ + "2:" \ + : "=a"(r) \ + : [use_vmmcall] "r" (host_cpu_is_amd), inputs); \ + \ + r; \ +}) + +uint64_t kvm_hypercall(uint64_t nr, uint64_t a0, uint64_t a1, uint64_t a2, + uint64_t a3) +{ + return X86_HYPERCALL("a"(nr), "b"(a0), "c"(a1), "d"(a2), "S"(a3)); +} + +uint64_t __xen_hypercall(uint64_t nr, uint64_t a0, void *a1) +{ + return X86_HYPERCALL("a"(nr), "D"(a0), "S"(a1)); +} + +void xen_hypercall(uint64_t nr, uint64_t a0, void *a1) +{ + GUEST_ASSERT(!__xen_hypercall(nr, a0, a1)); +} + +unsigned long vm_compute_max_gfn(struct kvm_vm *vm) +{ + const unsigned long num_ht_pages = 12 << (30 - vm->page_shift); /* 12 GiB */ + unsigned long ht_gfn, max_gfn, max_pfn; + uint8_t maxphyaddr, guest_maxphyaddr; + + /* + * Use "guest MAXPHYADDR" from KVM if it's available. Guest MAXPHYADDR + * enumerates the max _mappable_ GPA, which can be less than the raw + * MAXPHYADDR, e.g. if MAXPHYADDR=52, KVM is using TDP, and the CPU + * doesn't support 5-level TDP. + */ + guest_maxphyaddr = kvm_cpu_property(X86_PROPERTY_GUEST_MAX_PHY_ADDR); + guest_maxphyaddr = guest_maxphyaddr ?: vm->pa_bits; + TEST_ASSERT(guest_maxphyaddr <= vm->pa_bits, + "Guest MAXPHYADDR should never be greater than raw MAXPHYADDR"); + + max_gfn = (1ULL << (guest_maxphyaddr - vm->page_shift)) - 1; + + /* Avoid reserved HyperTransport region on AMD processors. */ + if (!host_cpu_is_amd) + return max_gfn; + + /* On parts with <40 physical address bits, the area is fully hidden */ + if (vm->pa_bits < 40) + return max_gfn; + + /* Before family 17h, the HyperTransport area is just below 1T. */ + ht_gfn = (1 << 28) - num_ht_pages; + if (this_cpu_family() < 0x17) + goto done; + + /* + * Otherwise it's at the top of the physical address space, possibly + * reduced due to SME by bits 11:6 of CPUID[0x8000001f].EBX. Use + * the old conservative value if MAXPHYADDR is not enumerated. + */ + if (!this_cpu_has_p(X86_PROPERTY_MAX_PHY_ADDR)) + goto done; + + maxphyaddr = this_cpu_property(X86_PROPERTY_MAX_PHY_ADDR); + max_pfn = (1ULL << (maxphyaddr - vm->page_shift)) - 1; + + if (this_cpu_has_p(X86_PROPERTY_PHYS_ADDR_REDUCTION)) + max_pfn >>= this_cpu_property(X86_PROPERTY_PHYS_ADDR_REDUCTION); + + ht_gfn = max_pfn - num_ht_pages; +done: + return min(max_gfn, ht_gfn - 1); +} + +/* Returns true if kvm_intel was loaded with unrestricted_guest=1. */ +bool vm_is_unrestricted_guest(struct kvm_vm *vm) +{ + /* Ensure that a KVM vendor-specific module is loaded. 
*/ + if (vm == NULL) + close(open_kvm_dev_path_or_exit()); + + return get_kvm_intel_param_bool("unrestricted_guest"); +} + +void kvm_selftest_arch_init(void) +{ + host_cpu_is_intel = this_cpu_is_intel(); + host_cpu_is_amd = this_cpu_is_amd(); + is_forced_emulation_enabled = kvm_is_forced_emulation_enabled(); +} + +bool sys_clocksource_is_based_on_tsc(void) +{ + char *clk_name = sys_get_cur_clocksource(); + bool ret = !strcmp(clk_name, "tsc\n") || + !strcmp(clk_name, "hyperv_clocksource_tsc_page\n"); + + free(clk_name); + + return ret; +} diff --git a/tools/testing/selftests/kvm/lib/x86/sev.c b/tools/testing/selftests/kvm/lib/x86/sev.c new file mode 100644 index 000000000000..e9535ee20b7f --- /dev/null +++ b/tools/testing/selftests/kvm/lib/x86/sev.c @@ -0,0 +1,141 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include +#include + +#include "sev.h" + +/* + * sparsebit_next_clear() can return 0 if [x, 2**64-1] are all set, and the + * -1 would then cause an underflow back to 2**64 - 1. This is expected and + * correct. + * + * If the last range in the sparsebit is [x, y] and we try to iterate, + * sparsebit_next_set() will return 0, and sparsebit_next_clear() will try + * and find the first range, but that's correct because the condition + * expression would cause us to quit the loop. + */ +static void encrypt_region(struct kvm_vm *vm, struct userspace_mem_region *region) +{ + const struct sparsebit *protected_phy_pages = region->protected_phy_pages; + const vm_paddr_t gpa_base = region->region.guest_phys_addr; + const sparsebit_idx_t lowest_page_in_region = gpa_base >> vm->page_shift; + sparsebit_idx_t i, j; + + if (!sparsebit_any_set(protected_phy_pages)) + return; + + sev_register_encrypted_memory(vm, region); + + sparsebit_for_each_set_range(protected_phy_pages, i, j) { + const uint64_t size = (j - i + 1) * vm->page_size; + const uint64_t offset = (i - lowest_page_in_region) * vm->page_size; + + sev_launch_update_data(vm, gpa_base + offset, size); + } +} + +void sev_vm_init(struct kvm_vm *vm) +{ + if (vm->type == KVM_X86_DEFAULT_VM) { + assert(vm->arch.sev_fd == -1); + vm->arch.sev_fd = open_sev_dev_path_or_exit(); + vm_sev_ioctl(vm, KVM_SEV_INIT, NULL); + } else { + struct kvm_sev_init init = { 0 }; + assert(vm->type == KVM_X86_SEV_VM); + vm_sev_ioctl(vm, KVM_SEV_INIT2, &init); + } +} + +void sev_es_vm_init(struct kvm_vm *vm) +{ + if (vm->type == KVM_X86_DEFAULT_VM) { + assert(vm->arch.sev_fd == -1); + vm->arch.sev_fd = open_sev_dev_path_or_exit(); + vm_sev_ioctl(vm, KVM_SEV_ES_INIT, NULL); + } else { + struct kvm_sev_init init = { 0 }; + assert(vm->type == KVM_X86_SEV_ES_VM); + vm_sev_ioctl(vm, KVM_SEV_INIT2, &init); + } +} + +void sev_vm_launch(struct kvm_vm *vm, uint32_t policy) +{ + struct kvm_sev_launch_start launch_start = { + .policy = policy, + }; + struct userspace_mem_region *region; + struct kvm_sev_guest_status status; + int ctr; + + vm_sev_ioctl(vm, KVM_SEV_LAUNCH_START, &launch_start); + vm_sev_ioctl(vm, KVM_SEV_GUEST_STATUS, &status); + + TEST_ASSERT_EQ(status.policy, policy); + TEST_ASSERT_EQ(status.state, SEV_GUEST_STATE_LAUNCH_UPDATE); + + hash_for_each(vm->regions.slot_hash, ctr, region, slot_node) + encrypt_region(vm, region); + + if (policy & SEV_POLICY_ES) + vm_sev_ioctl(vm, KVM_SEV_LAUNCH_UPDATE_VMSA, NULL); + + vm->arch.is_pt_protected = true; +} + +void sev_vm_launch_measure(struct kvm_vm *vm, uint8_t *measurement) +{ + struct kvm_sev_launch_measure launch_measure; + struct kvm_sev_guest_status guest_status; + + launch_measure.len = 256; + launch_measure.uaddr = 
(__u64)measurement; + vm_sev_ioctl(vm, KVM_SEV_LAUNCH_MEASURE, &launch_measure); + + vm_sev_ioctl(vm, KVM_SEV_GUEST_STATUS, &guest_status); + TEST_ASSERT_EQ(guest_status.state, SEV_GUEST_STATE_LAUNCH_SECRET); +} + +void sev_vm_launch_finish(struct kvm_vm *vm) +{ + struct kvm_sev_guest_status status; + + vm_sev_ioctl(vm, KVM_SEV_GUEST_STATUS, &status); + TEST_ASSERT(status.state == SEV_GUEST_STATE_LAUNCH_UPDATE || + status.state == SEV_GUEST_STATE_LAUNCH_SECRET, + "Unexpected guest state: %d", status.state); + + vm_sev_ioctl(vm, KVM_SEV_LAUNCH_FINISH, NULL); + + vm_sev_ioctl(vm, KVM_SEV_GUEST_STATUS, &status); + TEST_ASSERT_EQ(status.state, SEV_GUEST_STATE_RUNNING); +} + +struct kvm_vm *vm_sev_create_with_one_vcpu(uint32_t type, void *guest_code, + struct kvm_vcpu **cpu) +{ + struct vm_shape shape = { + .mode = VM_MODE_DEFAULT, + .type = type, + }; + struct kvm_vm *vm; + struct kvm_vcpu *cpus[1]; + + vm = __vm_create_with_vcpus(shape, 1, 0, guest_code, cpus); + *cpu = cpus[0]; + + return vm; +} + +void vm_sev_launch(struct kvm_vm *vm, uint32_t policy, uint8_t *measurement) +{ + sev_vm_launch(vm, policy); + + if (!measurement) + measurement = alloca(256); + + sev_vm_launch_measure(vm, measurement); + + sev_vm_launch_finish(vm); +} diff --git a/tools/testing/selftests/kvm/lib/x86/svm.c b/tools/testing/selftests/kvm/lib/x86/svm.c new file mode 100644 index 000000000000..d239c2097391 --- /dev/null +++ b/tools/testing/selftests/kvm/lib/x86/svm.c @@ -0,0 +1,163 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Helpers used for nested SVM testing + * Largely inspired from KVM unit test svm.c + * + * Copyright (C) 2020, Red Hat, Inc. + */ + +#include "test_util.h" +#include "kvm_util.h" +#include "processor.h" +#include "svm_util.h" + +#define SEV_DEV_PATH "/dev/sev" + +struct gpr64_regs guest_regs; +u64 rflags; + +/* Allocate memory regions for nested SVM tests. + * + * Input Args: + * vm - The VM to allocate guest-virtual addresses in. + * + * Output Args: + * p_svm_gva - The guest virtual address for the struct svm_test_data. + * + * Return: + * Pointer to structure with the addresses of the SVM areas. 
+ */ +struct svm_test_data * +vcpu_alloc_svm(struct kvm_vm *vm, vm_vaddr_t *p_svm_gva) +{ + vm_vaddr_t svm_gva = vm_vaddr_alloc_page(vm); + struct svm_test_data *svm = addr_gva2hva(vm, svm_gva); + + svm->vmcb = (void *)vm_vaddr_alloc_page(vm); + svm->vmcb_hva = addr_gva2hva(vm, (uintptr_t)svm->vmcb); + svm->vmcb_gpa = addr_gva2gpa(vm, (uintptr_t)svm->vmcb); + + svm->save_area = (void *)vm_vaddr_alloc_page(vm); + svm->save_area_hva = addr_gva2hva(vm, (uintptr_t)svm->save_area); + svm->save_area_gpa = addr_gva2gpa(vm, (uintptr_t)svm->save_area); + + svm->msr = (void *)vm_vaddr_alloc_page(vm); + svm->msr_hva = addr_gva2hva(vm, (uintptr_t)svm->msr); + svm->msr_gpa = addr_gva2gpa(vm, (uintptr_t)svm->msr); + memset(svm->msr_hva, 0, getpagesize()); + + *p_svm_gva = svm_gva; + return svm; +} + +static void vmcb_set_seg(struct vmcb_seg *seg, u16 selector, + u64 base, u32 limit, u32 attr) +{ + seg->selector = selector; + seg->attrib = attr; + seg->limit = limit; + seg->base = base; +} + +void generic_svm_setup(struct svm_test_data *svm, void *guest_rip, void *guest_rsp) +{ + struct vmcb *vmcb = svm->vmcb; + uint64_t vmcb_gpa = svm->vmcb_gpa; + struct vmcb_save_area *save = &vmcb->save; + struct vmcb_control_area *ctrl = &vmcb->control; + u32 data_seg_attr = 3 | SVM_SELECTOR_S_MASK | SVM_SELECTOR_P_MASK + | SVM_SELECTOR_DB_MASK | SVM_SELECTOR_G_MASK; + u32 code_seg_attr = 9 | SVM_SELECTOR_S_MASK | SVM_SELECTOR_P_MASK + | SVM_SELECTOR_L_MASK | SVM_SELECTOR_G_MASK; + uint64_t efer; + + efer = rdmsr(MSR_EFER); + wrmsr(MSR_EFER, efer | EFER_SVME); + wrmsr(MSR_VM_HSAVE_PA, svm->save_area_gpa); + + memset(vmcb, 0, sizeof(*vmcb)); + asm volatile ("vmsave %0\n\t" : : "a" (vmcb_gpa) : "memory"); + vmcb_set_seg(&save->es, get_es(), 0, -1U, data_seg_attr); + vmcb_set_seg(&save->cs, get_cs(), 0, -1U, code_seg_attr); + vmcb_set_seg(&save->ss, get_ss(), 0, -1U, data_seg_attr); + vmcb_set_seg(&save->ds, get_ds(), 0, -1U, data_seg_attr); + vmcb_set_seg(&save->gdtr, 0, get_gdt().address, get_gdt().size, 0); + vmcb_set_seg(&save->idtr, 0, get_idt().address, get_idt().size, 0); + + ctrl->asid = 1; + save->cpl = 0; + save->efer = rdmsr(MSR_EFER); + asm volatile ("mov %%cr4, %0" : "=r"(save->cr4) : : "memory"); + asm volatile ("mov %%cr3, %0" : "=r"(save->cr3) : : "memory"); + asm volatile ("mov %%cr0, %0" : "=r"(save->cr0) : : "memory"); + asm volatile ("mov %%dr7, %0" : "=r"(save->dr7) : : "memory"); + asm volatile ("mov %%dr6, %0" : "=r"(save->dr6) : : "memory"); + asm volatile ("mov %%cr2, %0" : "=r"(save->cr2) : : "memory"); + save->g_pat = rdmsr(MSR_IA32_CR_PAT); + save->dbgctl = rdmsr(MSR_IA32_DEBUGCTLMSR); + ctrl->intercept = (1ULL << INTERCEPT_VMRUN) | + (1ULL << INTERCEPT_VMMCALL); + ctrl->msrpm_base_pa = svm->msr_gpa; + + vmcb->save.rip = (u64)guest_rip; + vmcb->save.rsp = (u64)guest_rsp; + guest_regs.rdi = (u64)svm; +} + +/* + * save/restore 64-bit general registers except rax, rip, rsp + * which are directly handed through the VMCB guest processor state + */ +#define SAVE_GPR_C \ + "xchg %%rbx, guest_regs+0x20\n\t" \ + "xchg %%rcx, guest_regs+0x10\n\t" \ + "xchg %%rdx, guest_regs+0x18\n\t" \ + "xchg %%rbp, guest_regs+0x30\n\t" \ + "xchg %%rsi, guest_regs+0x38\n\t" \ + "xchg %%rdi, guest_regs+0x40\n\t" \ + "xchg %%r8, guest_regs+0x48\n\t" \ + "xchg %%r9, guest_regs+0x50\n\t" \ + "xchg %%r10, guest_regs+0x58\n\t" \ + "xchg %%r11, guest_regs+0x60\n\t" \ + "xchg %%r12, guest_regs+0x68\n\t" \ + "xchg %%r13, guest_regs+0x70\n\t" \ + "xchg %%r14, guest_regs+0x78\n\t" \ + "xchg %%r15, guest_regs+0x80\n\t" + +#define 
LOAD_GPR_C SAVE_GPR_C + +/* + * selftests do not use interrupts so we dropped clgi/sti/cli/stgi + * for now. registers involved in LOAD/SAVE_GPR_C are eventually + * unmodified so they do not need to be in the clobber list. + */ +void run_guest(struct vmcb *vmcb, uint64_t vmcb_gpa) +{ + asm volatile ( + "vmload %[vmcb_gpa]\n\t" + "mov rflags, %%r15\n\t" // rflags + "mov %%r15, 0x170(%[vmcb])\n\t" + "mov guest_regs, %%r15\n\t" // rax + "mov %%r15, 0x1f8(%[vmcb])\n\t" + LOAD_GPR_C + "vmrun %[vmcb_gpa]\n\t" + SAVE_GPR_C + "mov 0x170(%[vmcb]), %%r15\n\t" // rflags + "mov %%r15, rflags\n\t" + "mov 0x1f8(%[vmcb]), %%r15\n\t" // rax + "mov %%r15, guest_regs\n\t" + "vmsave %[vmcb_gpa]\n\t" + : : [vmcb] "r" (vmcb), [vmcb_gpa] "a" (vmcb_gpa) + : "r15", "memory"); +} + +/* + * Open SEV_DEV_PATH if available, otherwise exit the entire program. + * + * Return: + * The opened file descriptor of /dev/sev. + */ +int open_sev_dev_path_or_exit(void) +{ + return open_path_or_exit(SEV_DEV_PATH, 0); +} diff --git a/tools/testing/selftests/kvm/lib/x86/ucall.c b/tools/testing/selftests/kvm/lib/x86/ucall.c new file mode 100644 index 000000000000..1265cecc7dd1 --- /dev/null +++ b/tools/testing/selftests/kvm/lib/x86/ucall.c @@ -0,0 +1,56 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * ucall support. A ucall is a "hypercall to userspace". + * + * Copyright (C) 2018, Red Hat, Inc. + */ +#include "kvm_util.h" + +#define UCALL_PIO_PORT ((uint16_t)0x1000) + +void ucall_arch_do_ucall(vm_vaddr_t uc) +{ + /* + * FIXME: Revert this hack (the entire commit that added it) once nVMX + * preserves L2 GPRs across a nested VM-Exit. If a ucall from L2, e.g. + * to do a GUEST_SYNC(), lands the vCPU in L1, any and all GPRs can be + * clobbered by L1. Save and restore non-volatile GPRs (clobbering RBP + * in particular is problematic) along with RDX and RDI (which are + * inputs), and clobber volatile GPRs. *sigh* + */ +#define HORRIFIC_L2_UCALL_CLOBBER_HACK \ + "rcx", "rsi", "r8", "r9", "r10", "r11" + + asm volatile("push %%rbp\n\t" + "push %%r15\n\t" + "push %%r14\n\t" + "push %%r13\n\t" + "push %%r12\n\t" + "push %%rbx\n\t" + "push %%rdx\n\t" + "push %%rdi\n\t" + "in %[port], %%al\n\t" + "pop %%rdi\n\t" + "pop %%rdx\n\t" + "pop %%rbx\n\t" + "pop %%r12\n\t" + "pop %%r13\n\t" + "pop %%r14\n\t" + "pop %%r15\n\t" + "pop %%rbp\n\t" + : : [port] "d" (UCALL_PIO_PORT), "D" (uc) : "rax", "memory", + HORRIFIC_L2_UCALL_CLOBBER_HACK); +} + +void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu) +{ + struct kvm_run *run = vcpu->run; + + if (run->exit_reason == KVM_EXIT_IO && run->io.port == UCALL_PIO_PORT) { + struct kvm_regs regs; + + vcpu_regs_get(vcpu, ®s); + return (void *)regs.rdi; + } + return NULL; +} diff --git a/tools/testing/selftests/kvm/lib/x86/vmx.c b/tools/testing/selftests/kvm/lib/x86/vmx.c new file mode 100644 index 000000000000..d4d1208dd023 --- /dev/null +++ b/tools/testing/selftests/kvm/lib/x86/vmx.c @@ -0,0 +1,552 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2018, Google LLC. 
+ */ + +#include + +#include "test_util.h" +#include "kvm_util.h" +#include "processor.h" +#include "vmx.h" + +#define PAGE_SHIFT_4K 12 + +#define KVM_EPT_PAGE_TABLE_MIN_PADDR 0x1c0000 + +bool enable_evmcs; + +struct hv_enlightened_vmcs *current_evmcs; +struct hv_vp_assist_page *current_vp_assist; + +struct eptPageTableEntry { + uint64_t readable:1; + uint64_t writable:1; + uint64_t executable:1; + uint64_t memory_type:3; + uint64_t ignore_pat:1; + uint64_t page_size:1; + uint64_t accessed:1; + uint64_t dirty:1; + uint64_t ignored_11_10:2; + uint64_t address:40; + uint64_t ignored_62_52:11; + uint64_t suppress_ve:1; +}; + +struct eptPageTablePointer { + uint64_t memory_type:3; + uint64_t page_walk_length:3; + uint64_t ad_enabled:1; + uint64_t reserved_11_07:5; + uint64_t address:40; + uint64_t reserved_63_52:12; +}; +int vcpu_enable_evmcs(struct kvm_vcpu *vcpu) +{ + uint16_t evmcs_ver; + + vcpu_enable_cap(vcpu, KVM_CAP_HYPERV_ENLIGHTENED_VMCS, + (unsigned long)&evmcs_ver); + + /* KVM should return supported EVMCS version range */ + TEST_ASSERT(((evmcs_ver >> 8) >= (evmcs_ver & 0xff)) && + (evmcs_ver & 0xff) > 0, + "Incorrect EVMCS version range: %x:%x", + evmcs_ver & 0xff, evmcs_ver >> 8); + + return evmcs_ver; +} + +/* Allocate memory regions for nested VMX tests. + * + * Input Args: + * vm - The VM to allocate guest-virtual addresses in. + * + * Output Args: + * p_vmx_gva - The guest virtual address for the struct vmx_pages. + * + * Return: + * Pointer to structure with the addresses of the VMX areas. + */ +struct vmx_pages * +vcpu_alloc_vmx(struct kvm_vm *vm, vm_vaddr_t *p_vmx_gva) +{ + vm_vaddr_t vmx_gva = vm_vaddr_alloc_page(vm); + struct vmx_pages *vmx = addr_gva2hva(vm, vmx_gva); + + /* Setup of a region of guest memory for the vmxon region. */ + vmx->vmxon = (void *)vm_vaddr_alloc_page(vm); + vmx->vmxon_hva = addr_gva2hva(vm, (uintptr_t)vmx->vmxon); + vmx->vmxon_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vmxon); + + /* Setup of a region of guest memory for a vmcs. */ + vmx->vmcs = (void *)vm_vaddr_alloc_page(vm); + vmx->vmcs_hva = addr_gva2hva(vm, (uintptr_t)vmx->vmcs); + vmx->vmcs_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vmcs); + + /* Setup of a region of guest memory for the MSR bitmap. */ + vmx->msr = (void *)vm_vaddr_alloc_page(vm); + vmx->msr_hva = addr_gva2hva(vm, (uintptr_t)vmx->msr); + vmx->msr_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->msr); + memset(vmx->msr_hva, 0, getpagesize()); + + /* Setup of a region of guest memory for the shadow VMCS. */ + vmx->shadow_vmcs = (void *)vm_vaddr_alloc_page(vm); + vmx->shadow_vmcs_hva = addr_gva2hva(vm, (uintptr_t)vmx->shadow_vmcs); + vmx->shadow_vmcs_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->shadow_vmcs); + + /* Setup of a region of guest memory for the VMREAD and VMWRITE bitmaps. */ + vmx->vmread = (void *)vm_vaddr_alloc_page(vm); + vmx->vmread_hva = addr_gva2hva(vm, (uintptr_t)vmx->vmread); + vmx->vmread_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vmread); + memset(vmx->vmread_hva, 0, getpagesize()); + + vmx->vmwrite = (void *)vm_vaddr_alloc_page(vm); + vmx->vmwrite_hva = addr_gva2hva(vm, (uintptr_t)vmx->vmwrite); + vmx->vmwrite_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vmwrite); + memset(vmx->vmwrite_hva, 0, getpagesize()); + + *p_vmx_gva = vmx_gva; + return vmx; +} + +bool prepare_for_vmx_operation(struct vmx_pages *vmx) +{ + uint64_t feature_control; + uint64_t required; + unsigned long cr0; + unsigned long cr4; + + /* + * Ensure bits in CR0 and CR4 are valid in VMX operation: + * - Bit X is 1 in _FIXED0: bit X is fixed to 1 in CRx. 
+ * - Bit X is 0 in _FIXED1: bit X is fixed to 0 in CRx. + */ + __asm__ __volatile__("mov %%cr0, %0" : "=r"(cr0) : : "memory"); + cr0 &= rdmsr(MSR_IA32_VMX_CR0_FIXED1); + cr0 |= rdmsr(MSR_IA32_VMX_CR0_FIXED0); + __asm__ __volatile__("mov %0, %%cr0" : : "r"(cr0) : "memory"); + + __asm__ __volatile__("mov %%cr4, %0" : "=r"(cr4) : : "memory"); + cr4 &= rdmsr(MSR_IA32_VMX_CR4_FIXED1); + cr4 |= rdmsr(MSR_IA32_VMX_CR4_FIXED0); + /* Enable VMX operation */ + cr4 |= X86_CR4_VMXE; + __asm__ __volatile__("mov %0, %%cr4" : : "r"(cr4) : "memory"); + + /* + * Configure IA32_FEATURE_CONTROL MSR to allow VMXON: + * Bit 0: Lock bit. If clear, VMXON causes a #GP. + * Bit 2: Enables VMXON outside of SMX operation. If clear, VMXON + * outside of SMX causes a #GP. + */ + required = FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX; + required |= FEAT_CTL_LOCKED; + feature_control = rdmsr(MSR_IA32_FEAT_CTL); + if ((feature_control & required) != required) + wrmsr(MSR_IA32_FEAT_CTL, feature_control | required); + + /* Enter VMX root operation. */ + *(uint32_t *)(vmx->vmxon) = vmcs_revision(); + if (vmxon(vmx->vmxon_gpa)) + return false; + + return true; +} + +bool load_vmcs(struct vmx_pages *vmx) +{ + /* Load a VMCS. */ + *(uint32_t *)(vmx->vmcs) = vmcs_revision(); + if (vmclear(vmx->vmcs_gpa)) + return false; + + if (vmptrld(vmx->vmcs_gpa)) + return false; + + /* Setup shadow VMCS, do not load it yet. */ + *(uint32_t *)(vmx->shadow_vmcs) = vmcs_revision() | 0x80000000ul; + if (vmclear(vmx->shadow_vmcs_gpa)) + return false; + + return true; +} + +static bool ept_vpid_cap_supported(uint64_t mask) +{ + return rdmsr(MSR_IA32_VMX_EPT_VPID_CAP) & mask; +} + +bool ept_1g_pages_supported(void) +{ + return ept_vpid_cap_supported(VMX_EPT_VPID_CAP_1G_PAGES); +} + +/* + * Initialize the control fields to the most basic settings possible. 
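+ *
+ * As a rough sketch of what follows: when vmx->eptp_gpa is set, the EPT
+ * pointer is assembled from the eptPageTablePointer bitfield defined above,
+ * i.e.
+ *
+ *   memory_type      (bits 2:0)   = X86_MEMTYPE_WB
+ *   page_walk_length (bits 5:3)   = 3, i.e. a 4-level walk
+ *   ad_enabled       (bit 6)      = whether VMX_EPT_VPID_CAP_AD_BITS is set
+ *   address          (bits 51:12) = vmx->eptp_gpa >> PAGE_SHIFT_4K
+ *
+ * and the packed value is written via vmwrite(EPT_POINTER, ...).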
+ */ +static inline void init_vmcs_control_fields(struct vmx_pages *vmx) +{ + uint32_t sec_exec_ctl = 0; + + vmwrite(VIRTUAL_PROCESSOR_ID, 0); + vmwrite(POSTED_INTR_NV, 0); + + vmwrite(PIN_BASED_VM_EXEC_CONTROL, rdmsr(MSR_IA32_VMX_TRUE_PINBASED_CTLS)); + + if (vmx->eptp_gpa) { + uint64_t ept_paddr; + struct eptPageTablePointer eptp = { + .memory_type = X86_MEMTYPE_WB, + .page_walk_length = 3, /* + 1 */ + .ad_enabled = ept_vpid_cap_supported(VMX_EPT_VPID_CAP_AD_BITS), + .address = vmx->eptp_gpa >> PAGE_SHIFT_4K, + }; + + memcpy(&ept_paddr, &eptp, sizeof(ept_paddr)); + vmwrite(EPT_POINTER, ept_paddr); + sec_exec_ctl |= SECONDARY_EXEC_ENABLE_EPT; + } + + if (!vmwrite(SECONDARY_VM_EXEC_CONTROL, sec_exec_ctl)) + vmwrite(CPU_BASED_VM_EXEC_CONTROL, + rdmsr(MSR_IA32_VMX_TRUE_PROCBASED_CTLS) | CPU_BASED_ACTIVATE_SECONDARY_CONTROLS); + else { + vmwrite(CPU_BASED_VM_EXEC_CONTROL, rdmsr(MSR_IA32_VMX_TRUE_PROCBASED_CTLS)); + GUEST_ASSERT(!sec_exec_ctl); + } + + vmwrite(EXCEPTION_BITMAP, 0); + vmwrite(PAGE_FAULT_ERROR_CODE_MASK, 0); + vmwrite(PAGE_FAULT_ERROR_CODE_MATCH, -1); /* Never match */ + vmwrite(CR3_TARGET_COUNT, 0); + vmwrite(VM_EXIT_CONTROLS, rdmsr(MSR_IA32_VMX_EXIT_CTLS) | + VM_EXIT_HOST_ADDR_SPACE_SIZE); /* 64-bit host */ + vmwrite(VM_EXIT_MSR_STORE_COUNT, 0); + vmwrite(VM_EXIT_MSR_LOAD_COUNT, 0); + vmwrite(VM_ENTRY_CONTROLS, rdmsr(MSR_IA32_VMX_ENTRY_CTLS) | + VM_ENTRY_IA32E_MODE); /* 64-bit guest */ + vmwrite(VM_ENTRY_MSR_LOAD_COUNT, 0); + vmwrite(VM_ENTRY_INTR_INFO_FIELD, 0); + vmwrite(TPR_THRESHOLD, 0); + + vmwrite(CR0_GUEST_HOST_MASK, 0); + vmwrite(CR4_GUEST_HOST_MASK, 0); + vmwrite(CR0_READ_SHADOW, get_cr0()); + vmwrite(CR4_READ_SHADOW, get_cr4()); + + vmwrite(MSR_BITMAP, vmx->msr_gpa); + vmwrite(VMREAD_BITMAP, vmx->vmread_gpa); + vmwrite(VMWRITE_BITMAP, vmx->vmwrite_gpa); +} + +/* + * Initialize the host state fields based on the current host state, with + * the exception of HOST_RSP and HOST_RIP, which should be set by vmlaunch + * or vmresume. + */ +static inline void init_vmcs_host_state(void) +{ + uint32_t exit_controls = vmreadz(VM_EXIT_CONTROLS); + + vmwrite(HOST_ES_SELECTOR, get_es()); + vmwrite(HOST_CS_SELECTOR, get_cs()); + vmwrite(HOST_SS_SELECTOR, get_ss()); + vmwrite(HOST_DS_SELECTOR, get_ds()); + vmwrite(HOST_FS_SELECTOR, get_fs()); + vmwrite(HOST_GS_SELECTOR, get_gs()); + vmwrite(HOST_TR_SELECTOR, get_tr()); + + if (exit_controls & VM_EXIT_LOAD_IA32_PAT) + vmwrite(HOST_IA32_PAT, rdmsr(MSR_IA32_CR_PAT)); + if (exit_controls & VM_EXIT_LOAD_IA32_EFER) + vmwrite(HOST_IA32_EFER, rdmsr(MSR_EFER)); + if (exit_controls & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL) + vmwrite(HOST_IA32_PERF_GLOBAL_CTRL, + rdmsr(MSR_CORE_PERF_GLOBAL_CTRL)); + + vmwrite(HOST_IA32_SYSENTER_CS, rdmsr(MSR_IA32_SYSENTER_CS)); + + vmwrite(HOST_CR0, get_cr0()); + vmwrite(HOST_CR3, get_cr3()); + vmwrite(HOST_CR4, get_cr4()); + vmwrite(HOST_FS_BASE, rdmsr(MSR_FS_BASE)); + vmwrite(HOST_GS_BASE, rdmsr(MSR_GS_BASE)); + vmwrite(HOST_TR_BASE, + get_desc64_base((struct desc64 *)(get_gdt().address + get_tr()))); + vmwrite(HOST_GDTR_BASE, get_gdt().address); + vmwrite(HOST_IDTR_BASE, get_idt().address); + vmwrite(HOST_IA32_SYSENTER_ESP, rdmsr(MSR_IA32_SYSENTER_ESP)); + vmwrite(HOST_IA32_SYSENTER_EIP, rdmsr(MSR_IA32_SYSENTER_EIP)); +} + +/* + * Initialize the guest state fields essentially as a clone of + * the host state fields. Some host state fields have fixed + * values, and we set the corresponding guest state fields accordingly. 
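+ *
+ * For example, the selector, base and control-register fields below are
+ * copied from the host via vmreadz(HOST_*), while architecturally fixed
+ * values such as RFLAGS (2, reserved bit 1 set), DR7 (0x400) and the TR
+ * limit (0x67) are hard-coded rather than cloned.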
+ */ +static inline void init_vmcs_guest_state(void *rip, void *rsp) +{ + vmwrite(GUEST_ES_SELECTOR, vmreadz(HOST_ES_SELECTOR)); + vmwrite(GUEST_CS_SELECTOR, vmreadz(HOST_CS_SELECTOR)); + vmwrite(GUEST_SS_SELECTOR, vmreadz(HOST_SS_SELECTOR)); + vmwrite(GUEST_DS_SELECTOR, vmreadz(HOST_DS_SELECTOR)); + vmwrite(GUEST_FS_SELECTOR, vmreadz(HOST_FS_SELECTOR)); + vmwrite(GUEST_GS_SELECTOR, vmreadz(HOST_GS_SELECTOR)); + vmwrite(GUEST_LDTR_SELECTOR, 0); + vmwrite(GUEST_TR_SELECTOR, vmreadz(HOST_TR_SELECTOR)); + vmwrite(GUEST_INTR_STATUS, 0); + vmwrite(GUEST_PML_INDEX, 0); + + vmwrite(VMCS_LINK_POINTER, -1ll); + vmwrite(GUEST_IA32_DEBUGCTL, 0); + vmwrite(GUEST_IA32_PAT, vmreadz(HOST_IA32_PAT)); + vmwrite(GUEST_IA32_EFER, vmreadz(HOST_IA32_EFER)); + vmwrite(GUEST_IA32_PERF_GLOBAL_CTRL, + vmreadz(HOST_IA32_PERF_GLOBAL_CTRL)); + + vmwrite(GUEST_ES_LIMIT, -1); + vmwrite(GUEST_CS_LIMIT, -1); + vmwrite(GUEST_SS_LIMIT, -1); + vmwrite(GUEST_DS_LIMIT, -1); + vmwrite(GUEST_FS_LIMIT, -1); + vmwrite(GUEST_GS_LIMIT, -1); + vmwrite(GUEST_LDTR_LIMIT, -1); + vmwrite(GUEST_TR_LIMIT, 0x67); + vmwrite(GUEST_GDTR_LIMIT, 0xffff); + vmwrite(GUEST_IDTR_LIMIT, 0xffff); + vmwrite(GUEST_ES_AR_BYTES, + vmreadz(GUEST_ES_SELECTOR) == 0 ? 0x10000 : 0xc093); + vmwrite(GUEST_CS_AR_BYTES, 0xa09b); + vmwrite(GUEST_SS_AR_BYTES, 0xc093); + vmwrite(GUEST_DS_AR_BYTES, + vmreadz(GUEST_DS_SELECTOR) == 0 ? 0x10000 : 0xc093); + vmwrite(GUEST_FS_AR_BYTES, + vmreadz(GUEST_FS_SELECTOR) == 0 ? 0x10000 : 0xc093); + vmwrite(GUEST_GS_AR_BYTES, + vmreadz(GUEST_GS_SELECTOR) == 0 ? 0x10000 : 0xc093); + vmwrite(GUEST_LDTR_AR_BYTES, 0x10000); + vmwrite(GUEST_TR_AR_BYTES, 0x8b); + vmwrite(GUEST_INTERRUPTIBILITY_INFO, 0); + vmwrite(GUEST_ACTIVITY_STATE, 0); + vmwrite(GUEST_SYSENTER_CS, vmreadz(HOST_IA32_SYSENTER_CS)); + vmwrite(VMX_PREEMPTION_TIMER_VALUE, 0); + + vmwrite(GUEST_CR0, vmreadz(HOST_CR0)); + vmwrite(GUEST_CR3, vmreadz(HOST_CR3)); + vmwrite(GUEST_CR4, vmreadz(HOST_CR4)); + vmwrite(GUEST_ES_BASE, 0); + vmwrite(GUEST_CS_BASE, 0); + vmwrite(GUEST_SS_BASE, 0); + vmwrite(GUEST_DS_BASE, 0); + vmwrite(GUEST_FS_BASE, vmreadz(HOST_FS_BASE)); + vmwrite(GUEST_GS_BASE, vmreadz(HOST_GS_BASE)); + vmwrite(GUEST_LDTR_BASE, 0); + vmwrite(GUEST_TR_BASE, vmreadz(HOST_TR_BASE)); + vmwrite(GUEST_GDTR_BASE, vmreadz(HOST_GDTR_BASE)); + vmwrite(GUEST_IDTR_BASE, vmreadz(HOST_IDTR_BASE)); + vmwrite(GUEST_DR7, 0x400); + vmwrite(GUEST_RSP, (uint64_t)rsp); + vmwrite(GUEST_RIP, (uint64_t)rip); + vmwrite(GUEST_RFLAGS, 2); + vmwrite(GUEST_PENDING_DBG_EXCEPTIONS, 0); + vmwrite(GUEST_SYSENTER_ESP, vmreadz(HOST_IA32_SYSENTER_ESP)); + vmwrite(GUEST_SYSENTER_EIP, vmreadz(HOST_IA32_SYSENTER_EIP)); +} + +void prepare_vmcs(struct vmx_pages *vmx, void *guest_rip, void *guest_rsp) +{ + init_vmcs_control_fields(vmx); + init_vmcs_host_state(); + init_vmcs_guest_state(guest_rip, guest_rsp); +} + +static void nested_create_pte(struct kvm_vm *vm, + struct eptPageTableEntry *pte, + uint64_t nested_paddr, + uint64_t paddr, + int current_level, + int target_level) +{ + if (!pte->readable) { + pte->writable = true; + pte->readable = true; + pte->executable = true; + pte->page_size = (current_level == target_level); + if (pte->page_size) + pte->address = paddr >> vm->page_shift; + else + pte->address = vm_alloc_page_table(vm) >> vm->page_shift; + } else { + /* + * Entry already present. Assert that the caller doesn't want + * a hugepage at this level, and that there isn't a hugepage at + * this level. 
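+ * Put differently, an existing mapping is never silently replaced:
+ * requesting a hugepage where a page table already exists (or vice
+ * versa) fails the test instead.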
+ */ + TEST_ASSERT(current_level != target_level, + "Cannot create hugepage at level: %u, nested_paddr: 0x%lx", + current_level, nested_paddr); + TEST_ASSERT(!pte->page_size, + "Cannot create page table at level: %u, nested_paddr: 0x%lx", + current_level, nested_paddr); + } +} + + +void __nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm, + uint64_t nested_paddr, uint64_t paddr, int target_level) +{ + const uint64_t page_size = PG_LEVEL_SIZE(target_level); + struct eptPageTableEntry *pt = vmx->eptp_hva, *pte; + uint16_t index; + + TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use " + "unknown or unsupported guest mode, mode: 0x%x", vm->mode); + + TEST_ASSERT((nested_paddr >> 48) == 0, + "Nested physical address 0x%lx requires 5-level paging", + nested_paddr); + TEST_ASSERT((nested_paddr % page_size) == 0, + "Nested physical address not on page boundary,\n" + " nested_paddr: 0x%lx page_size: 0x%lx", + nested_paddr, page_size); + TEST_ASSERT((nested_paddr >> vm->page_shift) <= vm->max_gfn, + "Physical address beyond beyond maximum supported,\n" + " nested_paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x", + paddr, vm->max_gfn, vm->page_size); + TEST_ASSERT((paddr % page_size) == 0, + "Physical address not on page boundary,\n" + " paddr: 0x%lx page_size: 0x%lx", + paddr, page_size); + TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn, + "Physical address beyond beyond maximum supported,\n" + " paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x", + paddr, vm->max_gfn, vm->page_size); + + for (int level = PG_LEVEL_512G; level >= PG_LEVEL_4K; level--) { + index = (nested_paddr >> PG_LEVEL_SHIFT(level)) & 0x1ffu; + pte = &pt[index]; + + nested_create_pte(vm, pte, nested_paddr, paddr, level, target_level); + + if (pte->page_size) + break; + + pt = addr_gpa2hva(vm, pte->address * vm->page_size); + } + + /* + * For now mark these as accessed and dirty because the only + * testcase we have needs that. Can be reconsidered later. + */ + pte->accessed = true; + pte->dirty = true; + +} + +void nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm, + uint64_t nested_paddr, uint64_t paddr) +{ + __nested_pg_map(vmx, vm, nested_paddr, paddr, PG_LEVEL_4K); +} + +/* + * Map a range of EPT guest physical addresses to the VM's physical address + * + * Input Args: + * vm - Virtual Machine + * nested_paddr - Nested guest physical address to map + * paddr - VM Physical Address + * size - The size of the range to map + * level - The level at which to map the range + * + * Output Args: None + * + * Return: None + * + * Within the VM given by vm, creates a nested guest translation for the + * page range starting at nested_paddr to the page range starting at paddr. + */ +void __nested_map(struct vmx_pages *vmx, struct kvm_vm *vm, + uint64_t nested_paddr, uint64_t paddr, uint64_t size, + int level) +{ + size_t page_size = PG_LEVEL_SIZE(level); + size_t npages = size / page_size; + + TEST_ASSERT(nested_paddr + size > nested_paddr, "Vaddr overflow"); + TEST_ASSERT(paddr + size > paddr, "Paddr overflow"); + + while (npages--) { + __nested_pg_map(vmx, vm, nested_paddr, paddr, level); + nested_paddr += page_size; + paddr += page_size; + } +} + +void nested_map(struct vmx_pages *vmx, struct kvm_vm *vm, + uint64_t nested_paddr, uint64_t paddr, uint64_t size) +{ + __nested_map(vmx, vm, nested_paddr, paddr, size, PG_LEVEL_4K); +} + +/* Prepare an identity extended page table that maps all the + * physical pages in VM. 
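+ *
+ * A minimal usage sketch (hypothetical caller; assumes prepare_eptp() has
+ * already allocated vmx->eptp for this VM):
+ *
+ *   prepare_eptp(vmx, vm, 0);
+ *   nested_map_memslot(vmx, vm, 0);
+ *
+ * after which L2 accesses to memslot 0 are identity-mapped through EPT.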
+ */ +void nested_map_memslot(struct vmx_pages *vmx, struct kvm_vm *vm, + uint32_t memslot) +{ + sparsebit_idx_t i, last; + struct userspace_mem_region *region = + memslot2region(vm, memslot); + + i = (region->region.guest_phys_addr >> vm->page_shift) - 1; + last = i + (region->region.memory_size >> vm->page_shift); + for (;;) { + i = sparsebit_next_clear(region->unused_phy_pages, i); + if (i > last) + break; + + nested_map(vmx, vm, + (uint64_t)i << vm->page_shift, + (uint64_t)i << vm->page_shift, + 1 << vm->page_shift); + } +} + +/* Identity map a region with 1GiB Pages. */ +void nested_identity_map_1g(struct vmx_pages *vmx, struct kvm_vm *vm, + uint64_t addr, uint64_t size) +{ + __nested_map(vmx, vm, addr, addr, size, PG_LEVEL_1G); +} + +bool kvm_cpu_has_ept(void) +{ + uint64_t ctrl; + + ctrl = kvm_get_feature_msr(MSR_IA32_VMX_TRUE_PROCBASED_CTLS) >> 32; + if (!(ctrl & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS)) + return false; + + ctrl = kvm_get_feature_msr(MSR_IA32_VMX_PROCBASED_CTLS2) >> 32; + return ctrl & SECONDARY_EXEC_ENABLE_EPT; +} + +void prepare_eptp(struct vmx_pages *vmx, struct kvm_vm *vm, + uint32_t eptp_memslot) +{ + TEST_ASSERT(kvm_cpu_has_ept(), "KVM doesn't support nested EPT"); + + vmx->eptp = (void *)vm_vaddr_alloc_page(vm); + vmx->eptp_hva = addr_gva2hva(vm, (uintptr_t)vmx->eptp); + vmx->eptp_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->eptp); +} + +void prepare_virtualize_apic_accesses(struct vmx_pages *vmx, struct kvm_vm *vm) +{ + vmx->apic_access = (void *)vm_vaddr_alloc_page(vm); + vmx->apic_access_hva = addr_gva2hva(vm, (uintptr_t)vmx->apic_access); + vmx->apic_access_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->apic_access); +} diff --git a/tools/testing/selftests/kvm/lib/x86_64/apic.c b/tools/testing/selftests/kvm/lib/x86_64/apic.c deleted file mode 100644 index 89153a333e83..000000000000 --- a/tools/testing/selftests/kvm/lib/x86_64/apic.c +++ /dev/null @@ -1,43 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Copyright (C) 2021, Google LLC. - */ - -#include "apic.h" - -void apic_disable(void) -{ - wrmsr(MSR_IA32_APICBASE, - rdmsr(MSR_IA32_APICBASE) & - ~(MSR_IA32_APICBASE_ENABLE | MSR_IA32_APICBASE_EXTD)); -} - -void xapic_enable(void) -{ - uint64_t val = rdmsr(MSR_IA32_APICBASE); - - /* Per SDM: to enable xAPIC when in x2APIC must first disable APIC */ - if (val & MSR_IA32_APICBASE_EXTD) { - apic_disable(); - wrmsr(MSR_IA32_APICBASE, - rdmsr(MSR_IA32_APICBASE) | MSR_IA32_APICBASE_ENABLE); - } else if (!(val & MSR_IA32_APICBASE_ENABLE)) { - wrmsr(MSR_IA32_APICBASE, val | MSR_IA32_APICBASE_ENABLE); - } - - /* - * Per SDM: reset value of spurious interrupt vector register has the - * APIC software enabled bit=0. It must be enabled in addition to the - * enable bit in the MSR. 
- */ - val = xapic_read_reg(APIC_SPIV) | APIC_SPIV_APIC_ENABLED; - xapic_write_reg(APIC_SPIV, val); -} - -void x2apic_enable(void) -{ - wrmsr(MSR_IA32_APICBASE, rdmsr(MSR_IA32_APICBASE) | - MSR_IA32_APICBASE_ENABLE | MSR_IA32_APICBASE_EXTD); - x2apic_write_reg(APIC_SPIV, - x2apic_read_reg(APIC_SPIV) | APIC_SPIV_APIC_ENABLED); -} diff --git a/tools/testing/selftests/kvm/lib/x86_64/handlers.S b/tools/testing/selftests/kvm/lib/x86_64/handlers.S deleted file mode 100644 index 7629819734af..000000000000 --- a/tools/testing/selftests/kvm/lib/x86_64/handlers.S +++ /dev/null @@ -1,81 +0,0 @@ -handle_exception: - push %r15 - push %r14 - push %r13 - push %r12 - push %r11 - push %r10 - push %r9 - push %r8 - - push %rdi - push %rsi - push %rbp - push %rbx - push %rdx - push %rcx - push %rax - mov %rsp, %rdi - - call route_exception - - pop %rax - pop %rcx - pop %rdx - pop %rbx - pop %rbp - pop %rsi - pop %rdi - pop %r8 - pop %r9 - pop %r10 - pop %r11 - pop %r12 - pop %r13 - pop %r14 - pop %r15 - - /* Discard vector and error code. */ - add $16, %rsp - iretq - -/* - * Build the handle_exception wrappers which push the vector/error code on the - * stack and an array of pointers to those wrappers. - */ -.pushsection .rodata -.globl idt_handlers -idt_handlers: -.popsection - -.macro HANDLERS has_error from to - vector = \from - .rept \to - \from + 1 - .align 8 - - /* Fetch current address and append it to idt_handlers. */ -666 : -.pushsection .rodata - .quad 666b -.popsection - - .if ! \has_error - pushq $0 - .endif - pushq $vector - jmp handle_exception - vector = vector + 1 - .endr -.endm - -.global idt_handler_code -idt_handler_code: - HANDLERS has_error=0 from=0 to=7 - HANDLERS has_error=1 from=8 to=8 - HANDLERS has_error=0 from=9 to=9 - HANDLERS has_error=1 from=10 to=14 - HANDLERS has_error=0 from=15 to=16 - HANDLERS has_error=1 from=17 to=17 - HANDLERS has_error=0 from=18 to=255 - -.section .note.GNU-stack, "", %progbits diff --git a/tools/testing/selftests/kvm/lib/x86_64/hyperv.c b/tools/testing/selftests/kvm/lib/x86_64/hyperv.c deleted file mode 100644 index 15bc8cd583aa..000000000000 --- a/tools/testing/selftests/kvm/lib/x86_64/hyperv.c +++ /dev/null @@ -1,113 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Hyper-V specific functions. - * - * Copyright (C) 2021, Red Hat Inc. 
- */ -#include -#include "processor.h" -#include "hyperv.h" - -const struct kvm_cpuid2 *kvm_get_supported_hv_cpuid(void) -{ - static struct kvm_cpuid2 *cpuid; - int kvm_fd; - - if (cpuid) - return cpuid; - - cpuid = allocate_kvm_cpuid2(MAX_NR_CPUID_ENTRIES); - kvm_fd = open_kvm_dev_path_or_exit(); - - kvm_ioctl(kvm_fd, KVM_GET_SUPPORTED_HV_CPUID, cpuid); - - close(kvm_fd); - return cpuid; -} - -void vcpu_set_hv_cpuid(struct kvm_vcpu *vcpu) -{ - static struct kvm_cpuid2 *cpuid_full; - const struct kvm_cpuid2 *cpuid_sys, *cpuid_hv; - int i, nent = 0; - - if (!cpuid_full) { - cpuid_sys = kvm_get_supported_cpuid(); - cpuid_hv = kvm_get_supported_hv_cpuid(); - - cpuid_full = allocate_kvm_cpuid2(cpuid_sys->nent + cpuid_hv->nent); - if (!cpuid_full) { - perror("malloc"); - abort(); - } - - /* Need to skip KVM CPUID leaves 0x400000xx */ - for (i = 0; i < cpuid_sys->nent; i++) { - if (cpuid_sys->entries[i].function >= 0x40000000 && - cpuid_sys->entries[i].function < 0x40000100) - continue; - cpuid_full->entries[nent] = cpuid_sys->entries[i]; - nent++; - } - - memcpy(&cpuid_full->entries[nent], cpuid_hv->entries, - cpuid_hv->nent * sizeof(struct kvm_cpuid_entry2)); - cpuid_full->nent = nent + cpuid_hv->nent; - } - - vcpu_init_cpuid(vcpu, cpuid_full); -} - -const struct kvm_cpuid2 *vcpu_get_supported_hv_cpuid(struct kvm_vcpu *vcpu) -{ - struct kvm_cpuid2 *cpuid = allocate_kvm_cpuid2(MAX_NR_CPUID_ENTRIES); - - vcpu_ioctl(vcpu, KVM_GET_SUPPORTED_HV_CPUID, cpuid); - - return cpuid; -} - -bool kvm_hv_cpu_has(struct kvm_x86_cpu_feature feature) -{ - if (!kvm_has_cap(KVM_CAP_SYS_HYPERV_CPUID)) - return false; - - return kvm_cpuid_has(kvm_get_supported_hv_cpuid(), feature); -} - -struct hyperv_test_pages *vcpu_alloc_hyperv_test_pages(struct kvm_vm *vm, - vm_vaddr_t *p_hv_pages_gva) -{ - vm_vaddr_t hv_pages_gva = vm_vaddr_alloc_page(vm); - struct hyperv_test_pages *hv = addr_gva2hva(vm, hv_pages_gva); - - /* Setup of a region of guest memory for the VP Assist page. */ - hv->vp_assist = (void *)vm_vaddr_alloc_page(vm); - hv->vp_assist_hva = addr_gva2hva(vm, (uintptr_t)hv->vp_assist); - hv->vp_assist_gpa = addr_gva2gpa(vm, (uintptr_t)hv->vp_assist); - - /* Setup of a region of guest memory for the partition assist page. */ - hv->partition_assist = (void *)vm_vaddr_alloc_page(vm); - hv->partition_assist_hva = addr_gva2hva(vm, (uintptr_t)hv->partition_assist); - hv->partition_assist_gpa = addr_gva2gpa(vm, (uintptr_t)hv->partition_assist); - - /* Setup of a region of guest memory for the enlightened VMCS. */ - hv->enlightened_vmcs = (void *)vm_vaddr_alloc_page(vm); - hv->enlightened_vmcs_hva = addr_gva2hva(vm, (uintptr_t)hv->enlightened_vmcs); - hv->enlightened_vmcs_gpa = addr_gva2gpa(vm, (uintptr_t)hv->enlightened_vmcs); - - *p_hv_pages_gva = hv_pages_gva; - return hv; -} - -int enable_vp_assist(uint64_t vp_assist_pa, void *vp_assist) -{ - uint64_t val = (vp_assist_pa & HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_MASK) | - HV_X64_MSR_VP_ASSIST_PAGE_ENABLE; - - wrmsr(HV_X64_MSR_VP_ASSIST_PAGE, val); - - current_vp_assist = vp_assist; - - return 0; -} diff --git a/tools/testing/selftests/kvm/lib/x86_64/memstress.c b/tools/testing/selftests/kvm/lib/x86_64/memstress.c deleted file mode 100644 index d61e623afc8c..000000000000 --- a/tools/testing/selftests/kvm/lib/x86_64/memstress.c +++ /dev/null @@ -1,112 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * x86_64-specific extensions to memstress.c. - * - * Copyright (C) 2022, Google, Inc. 
- */ -#include -#include -#include -#include - -#include "test_util.h" -#include "kvm_util.h" -#include "memstress.h" -#include "processor.h" -#include "vmx.h" - -void memstress_l2_guest_code(uint64_t vcpu_id) -{ - memstress_guest_code(vcpu_id); - vmcall(); -} - -extern char memstress_l2_guest_entry[]; -__asm__( -"memstress_l2_guest_entry:" -" mov (%rsp), %rdi;" -" call memstress_l2_guest_code;" -" ud2;" -); - -static void memstress_l1_guest_code(struct vmx_pages *vmx, uint64_t vcpu_id) -{ -#define L2_GUEST_STACK_SIZE 64 - unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; - unsigned long *rsp; - - GUEST_ASSERT(vmx->vmcs_gpa); - GUEST_ASSERT(prepare_for_vmx_operation(vmx)); - GUEST_ASSERT(load_vmcs(vmx)); - GUEST_ASSERT(ept_1g_pages_supported()); - - rsp = &l2_guest_stack[L2_GUEST_STACK_SIZE - 1]; - *rsp = vcpu_id; - prepare_vmcs(vmx, memstress_l2_guest_entry, rsp); - - GUEST_ASSERT(!vmlaunch()); - GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL); - GUEST_DONE(); -} - -uint64_t memstress_nested_pages(int nr_vcpus) -{ - /* - * 513 page tables is enough to identity-map 256 TiB of L2 with 1G - * pages and 4-level paging, plus a few pages per-vCPU for data - * structures such as the VMCS. - */ - return 513 + 10 * nr_vcpus; -} - -void memstress_setup_ept(struct vmx_pages *vmx, struct kvm_vm *vm) -{ - uint64_t start, end; - - prepare_eptp(vmx, vm, 0); - - /* - * Identity map the first 4G and the test region with 1G pages so that - * KVM can shadow the EPT12 with the maximum huge page size supported - * by the backing source. - */ - nested_identity_map_1g(vmx, vm, 0, 0x100000000ULL); - - start = align_down(memstress_args.gpa, PG_SIZE_1G); - end = align_up(memstress_args.gpa + memstress_args.size, PG_SIZE_1G); - nested_identity_map_1g(vmx, vm, start, end - start); -} - -void memstress_setup_nested(struct kvm_vm *vm, int nr_vcpus, struct kvm_vcpu *vcpus[]) -{ - struct vmx_pages *vmx, *vmx0 = NULL; - struct kvm_regs regs; - vm_vaddr_t vmx_gva; - int vcpu_id; - - TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX)); - TEST_REQUIRE(kvm_cpu_has_ept()); - - for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) { - vmx = vcpu_alloc_vmx(vm, &vmx_gva); - - if (vcpu_id == 0) { - memstress_setup_ept(vmx, vm); - vmx0 = vmx; - } else { - /* Share the same EPT table across all vCPUs. */ - vmx->eptp = vmx0->eptp; - vmx->eptp_hva = vmx0->eptp_hva; - vmx->eptp_gpa = vmx0->eptp_gpa; - } - - /* - * Override the vCPU to run memstress_l1_guest_code() which will - * bounce it into L2 before calling memstress_guest_code(). - */ - vcpu_regs_get(vcpus[vcpu_id], ®s); - regs.rip = (unsigned long) memstress_l1_guest_code; - vcpu_regs_set(vcpus[vcpu_id], ®s); - vcpu_args_set(vcpus[vcpu_id], 2, vmx_gva, vcpu_id); - } -} diff --git a/tools/testing/selftests/kvm/lib/x86_64/pmu.c b/tools/testing/selftests/kvm/lib/x86_64/pmu.c deleted file mode 100644 index f31f0427c17c..000000000000 --- a/tools/testing/selftests/kvm/lib/x86_64/pmu.c +++ /dev/null @@ -1,31 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Copyright (C) 2023, Tencent, Inc. 
- */ - -#include - -#include - -#include "kvm_util.h" -#include "pmu.h" - -const uint64_t intel_pmu_arch_events[] = { - INTEL_ARCH_CPU_CYCLES, - INTEL_ARCH_INSTRUCTIONS_RETIRED, - INTEL_ARCH_REFERENCE_CYCLES, - INTEL_ARCH_LLC_REFERENCES, - INTEL_ARCH_LLC_MISSES, - INTEL_ARCH_BRANCHES_RETIRED, - INTEL_ARCH_BRANCHES_MISPREDICTED, - INTEL_ARCH_TOPDOWN_SLOTS, -}; -kvm_static_assert(ARRAY_SIZE(intel_pmu_arch_events) == NR_INTEL_ARCH_EVENTS); - -const uint64_t amd_pmu_zen_events[] = { - AMD_ZEN_CORE_CYCLES, - AMD_ZEN_INSTRUCTIONS_RETIRED, - AMD_ZEN_BRANCHES_RETIRED, - AMD_ZEN_BRANCHES_MISPREDICTED, -}; -kvm_static_assert(ARRAY_SIZE(amd_pmu_zen_events) == NR_AMD_ZEN_EVENTS); diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c deleted file mode 100644 index 636b29ba8985..000000000000 --- a/tools/testing/selftests/kvm/lib/x86_64/processor.c +++ /dev/null @@ -1,1295 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * tools/testing/selftests/kvm/lib/x86_64/processor.c - * - * Copyright (C) 2018, Google LLC. - */ - -#include "linux/bitmap.h" -#include "test_util.h" -#include "kvm_util.h" -#include "processor.h" -#include "sev.h" - -#ifndef NUM_INTERRUPTS -#define NUM_INTERRUPTS 256 -#endif - -#define KERNEL_CS 0x8 -#define KERNEL_DS 0x10 -#define KERNEL_TSS 0x18 - -vm_vaddr_t exception_handlers; -bool host_cpu_is_amd; -bool host_cpu_is_intel; -bool is_forced_emulation_enabled; -uint64_t guest_tsc_khz; - -static void regs_dump(FILE *stream, struct kvm_regs *regs, uint8_t indent) -{ - fprintf(stream, "%*srax: 0x%.16llx rbx: 0x%.16llx " - "rcx: 0x%.16llx rdx: 0x%.16llx\n", - indent, "", - regs->rax, regs->rbx, regs->rcx, regs->rdx); - fprintf(stream, "%*srsi: 0x%.16llx rdi: 0x%.16llx " - "rsp: 0x%.16llx rbp: 0x%.16llx\n", - indent, "", - regs->rsi, regs->rdi, regs->rsp, regs->rbp); - fprintf(stream, "%*sr8: 0x%.16llx r9: 0x%.16llx " - "r10: 0x%.16llx r11: 0x%.16llx\n", - indent, "", - regs->r8, regs->r9, regs->r10, regs->r11); - fprintf(stream, "%*sr12: 0x%.16llx r13: 0x%.16llx " - "r14: 0x%.16llx r15: 0x%.16llx\n", - indent, "", - regs->r12, regs->r13, regs->r14, regs->r15); - fprintf(stream, "%*srip: 0x%.16llx rfl: 0x%.16llx\n", - indent, "", - regs->rip, regs->rflags); -} - -static void segment_dump(FILE *stream, struct kvm_segment *segment, - uint8_t indent) -{ - fprintf(stream, "%*sbase: 0x%.16llx limit: 0x%.8x " - "selector: 0x%.4x type: 0x%.2x\n", - indent, "", segment->base, segment->limit, - segment->selector, segment->type); - fprintf(stream, "%*spresent: 0x%.2x dpl: 0x%.2x " - "db: 0x%.2x s: 0x%.2x l: 0x%.2x\n", - indent, "", segment->present, segment->dpl, - segment->db, segment->s, segment->l); - fprintf(stream, "%*sg: 0x%.2x avl: 0x%.2x " - "unusable: 0x%.2x padding: 0x%.2x\n", - indent, "", segment->g, segment->avl, - segment->unusable, segment->padding); -} - -static void dtable_dump(FILE *stream, struct kvm_dtable *dtable, - uint8_t indent) -{ - fprintf(stream, "%*sbase: 0x%.16llx limit: 0x%.4x " - "padding: 0x%.4x 0x%.4x 0x%.4x\n", - indent, "", dtable->base, dtable->limit, - dtable->padding[0], dtable->padding[1], dtable->padding[2]); -} - -static void sregs_dump(FILE *stream, struct kvm_sregs *sregs, uint8_t indent) -{ - unsigned int i; - - fprintf(stream, "%*scs:\n", indent, ""); - segment_dump(stream, &sregs->cs, indent + 2); - fprintf(stream, "%*sds:\n", indent, ""); - segment_dump(stream, &sregs->ds, indent + 2); - fprintf(stream, "%*ses:\n", indent, ""); - segment_dump(stream, &sregs->es, indent + 2); - 
fprintf(stream, "%*sfs:\n", indent, ""); - segment_dump(stream, &sregs->fs, indent + 2); - fprintf(stream, "%*sgs:\n", indent, ""); - segment_dump(stream, &sregs->gs, indent + 2); - fprintf(stream, "%*sss:\n", indent, ""); - segment_dump(stream, &sregs->ss, indent + 2); - fprintf(stream, "%*str:\n", indent, ""); - segment_dump(stream, &sregs->tr, indent + 2); - fprintf(stream, "%*sldt:\n", indent, ""); - segment_dump(stream, &sregs->ldt, indent + 2); - - fprintf(stream, "%*sgdt:\n", indent, ""); - dtable_dump(stream, &sregs->gdt, indent + 2); - fprintf(stream, "%*sidt:\n", indent, ""); - dtable_dump(stream, &sregs->idt, indent + 2); - - fprintf(stream, "%*scr0: 0x%.16llx cr2: 0x%.16llx " - "cr3: 0x%.16llx cr4: 0x%.16llx\n", - indent, "", - sregs->cr0, sregs->cr2, sregs->cr3, sregs->cr4); - fprintf(stream, "%*scr8: 0x%.16llx efer: 0x%.16llx " - "apic_base: 0x%.16llx\n", - indent, "", - sregs->cr8, sregs->efer, sregs->apic_base); - - fprintf(stream, "%*sinterrupt_bitmap:\n", indent, ""); - for (i = 0; i < (KVM_NR_INTERRUPTS + 63) / 64; i++) { - fprintf(stream, "%*s%.16llx\n", indent + 2, "", - sregs->interrupt_bitmap[i]); - } -} - -bool kvm_is_tdp_enabled(void) -{ - if (host_cpu_is_intel) - return get_kvm_intel_param_bool("ept"); - else - return get_kvm_amd_param_bool("npt"); -} - -void virt_arch_pgd_alloc(struct kvm_vm *vm) -{ - TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use " - "unknown or unsupported guest mode, mode: 0x%x", vm->mode); - - /* If needed, create page map l4 table. */ - if (!vm->pgd_created) { - vm->pgd = vm_alloc_page_table(vm); - vm->pgd_created = true; - } -} - -static void *virt_get_pte(struct kvm_vm *vm, uint64_t *parent_pte, - uint64_t vaddr, int level) -{ - uint64_t pt_gpa = PTE_GET_PA(*parent_pte); - uint64_t *page_table = addr_gpa2hva(vm, pt_gpa); - int index = (vaddr >> PG_LEVEL_SHIFT(level)) & 0x1ffu; - - TEST_ASSERT((*parent_pte & PTE_PRESENT_MASK) || parent_pte == &vm->pgd, - "Parent PTE (level %d) not PRESENT for gva: 0x%08lx", - level + 1, vaddr); - - return &page_table[index]; -} - -static uint64_t *virt_create_upper_pte(struct kvm_vm *vm, - uint64_t *parent_pte, - uint64_t vaddr, - uint64_t paddr, - int current_level, - int target_level) -{ - uint64_t *pte = virt_get_pte(vm, parent_pte, vaddr, current_level); - - paddr = vm_untag_gpa(vm, paddr); - - if (!(*pte & PTE_PRESENT_MASK)) { - *pte = PTE_PRESENT_MASK | PTE_WRITABLE_MASK; - if (current_level == target_level) - *pte |= PTE_LARGE_MASK | (paddr & PHYSICAL_PAGE_MASK); - else - *pte |= vm_alloc_page_table(vm) & PHYSICAL_PAGE_MASK; - } else { - /* - * Entry already present. Assert that the caller doesn't want - * a hugepage at this level, and that there isn't a hugepage at - * this level. 
- */ - TEST_ASSERT(current_level != target_level, - "Cannot create hugepage at level: %u, vaddr: 0x%lx", - current_level, vaddr); - TEST_ASSERT(!(*pte & PTE_LARGE_MASK), - "Cannot create page table at level: %u, vaddr: 0x%lx", - current_level, vaddr); - } - return pte; -} - -void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, int level) -{ - const uint64_t pg_size = PG_LEVEL_SIZE(level); - uint64_t *pml4e, *pdpe, *pde; - uint64_t *pte; - - TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, - "Unknown or unsupported guest mode, mode: 0x%x", vm->mode); - - TEST_ASSERT((vaddr % pg_size) == 0, - "Virtual address not aligned,\n" - "vaddr: 0x%lx page size: 0x%lx", vaddr, pg_size); - TEST_ASSERT(sparsebit_is_set(vm->vpages_valid, (vaddr >> vm->page_shift)), - "Invalid virtual address, vaddr: 0x%lx", vaddr); - TEST_ASSERT((paddr % pg_size) == 0, - "Physical address not aligned,\n" - " paddr: 0x%lx page size: 0x%lx", paddr, pg_size); - TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn, - "Physical address beyond maximum supported,\n" - " paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x", - paddr, vm->max_gfn, vm->page_size); - TEST_ASSERT(vm_untag_gpa(vm, paddr) == paddr, - "Unexpected bits in paddr: %lx", paddr); - - /* - * Allocate upper level page tables, if not already present. Return - * early if a hugepage was created. - */ - pml4e = virt_create_upper_pte(vm, &vm->pgd, vaddr, paddr, PG_LEVEL_512G, level); - if (*pml4e & PTE_LARGE_MASK) - return; - - pdpe = virt_create_upper_pte(vm, pml4e, vaddr, paddr, PG_LEVEL_1G, level); - if (*pdpe & PTE_LARGE_MASK) - return; - - pde = virt_create_upper_pte(vm, pdpe, vaddr, paddr, PG_LEVEL_2M, level); - if (*pde & PTE_LARGE_MASK) - return; - - /* Fill in page table entry. */ - pte = virt_get_pte(vm, pde, vaddr, PG_LEVEL_4K); - TEST_ASSERT(!(*pte & PTE_PRESENT_MASK), - "PTE already present for 4k page at vaddr: 0x%lx", vaddr); - *pte = PTE_PRESENT_MASK | PTE_WRITABLE_MASK | (paddr & PHYSICAL_PAGE_MASK); - - /* - * Neither SEV nor TDX supports shared page tables, so only the final - * leaf PTE needs manually set the C/S-bit. 
- */ - if (vm_is_gpa_protected(vm, paddr)) - *pte |= vm->arch.c_bit; - else - *pte |= vm->arch.s_bit; -} - -void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr) -{ - __virt_pg_map(vm, vaddr, paddr, PG_LEVEL_4K); -} - -void virt_map_level(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, - uint64_t nr_bytes, int level) -{ - uint64_t pg_size = PG_LEVEL_SIZE(level); - uint64_t nr_pages = nr_bytes / pg_size; - int i; - - TEST_ASSERT(nr_bytes % pg_size == 0, - "Region size not aligned: nr_bytes: 0x%lx, page size: 0x%lx", - nr_bytes, pg_size); - - for (i = 0; i < nr_pages; i++) { - __virt_pg_map(vm, vaddr, paddr, level); - - vaddr += pg_size; - paddr += pg_size; - } -} - -static bool vm_is_target_pte(uint64_t *pte, int *level, int current_level) -{ - if (*pte & PTE_LARGE_MASK) { - TEST_ASSERT(*level == PG_LEVEL_NONE || - *level == current_level, - "Unexpected hugepage at level %d", current_level); - *level = current_level; - } - - return *level == current_level; -} - -uint64_t *__vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr, - int *level) -{ - uint64_t *pml4e, *pdpe, *pde; - - TEST_ASSERT(!vm->arch.is_pt_protected, - "Walking page tables of protected guests is impossible"); - - TEST_ASSERT(*level >= PG_LEVEL_NONE && *level < PG_LEVEL_NUM, - "Invalid PG_LEVEL_* '%d'", *level); - - TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use " - "unknown or unsupported guest mode, mode: 0x%x", vm->mode); - TEST_ASSERT(sparsebit_is_set(vm->vpages_valid, - (vaddr >> vm->page_shift)), - "Invalid virtual address, vaddr: 0x%lx", - vaddr); - /* - * Based on the mode check above there are 48 bits in the vaddr, so - * shift 16 to sign extend the last bit (bit-47), - */ - TEST_ASSERT(vaddr == (((int64_t)vaddr << 16) >> 16), - "Canonical check failed. 
The virtual address is invalid."); - - pml4e = virt_get_pte(vm, &vm->pgd, vaddr, PG_LEVEL_512G); - if (vm_is_target_pte(pml4e, level, PG_LEVEL_512G)) - return pml4e; - - pdpe = virt_get_pte(vm, pml4e, vaddr, PG_LEVEL_1G); - if (vm_is_target_pte(pdpe, level, PG_LEVEL_1G)) - return pdpe; - - pde = virt_get_pte(vm, pdpe, vaddr, PG_LEVEL_2M); - if (vm_is_target_pte(pde, level, PG_LEVEL_2M)) - return pde; - - return virt_get_pte(vm, pde, vaddr, PG_LEVEL_4K); -} - -uint64_t *vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr) -{ - int level = PG_LEVEL_4K; - - return __vm_get_page_table_entry(vm, vaddr, &level); -} - -void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) -{ - uint64_t *pml4e, *pml4e_start; - uint64_t *pdpe, *pdpe_start; - uint64_t *pde, *pde_start; - uint64_t *pte, *pte_start; - - if (!vm->pgd_created) - return; - - fprintf(stream, "%*s " - " no\n", indent, ""); - fprintf(stream, "%*s index hvaddr gpaddr " - "addr w exec dirty\n", - indent, ""); - pml4e_start = (uint64_t *) addr_gpa2hva(vm, vm->pgd); - for (uint16_t n1 = 0; n1 <= 0x1ffu; n1++) { - pml4e = &pml4e_start[n1]; - if (!(*pml4e & PTE_PRESENT_MASK)) - continue; - fprintf(stream, "%*spml4e 0x%-3zx %p 0x%-12lx 0x%-10llx %u " - " %u\n", - indent, "", - pml4e - pml4e_start, pml4e, - addr_hva2gpa(vm, pml4e), PTE_GET_PFN(*pml4e), - !!(*pml4e & PTE_WRITABLE_MASK), !!(*pml4e & PTE_NX_MASK)); - - pdpe_start = addr_gpa2hva(vm, *pml4e & PHYSICAL_PAGE_MASK); - for (uint16_t n2 = 0; n2 <= 0x1ffu; n2++) { - pdpe = &pdpe_start[n2]; - if (!(*pdpe & PTE_PRESENT_MASK)) - continue; - fprintf(stream, "%*spdpe 0x%-3zx %p 0x%-12lx 0x%-10llx " - "%u %u\n", - indent, "", - pdpe - pdpe_start, pdpe, - addr_hva2gpa(vm, pdpe), - PTE_GET_PFN(*pdpe), !!(*pdpe & PTE_WRITABLE_MASK), - !!(*pdpe & PTE_NX_MASK)); - - pde_start = addr_gpa2hva(vm, *pdpe & PHYSICAL_PAGE_MASK); - for (uint16_t n3 = 0; n3 <= 0x1ffu; n3++) { - pde = &pde_start[n3]; - if (!(*pde & PTE_PRESENT_MASK)) - continue; - fprintf(stream, "%*spde 0x%-3zx %p " - "0x%-12lx 0x%-10llx %u %u\n", - indent, "", pde - pde_start, pde, - addr_hva2gpa(vm, pde), - PTE_GET_PFN(*pde), !!(*pde & PTE_WRITABLE_MASK), - !!(*pde & PTE_NX_MASK)); - - pte_start = addr_gpa2hva(vm, *pde & PHYSICAL_PAGE_MASK); - for (uint16_t n4 = 0; n4 <= 0x1ffu; n4++) { - pte = &pte_start[n4]; - if (!(*pte & PTE_PRESENT_MASK)) - continue; - fprintf(stream, "%*spte 0x%-3zx %p " - "0x%-12lx 0x%-10llx %u %u " - " %u 0x%-10lx\n", - indent, "", - pte - pte_start, pte, - addr_hva2gpa(vm, pte), - PTE_GET_PFN(*pte), - !!(*pte & PTE_WRITABLE_MASK), - !!(*pte & PTE_NX_MASK), - !!(*pte & PTE_DIRTY_MASK), - ((uint64_t) n1 << 27) - | ((uint64_t) n2 << 18) - | ((uint64_t) n3 << 9) - | ((uint64_t) n4)); - } - } - } - } -} - -/* - * Set Unusable Segment - * - * Input Args: None - * - * Output Args: - * segp - Pointer to segment register - * - * Return: None - * - * Sets the segment register pointed to by @segp to an unusable state. 
- */ -static void kvm_seg_set_unusable(struct kvm_segment *segp) -{ - memset(segp, 0, sizeof(*segp)); - segp->unusable = true; -} - -static void kvm_seg_fill_gdt_64bit(struct kvm_vm *vm, struct kvm_segment *segp) -{ - void *gdt = addr_gva2hva(vm, vm->arch.gdt); - struct desc64 *desc = gdt + (segp->selector >> 3) * 8; - - desc->limit0 = segp->limit & 0xFFFF; - desc->base0 = segp->base & 0xFFFF; - desc->base1 = segp->base >> 16; - desc->type = segp->type; - desc->s = segp->s; - desc->dpl = segp->dpl; - desc->p = segp->present; - desc->limit1 = segp->limit >> 16; - desc->avl = segp->avl; - desc->l = segp->l; - desc->db = segp->db; - desc->g = segp->g; - desc->base2 = segp->base >> 24; - if (!segp->s) - desc->base3 = segp->base >> 32; -} - -static void kvm_seg_set_kernel_code_64bit(struct kvm_segment *segp) -{ - memset(segp, 0, sizeof(*segp)); - segp->selector = KERNEL_CS; - segp->limit = 0xFFFFFFFFu; - segp->s = 0x1; /* kTypeCodeData */ - segp->type = 0x08 | 0x01 | 0x02; /* kFlagCode | kFlagCodeAccessed - * | kFlagCodeReadable - */ - segp->g = true; - segp->l = true; - segp->present = 1; -} - -static void kvm_seg_set_kernel_data_64bit(struct kvm_segment *segp) -{ - memset(segp, 0, sizeof(*segp)); - segp->selector = KERNEL_DS; - segp->limit = 0xFFFFFFFFu; - segp->s = 0x1; /* kTypeCodeData */ - segp->type = 0x00 | 0x01 | 0x02; /* kFlagData | kFlagDataAccessed - * | kFlagDataWritable - */ - segp->g = true; - segp->present = true; -} - -vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva) -{ - int level = PG_LEVEL_NONE; - uint64_t *pte = __vm_get_page_table_entry(vm, gva, &level); - - TEST_ASSERT(*pte & PTE_PRESENT_MASK, - "Leaf PTE not PRESENT for gva: 0x%08lx", gva); - - /* - * No need for a hugepage mask on the PTE, x86-64 requires the "unused" - * address bits to be zero. - */ - return vm_untag_gpa(vm, PTE_GET_PA(*pte)) | (gva & ~HUGEPAGE_MASK(level)); -} - -static void kvm_seg_set_tss_64bit(vm_vaddr_t base, struct kvm_segment *segp) -{ - memset(segp, 0, sizeof(*segp)); - segp->base = base; - segp->limit = 0x67; - segp->selector = KERNEL_TSS; - segp->type = 0xb; - segp->present = 1; -} - -static void vcpu_init_sregs(struct kvm_vm *vm, struct kvm_vcpu *vcpu) -{ - struct kvm_sregs sregs; - - TEST_ASSERT_EQ(vm->mode, VM_MODE_PXXV48_4K); - - /* Set mode specific system register values. 
*/ - vcpu_sregs_get(vcpu, &sregs); - - sregs.idt.base = vm->arch.idt; - sregs.idt.limit = NUM_INTERRUPTS * sizeof(struct idt_entry) - 1; - sregs.gdt.base = vm->arch.gdt; - sregs.gdt.limit = getpagesize() - 1; - - sregs.cr0 = X86_CR0_PE | X86_CR0_NE | X86_CR0_PG; - sregs.cr4 |= X86_CR4_PAE | X86_CR4_OSFXSR; - if (kvm_cpu_has(X86_FEATURE_XSAVE)) - sregs.cr4 |= X86_CR4_OSXSAVE; - sregs.efer |= (EFER_LME | EFER_LMA | EFER_NX); - - kvm_seg_set_unusable(&sregs.ldt); - kvm_seg_set_kernel_code_64bit(&sregs.cs); - kvm_seg_set_kernel_data_64bit(&sregs.ds); - kvm_seg_set_kernel_data_64bit(&sregs.es); - kvm_seg_set_kernel_data_64bit(&sregs.gs); - kvm_seg_set_tss_64bit(vm->arch.tss, &sregs.tr); - - sregs.cr3 = vm->pgd; - vcpu_sregs_set(vcpu, &sregs); -} - -static void vcpu_init_xcrs(struct kvm_vm *vm, struct kvm_vcpu *vcpu) -{ - struct kvm_xcrs xcrs = { - .nr_xcrs = 1, - .xcrs[0].xcr = 0, - .xcrs[0].value = kvm_cpu_supported_xcr0(), - }; - - if (!kvm_cpu_has(X86_FEATURE_XSAVE)) - return; - - vcpu_xcrs_set(vcpu, &xcrs); -} - -static void set_idt_entry(struct kvm_vm *vm, int vector, unsigned long addr, - int dpl, unsigned short selector) -{ - struct idt_entry *base = - (struct idt_entry *)addr_gva2hva(vm, vm->arch.idt); - struct idt_entry *e = &base[vector]; - - memset(e, 0, sizeof(*e)); - e->offset0 = addr; - e->selector = selector; - e->ist = 0; - e->type = 14; - e->dpl = dpl; - e->p = 1; - e->offset1 = addr >> 16; - e->offset2 = addr >> 32; -} - -static bool kvm_fixup_exception(struct ex_regs *regs) -{ - if (regs->r9 != KVM_EXCEPTION_MAGIC || regs->rip != regs->r10) - return false; - - if (regs->vector == DE_VECTOR) - return false; - - regs->rip = regs->r11; - regs->r9 = regs->vector; - regs->r10 = regs->error_code; - return true; -} - -void route_exception(struct ex_regs *regs) -{ - typedef void(*handler)(struct ex_regs *); - handler *handlers = (handler *)exception_handlers; - - if (handlers && handlers[regs->vector]) { - handlers[regs->vector](regs); - return; - } - - if (kvm_fixup_exception(regs)) - return; - - GUEST_FAIL("Unhandled exception '0x%lx' at guest RIP '0x%lx'", - regs->vector, regs->rip); -} - -static void vm_init_descriptor_tables(struct kvm_vm *vm) -{ - extern void *idt_handlers; - struct kvm_segment seg; - int i; - - vm->arch.gdt = __vm_vaddr_alloc_page(vm, MEM_REGION_DATA); - vm->arch.idt = __vm_vaddr_alloc_page(vm, MEM_REGION_DATA); - vm->handlers = __vm_vaddr_alloc_page(vm, MEM_REGION_DATA); - vm->arch.tss = __vm_vaddr_alloc_page(vm, MEM_REGION_DATA); - - /* Handlers have the same address in both address spaces.*/ - for (i = 0; i < NUM_INTERRUPTS; i++) - set_idt_entry(vm, i, (unsigned long)(&idt_handlers)[i], 0, KERNEL_CS); - - *(vm_vaddr_t *)addr_gva2hva(vm, (vm_vaddr_t)(&exception_handlers)) = vm->handlers; - - kvm_seg_set_kernel_code_64bit(&seg); - kvm_seg_fill_gdt_64bit(vm, &seg); - - kvm_seg_set_kernel_data_64bit(&seg); - kvm_seg_fill_gdt_64bit(vm, &seg); - - kvm_seg_set_tss_64bit(vm->arch.tss, &seg); - kvm_seg_fill_gdt_64bit(vm, &seg); -} - -void vm_install_exception_handler(struct kvm_vm *vm, int vector, - void (*handler)(struct ex_regs *)) -{ - vm_vaddr_t *handlers = (vm_vaddr_t *)addr_gva2hva(vm, vm->handlers); - - handlers[vector] = (vm_vaddr_t)handler; -} - -void assert_on_unhandled_exception(struct kvm_vcpu *vcpu) -{ - struct ucall uc; - - if (get_ucall(vcpu, &uc) == UCALL_ABORT) - REPORT_GUEST_ASSERT(uc); -} - -void kvm_arch_vm_post_create(struct kvm_vm *vm) -{ - int r; - - TEST_ASSERT(kvm_has_cap(KVM_CAP_GET_TSC_KHZ), - "Require KVM_GET_TSC_KHZ to provide udelay() 
to guest."); - - vm_create_irqchip(vm); - vm_init_descriptor_tables(vm); - - sync_global_to_guest(vm, host_cpu_is_intel); - sync_global_to_guest(vm, host_cpu_is_amd); - sync_global_to_guest(vm, is_forced_emulation_enabled); - - if (vm->type == KVM_X86_SEV_VM || vm->type == KVM_X86_SEV_ES_VM) { - struct kvm_sev_init init = { 0 }; - - vm_sev_ioctl(vm, KVM_SEV_INIT2, &init); - } - - r = __vm_ioctl(vm, KVM_GET_TSC_KHZ, NULL); - TEST_ASSERT(r > 0, "KVM_GET_TSC_KHZ did not provide a valid TSC frequency."); - guest_tsc_khz = r; - sync_global_to_guest(vm, guest_tsc_khz); -} - -void vcpu_arch_set_entry_point(struct kvm_vcpu *vcpu, void *guest_code) -{ - struct kvm_regs regs; - - vcpu_regs_get(vcpu, ®s); - regs.rip = (unsigned long) guest_code; - vcpu_regs_set(vcpu, ®s); -} - -struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id) -{ - struct kvm_mp_state mp_state; - struct kvm_regs regs; - vm_vaddr_t stack_vaddr; - struct kvm_vcpu *vcpu; - - stack_vaddr = __vm_vaddr_alloc(vm, DEFAULT_STACK_PGS * getpagesize(), - DEFAULT_GUEST_STACK_VADDR_MIN, - MEM_REGION_DATA); - - stack_vaddr += DEFAULT_STACK_PGS * getpagesize(); - - /* - * Align stack to match calling sequence requirements in section "The - * Stack Frame" of the System V ABI AMD64 Architecture Processor - * Supplement, which requires the value (%rsp + 8) to be a multiple of - * 16 when control is transferred to the function entry point. - * - * If this code is ever used to launch a vCPU with 32-bit entry point it - * may need to subtract 4 bytes instead of 8 bytes. - */ - TEST_ASSERT(IS_ALIGNED(stack_vaddr, PAGE_SIZE), - "__vm_vaddr_alloc() did not provide a page-aligned address"); - stack_vaddr -= 8; - - vcpu = __vm_vcpu_add(vm, vcpu_id); - vcpu_init_cpuid(vcpu, kvm_get_supported_cpuid()); - vcpu_init_sregs(vm, vcpu); - vcpu_init_xcrs(vm, vcpu); - - /* Setup guest general purpose registers */ - vcpu_regs_get(vcpu, ®s); - regs.rflags = regs.rflags | 0x2; - regs.rsp = stack_vaddr; - vcpu_regs_set(vcpu, ®s); - - /* Setup the MP state */ - mp_state.mp_state = 0; - vcpu_mp_state_set(vcpu, &mp_state); - - /* - * Refresh CPUID after setting SREGS and XCR0, so that KVM's "runtime" - * updates to guest CPUID, e.g. for OSXSAVE and XSAVE state size, are - * reflected into selftests' vCPU CPUID cache, i.e. so that the cache - * is consistent with vCPU state. - */ - vcpu_get_cpuid(vcpu); - return vcpu; -} - -struct kvm_vcpu *vm_arch_vcpu_recreate(struct kvm_vm *vm, uint32_t vcpu_id) -{ - struct kvm_vcpu *vcpu = __vm_vcpu_add(vm, vcpu_id); - - vcpu_init_cpuid(vcpu, kvm_get_supported_cpuid()); - - return vcpu; -} - -void vcpu_arch_free(struct kvm_vcpu *vcpu) -{ - if (vcpu->cpuid) - free(vcpu->cpuid); -} - -/* Do not use kvm_supported_cpuid directly except for validity checks. 
*/ -static void *kvm_supported_cpuid; - -const struct kvm_cpuid2 *kvm_get_supported_cpuid(void) -{ - int kvm_fd; - - if (kvm_supported_cpuid) - return kvm_supported_cpuid; - - kvm_supported_cpuid = allocate_kvm_cpuid2(MAX_NR_CPUID_ENTRIES); - kvm_fd = open_kvm_dev_path_or_exit(); - - kvm_ioctl(kvm_fd, KVM_GET_SUPPORTED_CPUID, - (struct kvm_cpuid2 *)kvm_supported_cpuid); - - close(kvm_fd); - return kvm_supported_cpuid; -} - -static uint32_t __kvm_cpu_has(const struct kvm_cpuid2 *cpuid, - uint32_t function, uint32_t index, - uint8_t reg, uint8_t lo, uint8_t hi) -{ - const struct kvm_cpuid_entry2 *entry; - int i; - - for (i = 0; i < cpuid->nent; i++) { - entry = &cpuid->entries[i]; - - /* - * The output registers in kvm_cpuid_entry2 are in alphabetical - * order, but kvm_x86_cpu_feature matches that mess, so yay - * pointer shenanigans! - */ - if (entry->function == function && entry->index == index) - return ((&entry->eax)[reg] & GENMASK(hi, lo)) >> lo; - } - - return 0; -} - -bool kvm_cpuid_has(const struct kvm_cpuid2 *cpuid, - struct kvm_x86_cpu_feature feature) -{ - return __kvm_cpu_has(cpuid, feature.function, feature.index, - feature.reg, feature.bit, feature.bit); -} - -uint32_t kvm_cpuid_property(const struct kvm_cpuid2 *cpuid, - struct kvm_x86_cpu_property property) -{ - return __kvm_cpu_has(cpuid, property.function, property.index, - property.reg, property.lo_bit, property.hi_bit); -} - -uint64_t kvm_get_feature_msr(uint64_t msr_index) -{ - struct { - struct kvm_msrs header; - struct kvm_msr_entry entry; - } buffer = {}; - int r, kvm_fd; - - buffer.header.nmsrs = 1; - buffer.entry.index = msr_index; - kvm_fd = open_kvm_dev_path_or_exit(); - - r = __kvm_ioctl(kvm_fd, KVM_GET_MSRS, &buffer.header); - TEST_ASSERT(r == 1, KVM_IOCTL_ERROR(KVM_GET_MSRS, r)); - - close(kvm_fd); - return buffer.entry.data; -} - -void __vm_xsave_require_permission(uint64_t xfeature, const char *name) -{ - int kvm_fd; - u64 bitmask; - long rc; - struct kvm_device_attr attr = { - .group = 0, - .attr = KVM_X86_XCOMP_GUEST_SUPP, - .addr = (unsigned long) &bitmask, - }; - - TEST_ASSERT(!kvm_supported_cpuid, - "kvm_get_supported_cpuid() cannot be used before ARCH_REQ_XCOMP_GUEST_PERM"); - - TEST_ASSERT(is_power_of_2(xfeature), - "Dynamic XFeatures must be enabled one at a time"); - - kvm_fd = open_kvm_dev_path_or_exit(); - rc = __kvm_ioctl(kvm_fd, KVM_GET_DEVICE_ATTR, &attr); - close(kvm_fd); - - if (rc == -1 && (errno == ENXIO || errno == EINVAL)) - __TEST_REQUIRE(0, "KVM_X86_XCOMP_GUEST_SUPP not supported"); - - TEST_ASSERT(rc == 0, "KVM_GET_DEVICE_ATTR(0, KVM_X86_XCOMP_GUEST_SUPP) error: %ld", rc); - - __TEST_REQUIRE(bitmask & xfeature, - "Required XSAVE feature '%s' not supported", name); - - TEST_REQUIRE(!syscall(SYS_arch_prctl, ARCH_REQ_XCOMP_GUEST_PERM, ilog2(xfeature))); - - rc = syscall(SYS_arch_prctl, ARCH_GET_XCOMP_GUEST_PERM, &bitmask); - TEST_ASSERT(rc == 0, "prctl(ARCH_GET_XCOMP_GUEST_PERM) error: %ld", rc); - TEST_ASSERT(bitmask & xfeature, - "'%s' (0x%lx) not permitted after prctl(ARCH_REQ_XCOMP_GUEST_PERM) permitted=0x%lx", - name, xfeature, bitmask); -} - -void vcpu_init_cpuid(struct kvm_vcpu *vcpu, const struct kvm_cpuid2 *cpuid) -{ - TEST_ASSERT(cpuid != vcpu->cpuid, "@cpuid can't be the vCPU's CPUID"); - - /* Allow overriding the default CPUID. 
*/ - if (vcpu->cpuid && vcpu->cpuid->nent < cpuid->nent) { - free(vcpu->cpuid); - vcpu->cpuid = NULL; - } - - if (!vcpu->cpuid) - vcpu->cpuid = allocate_kvm_cpuid2(cpuid->nent); - - memcpy(vcpu->cpuid, cpuid, kvm_cpuid2_size(cpuid->nent)); - vcpu_set_cpuid(vcpu); -} - -void vcpu_set_cpuid_property(struct kvm_vcpu *vcpu, - struct kvm_x86_cpu_property property, - uint32_t value) -{ - struct kvm_cpuid_entry2 *entry; - - entry = __vcpu_get_cpuid_entry(vcpu, property.function, property.index); - - (&entry->eax)[property.reg] &= ~GENMASK(property.hi_bit, property.lo_bit); - (&entry->eax)[property.reg] |= value << property.lo_bit; - - vcpu_set_cpuid(vcpu); - - /* Sanity check that @value doesn't exceed the bounds in any way. */ - TEST_ASSERT_EQ(kvm_cpuid_property(vcpu->cpuid, property), value); -} - -void vcpu_clear_cpuid_entry(struct kvm_vcpu *vcpu, uint32_t function) -{ - struct kvm_cpuid_entry2 *entry = vcpu_get_cpuid_entry(vcpu, function); - - entry->eax = 0; - entry->ebx = 0; - entry->ecx = 0; - entry->edx = 0; - vcpu_set_cpuid(vcpu); -} - -void vcpu_set_or_clear_cpuid_feature(struct kvm_vcpu *vcpu, - struct kvm_x86_cpu_feature feature, - bool set) -{ - struct kvm_cpuid_entry2 *entry; - u32 *reg; - - entry = __vcpu_get_cpuid_entry(vcpu, feature.function, feature.index); - reg = (&entry->eax) + feature.reg; - - if (set) - *reg |= BIT(feature.bit); - else - *reg &= ~BIT(feature.bit); - - vcpu_set_cpuid(vcpu); -} - -uint64_t vcpu_get_msr(struct kvm_vcpu *vcpu, uint64_t msr_index) -{ - struct { - struct kvm_msrs header; - struct kvm_msr_entry entry; - } buffer = {}; - - buffer.header.nmsrs = 1; - buffer.entry.index = msr_index; - - vcpu_msrs_get(vcpu, &buffer.header); - - return buffer.entry.data; -} - -int _vcpu_set_msr(struct kvm_vcpu *vcpu, uint64_t msr_index, uint64_t msr_value) -{ - struct { - struct kvm_msrs header; - struct kvm_msr_entry entry; - } buffer = {}; - - memset(&buffer, 0, sizeof(buffer)); - buffer.header.nmsrs = 1; - buffer.entry.index = msr_index; - buffer.entry.data = msr_value; - - return __vcpu_ioctl(vcpu, KVM_SET_MSRS, &buffer.header); -} - -void vcpu_args_set(struct kvm_vcpu *vcpu, unsigned int num, ...) 
-{ - va_list ap; - struct kvm_regs regs; - - TEST_ASSERT(num >= 1 && num <= 6, "Unsupported number of args,\n" - " num: %u", - num); - - va_start(ap, num); - vcpu_regs_get(vcpu, ®s); - - if (num >= 1) - regs.rdi = va_arg(ap, uint64_t); - - if (num >= 2) - regs.rsi = va_arg(ap, uint64_t); - - if (num >= 3) - regs.rdx = va_arg(ap, uint64_t); - - if (num >= 4) - regs.rcx = va_arg(ap, uint64_t); - - if (num >= 5) - regs.r8 = va_arg(ap, uint64_t); - - if (num >= 6) - regs.r9 = va_arg(ap, uint64_t); - - vcpu_regs_set(vcpu, ®s); - va_end(ap); -} - -void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, uint8_t indent) -{ - struct kvm_regs regs; - struct kvm_sregs sregs; - - fprintf(stream, "%*svCPU ID: %u\n", indent, "", vcpu->id); - - fprintf(stream, "%*sregs:\n", indent + 2, ""); - vcpu_regs_get(vcpu, ®s); - regs_dump(stream, ®s, indent + 4); - - fprintf(stream, "%*ssregs:\n", indent + 2, ""); - vcpu_sregs_get(vcpu, &sregs); - sregs_dump(stream, &sregs, indent + 4); -} - -static struct kvm_msr_list *__kvm_get_msr_index_list(bool feature_msrs) -{ - struct kvm_msr_list *list; - struct kvm_msr_list nmsrs; - int kvm_fd, r; - - kvm_fd = open_kvm_dev_path_or_exit(); - - nmsrs.nmsrs = 0; - if (!feature_msrs) - r = __kvm_ioctl(kvm_fd, KVM_GET_MSR_INDEX_LIST, &nmsrs); - else - r = __kvm_ioctl(kvm_fd, KVM_GET_MSR_FEATURE_INDEX_LIST, &nmsrs); - - TEST_ASSERT(r == -1 && errno == E2BIG, - "Expected -E2BIG, got rc: %i errno: %i (%s)", - r, errno, strerror(errno)); - - list = malloc(sizeof(*list) + nmsrs.nmsrs * sizeof(list->indices[0])); - TEST_ASSERT(list, "-ENOMEM when allocating MSR index list"); - list->nmsrs = nmsrs.nmsrs; - - if (!feature_msrs) - kvm_ioctl(kvm_fd, KVM_GET_MSR_INDEX_LIST, list); - else - kvm_ioctl(kvm_fd, KVM_GET_MSR_FEATURE_INDEX_LIST, list); - close(kvm_fd); - - TEST_ASSERT(list->nmsrs == nmsrs.nmsrs, - "Number of MSRs in list changed, was %d, now %d", - nmsrs.nmsrs, list->nmsrs); - return list; -} - -const struct kvm_msr_list *kvm_get_msr_index_list(void) -{ - static const struct kvm_msr_list *list; - - if (!list) - list = __kvm_get_msr_index_list(false); - return list; -} - - -const struct kvm_msr_list *kvm_get_feature_msr_index_list(void) -{ - static const struct kvm_msr_list *list; - - if (!list) - list = __kvm_get_msr_index_list(true); - return list; -} - -bool kvm_msr_is_in_save_restore_list(uint32_t msr_index) -{ - const struct kvm_msr_list *list = kvm_get_msr_index_list(); - int i; - - for (i = 0; i < list->nmsrs; ++i) { - if (list->indices[i] == msr_index) - return true; - } - - return false; -} - -static void vcpu_save_xsave_state(struct kvm_vcpu *vcpu, - struct kvm_x86_state *state) -{ - int size = vm_check_cap(vcpu->vm, KVM_CAP_XSAVE2); - - if (size) { - state->xsave = malloc(size); - vcpu_xsave2_get(vcpu, state->xsave); - } else { - state->xsave = malloc(sizeof(struct kvm_xsave)); - vcpu_xsave_get(vcpu, state->xsave); - } -} - -struct kvm_x86_state *vcpu_save_state(struct kvm_vcpu *vcpu) -{ - const struct kvm_msr_list *msr_list = kvm_get_msr_index_list(); - struct kvm_x86_state *state; - int i; - - static int nested_size = -1; - - if (nested_size == -1) { - nested_size = kvm_check_cap(KVM_CAP_NESTED_STATE); - TEST_ASSERT(nested_size <= sizeof(state->nested_), - "Nested state size too big, %i > %zi", - nested_size, sizeof(state->nested_)); - } - - /* - * When KVM exits to userspace with KVM_EXIT_IO, KVM guarantees - * guest state is consistent only after userspace re-enters the - * kernel with KVM_RUN. Complete IO prior to migrating state - * to a new VM. 
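 * vcpu_run_complete_io() re-enters KVM_RUN with run->immediate_exit set, so
 * the pending I/O is completed and control returns to userspace without any
 * further guest code being executed.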
- */ - vcpu_run_complete_io(vcpu); - - state = malloc(sizeof(*state) + msr_list->nmsrs * sizeof(state->msrs.entries[0])); - TEST_ASSERT(state, "-ENOMEM when allocating kvm state"); - - vcpu_events_get(vcpu, &state->events); - vcpu_mp_state_get(vcpu, &state->mp_state); - vcpu_regs_get(vcpu, &state->regs); - vcpu_save_xsave_state(vcpu, state); - - if (kvm_has_cap(KVM_CAP_XCRS)) - vcpu_xcrs_get(vcpu, &state->xcrs); - - vcpu_sregs_get(vcpu, &state->sregs); - - if (nested_size) { - state->nested.size = sizeof(state->nested_); - - vcpu_nested_state_get(vcpu, &state->nested); - TEST_ASSERT(state->nested.size <= nested_size, - "Nested state size too big, %i (KVM_CHECK_CAP gave %i)", - state->nested.size, nested_size); - } else { - state->nested.size = 0; - } - - state->msrs.nmsrs = msr_list->nmsrs; - for (i = 0; i < msr_list->nmsrs; i++) - state->msrs.entries[i].index = msr_list->indices[i]; - vcpu_msrs_get(vcpu, &state->msrs); - - vcpu_debugregs_get(vcpu, &state->debugregs); - - return state; -} - -void vcpu_load_state(struct kvm_vcpu *vcpu, struct kvm_x86_state *state) -{ - vcpu_sregs_set(vcpu, &state->sregs); - vcpu_msrs_set(vcpu, &state->msrs); - - if (kvm_has_cap(KVM_CAP_XCRS)) - vcpu_xcrs_set(vcpu, &state->xcrs); - - vcpu_xsave_set(vcpu, state->xsave); - vcpu_events_set(vcpu, &state->events); - vcpu_mp_state_set(vcpu, &state->mp_state); - vcpu_debugregs_set(vcpu, &state->debugregs); - vcpu_regs_set(vcpu, &state->regs); - - if (state->nested.size) - vcpu_nested_state_set(vcpu, &state->nested); -} - -void kvm_x86_state_cleanup(struct kvm_x86_state *state) -{ - free(state->xsave); - free(state); -} - -void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits) -{ - if (!kvm_cpu_has_p(X86_PROPERTY_MAX_PHY_ADDR)) { - *pa_bits = kvm_cpu_has(X86_FEATURE_PAE) ? 36 : 32; - *va_bits = 32; - } else { - *pa_bits = kvm_cpu_property(X86_PROPERTY_MAX_PHY_ADDR); - *va_bits = kvm_cpu_property(X86_PROPERTY_MAX_VIRT_ADDR); - } -} - -void kvm_init_vm_address_properties(struct kvm_vm *vm) -{ - if (vm->type == KVM_X86_SEV_VM || vm->type == KVM_X86_SEV_ES_VM) { - vm->arch.sev_fd = open_sev_dev_path_or_exit(); - vm->arch.c_bit = BIT_ULL(this_cpu_property(X86_PROPERTY_SEV_C_BIT)); - vm->gpa_tag_mask = vm->arch.c_bit; - } else { - vm->arch.sev_fd = -1; - } -} - -const struct kvm_cpuid_entry2 *get_cpuid_entry(const struct kvm_cpuid2 *cpuid, - uint32_t function, uint32_t index) -{ - int i; - - for (i = 0; i < cpuid->nent; i++) { - if (cpuid->entries[i].function == function && - cpuid->entries[i].index == index) - return &cpuid->entries[i]; - } - - TEST_FAIL("CPUID function 0x%x index 0x%x not found ", function, index); - - return NULL; -} - -#define X86_HYPERCALL(inputs...) 
\ -({ \ - uint64_t r; \ - \ - asm volatile("test %[use_vmmcall], %[use_vmmcall]\n\t" \ - "jnz 1f\n\t" \ - "vmcall\n\t" \ - "jmp 2f\n\t" \ - "1: vmmcall\n\t" \ - "2:" \ - : "=a"(r) \ - : [use_vmmcall] "r" (host_cpu_is_amd), inputs); \ - \ - r; \ -}) - -uint64_t kvm_hypercall(uint64_t nr, uint64_t a0, uint64_t a1, uint64_t a2, - uint64_t a3) -{ - return X86_HYPERCALL("a"(nr), "b"(a0), "c"(a1), "d"(a2), "S"(a3)); -} - -uint64_t __xen_hypercall(uint64_t nr, uint64_t a0, void *a1) -{ - return X86_HYPERCALL("a"(nr), "D"(a0), "S"(a1)); -} - -void xen_hypercall(uint64_t nr, uint64_t a0, void *a1) -{ - GUEST_ASSERT(!__xen_hypercall(nr, a0, a1)); -} - -unsigned long vm_compute_max_gfn(struct kvm_vm *vm) -{ - const unsigned long num_ht_pages = 12 << (30 - vm->page_shift); /* 12 GiB */ - unsigned long ht_gfn, max_gfn, max_pfn; - uint8_t maxphyaddr, guest_maxphyaddr; - - /* - * Use "guest MAXPHYADDR" from KVM if it's available. Guest MAXPHYADDR - * enumerates the max _mappable_ GPA, which can be less than the raw - * MAXPHYADDR, e.g. if MAXPHYADDR=52, KVM is using TDP, and the CPU - * doesn't support 5-level TDP. - */ - guest_maxphyaddr = kvm_cpu_property(X86_PROPERTY_GUEST_MAX_PHY_ADDR); - guest_maxphyaddr = guest_maxphyaddr ?: vm->pa_bits; - TEST_ASSERT(guest_maxphyaddr <= vm->pa_bits, - "Guest MAXPHYADDR should never be greater than raw MAXPHYADDR"); - - max_gfn = (1ULL << (guest_maxphyaddr - vm->page_shift)) - 1; - - /* Avoid reserved HyperTransport region on AMD processors. */ - if (!host_cpu_is_amd) - return max_gfn; - - /* On parts with <40 physical address bits, the area is fully hidden */ - if (vm->pa_bits < 40) - return max_gfn; - - /* Before family 17h, the HyperTransport area is just below 1T. */ - ht_gfn = (1 << 28) - num_ht_pages; - if (this_cpu_family() < 0x17) - goto done; - - /* - * Otherwise it's at the top of the physical address space, possibly - * reduced due to SME by bits 11:6 of CPUID[0x8000001f].EBX. Use - * the old conservative value if MAXPHYADDR is not enumerated. - */ - if (!this_cpu_has_p(X86_PROPERTY_MAX_PHY_ADDR)) - goto done; - - maxphyaddr = this_cpu_property(X86_PROPERTY_MAX_PHY_ADDR); - max_pfn = (1ULL << (maxphyaddr - vm->page_shift)) - 1; - - if (this_cpu_has_p(X86_PROPERTY_PHYS_ADDR_REDUCTION)) - max_pfn >>= this_cpu_property(X86_PROPERTY_PHYS_ADDR_REDUCTION); - - ht_gfn = max_pfn - num_ht_pages; -done: - return min(max_gfn, ht_gfn - 1); -} - -/* Returns true if kvm_intel was loaded with unrestricted_guest=1. */ -bool vm_is_unrestricted_guest(struct kvm_vm *vm) -{ - /* Ensure that a KVM vendor-specific module is loaded. 
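 * When no VM is passed in, opening /dev/kvm (and immediately closing it) is
 * enough to force that; the parameter itself is then read from kvm_intel's
 * module parameters via get_kvm_intel_param_bool().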
*/ - if (vm == NULL) - close(open_kvm_dev_path_or_exit()); - - return get_kvm_intel_param_bool("unrestricted_guest"); -} - -void kvm_selftest_arch_init(void) -{ - host_cpu_is_intel = this_cpu_is_intel(); - host_cpu_is_amd = this_cpu_is_amd(); - is_forced_emulation_enabled = kvm_is_forced_emulation_enabled(); -} - -bool sys_clocksource_is_based_on_tsc(void) -{ - char *clk_name = sys_get_cur_clocksource(); - bool ret = !strcmp(clk_name, "tsc\n") || - !strcmp(clk_name, "hyperv_clocksource_tsc_page\n"); - - free(clk_name); - - return ret; -} diff --git a/tools/testing/selftests/kvm/lib/x86_64/sev.c b/tools/testing/selftests/kvm/lib/x86_64/sev.c deleted file mode 100644 index e9535ee20b7f..000000000000 --- a/tools/testing/selftests/kvm/lib/x86_64/sev.c +++ /dev/null @@ -1,141 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -#include -#include - -#include "sev.h" - -/* - * sparsebit_next_clear() can return 0 if [x, 2**64-1] are all set, and the - * -1 would then cause an underflow back to 2**64 - 1. This is expected and - * correct. - * - * If the last range in the sparsebit is [x, y] and we try to iterate, - * sparsebit_next_set() will return 0, and sparsebit_next_clear() will try - * and find the first range, but that's correct because the condition - * expression would cause us to quit the loop. - */ -static void encrypt_region(struct kvm_vm *vm, struct userspace_mem_region *region) -{ - const struct sparsebit *protected_phy_pages = region->protected_phy_pages; - const vm_paddr_t gpa_base = region->region.guest_phys_addr; - const sparsebit_idx_t lowest_page_in_region = gpa_base >> vm->page_shift; - sparsebit_idx_t i, j; - - if (!sparsebit_any_set(protected_phy_pages)) - return; - - sev_register_encrypted_memory(vm, region); - - sparsebit_for_each_set_range(protected_phy_pages, i, j) { - const uint64_t size = (j - i + 1) * vm->page_size; - const uint64_t offset = (i - lowest_page_in_region) * vm->page_size; - - sev_launch_update_data(vm, gpa_base + offset, size); - } -} - -void sev_vm_init(struct kvm_vm *vm) -{ - if (vm->type == KVM_X86_DEFAULT_VM) { - assert(vm->arch.sev_fd == -1); - vm->arch.sev_fd = open_sev_dev_path_or_exit(); - vm_sev_ioctl(vm, KVM_SEV_INIT, NULL); - } else { - struct kvm_sev_init init = { 0 }; - assert(vm->type == KVM_X86_SEV_VM); - vm_sev_ioctl(vm, KVM_SEV_INIT2, &init); - } -} - -void sev_es_vm_init(struct kvm_vm *vm) -{ - if (vm->type == KVM_X86_DEFAULT_VM) { - assert(vm->arch.sev_fd == -1); - vm->arch.sev_fd = open_sev_dev_path_or_exit(); - vm_sev_ioctl(vm, KVM_SEV_ES_INIT, NULL); - } else { - struct kvm_sev_init init = { 0 }; - assert(vm->type == KVM_X86_SEV_ES_VM); - vm_sev_ioctl(vm, KVM_SEV_INIT2, &init); - } -} - -void sev_vm_launch(struct kvm_vm *vm, uint32_t policy) -{ - struct kvm_sev_launch_start launch_start = { - .policy = policy, - }; - struct userspace_mem_region *region; - struct kvm_sev_guest_status status; - int ctr; - - vm_sev_ioctl(vm, KVM_SEV_LAUNCH_START, &launch_start); - vm_sev_ioctl(vm, KVM_SEV_GUEST_STATUS, &status); - - TEST_ASSERT_EQ(status.policy, policy); - TEST_ASSERT_EQ(status.state, SEV_GUEST_STATE_LAUNCH_UPDATE); - - hash_for_each(vm->regions.slot_hash, ctr, region, slot_node) - encrypt_region(vm, region); - - if (policy & SEV_POLICY_ES) - vm_sev_ioctl(vm, KVM_SEV_LAUNCH_UPDATE_VMSA, NULL); - - vm->arch.is_pt_protected = true; -} - -void sev_vm_launch_measure(struct kvm_vm *vm, uint8_t *measurement) -{ - struct kvm_sev_launch_measure launch_measure; - struct kvm_sev_guest_status guest_status; - - launch_measure.len = 256; - 
launch_measure.uaddr = (__u64)measurement; - vm_sev_ioctl(vm, KVM_SEV_LAUNCH_MEASURE, &launch_measure); - - vm_sev_ioctl(vm, KVM_SEV_GUEST_STATUS, &guest_status); - TEST_ASSERT_EQ(guest_status.state, SEV_GUEST_STATE_LAUNCH_SECRET); -} - -void sev_vm_launch_finish(struct kvm_vm *vm) -{ - struct kvm_sev_guest_status status; - - vm_sev_ioctl(vm, KVM_SEV_GUEST_STATUS, &status); - TEST_ASSERT(status.state == SEV_GUEST_STATE_LAUNCH_UPDATE || - status.state == SEV_GUEST_STATE_LAUNCH_SECRET, - "Unexpected guest state: %d", status.state); - - vm_sev_ioctl(vm, KVM_SEV_LAUNCH_FINISH, NULL); - - vm_sev_ioctl(vm, KVM_SEV_GUEST_STATUS, &status); - TEST_ASSERT_EQ(status.state, SEV_GUEST_STATE_RUNNING); -} - -struct kvm_vm *vm_sev_create_with_one_vcpu(uint32_t type, void *guest_code, - struct kvm_vcpu **cpu) -{ - struct vm_shape shape = { - .mode = VM_MODE_DEFAULT, - .type = type, - }; - struct kvm_vm *vm; - struct kvm_vcpu *cpus[1]; - - vm = __vm_create_with_vcpus(shape, 1, 0, guest_code, cpus); - *cpu = cpus[0]; - - return vm; -} - -void vm_sev_launch(struct kvm_vm *vm, uint32_t policy, uint8_t *measurement) -{ - sev_vm_launch(vm, policy); - - if (!measurement) - measurement = alloca(256); - - sev_vm_launch_measure(vm, measurement); - - sev_vm_launch_finish(vm); -} diff --git a/tools/testing/selftests/kvm/lib/x86_64/svm.c b/tools/testing/selftests/kvm/lib/x86_64/svm.c deleted file mode 100644 index 5495a92dfd5a..000000000000 --- a/tools/testing/selftests/kvm/lib/x86_64/svm.c +++ /dev/null @@ -1,164 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * tools/testing/selftests/kvm/lib/x86_64/svm.c - * Helpers used for nested SVM testing - * Largely inspired from KVM unit test svm.c - * - * Copyright (C) 2020, Red Hat, Inc. - */ - -#include "test_util.h" -#include "kvm_util.h" -#include "processor.h" -#include "svm_util.h" - -#define SEV_DEV_PATH "/dev/sev" - -struct gpr64_regs guest_regs; -u64 rflags; - -/* Allocate memory regions for nested SVM tests. - * - * Input Args: - * vm - The VM to allocate guest-virtual addresses in. - * - * Output Args: - * p_svm_gva - The guest virtual address for the struct svm_test_data. - * - * Return: - * Pointer to structure with the addresses of the SVM areas. 
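 *
 * Sketch of the usual flow in a nested SVM test (l2_guest_code and
 * l2_stack_top are illustrative names): the host allocates the areas and
 * passes the guest virtual address to L1, which sets up and enters L2:
 *
 *	svm = vcpu_alloc_svm(vm, &svm_gva);		// host
 *	vcpu_args_set(vcpu, 1, svm_gva);
 *
 *	generic_svm_setup(svm, l2_guest_code, l2_stack_top);	// L1 guest
 *	run_guest(svm->vmcb, svm->vmcb_gpa);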
- */ -struct svm_test_data * -vcpu_alloc_svm(struct kvm_vm *vm, vm_vaddr_t *p_svm_gva) -{ - vm_vaddr_t svm_gva = vm_vaddr_alloc_page(vm); - struct svm_test_data *svm = addr_gva2hva(vm, svm_gva); - - svm->vmcb = (void *)vm_vaddr_alloc_page(vm); - svm->vmcb_hva = addr_gva2hva(vm, (uintptr_t)svm->vmcb); - svm->vmcb_gpa = addr_gva2gpa(vm, (uintptr_t)svm->vmcb); - - svm->save_area = (void *)vm_vaddr_alloc_page(vm); - svm->save_area_hva = addr_gva2hva(vm, (uintptr_t)svm->save_area); - svm->save_area_gpa = addr_gva2gpa(vm, (uintptr_t)svm->save_area); - - svm->msr = (void *)vm_vaddr_alloc_page(vm); - svm->msr_hva = addr_gva2hva(vm, (uintptr_t)svm->msr); - svm->msr_gpa = addr_gva2gpa(vm, (uintptr_t)svm->msr); - memset(svm->msr_hva, 0, getpagesize()); - - *p_svm_gva = svm_gva; - return svm; -} - -static void vmcb_set_seg(struct vmcb_seg *seg, u16 selector, - u64 base, u32 limit, u32 attr) -{ - seg->selector = selector; - seg->attrib = attr; - seg->limit = limit; - seg->base = base; -} - -void generic_svm_setup(struct svm_test_data *svm, void *guest_rip, void *guest_rsp) -{ - struct vmcb *vmcb = svm->vmcb; - uint64_t vmcb_gpa = svm->vmcb_gpa; - struct vmcb_save_area *save = &vmcb->save; - struct vmcb_control_area *ctrl = &vmcb->control; - u32 data_seg_attr = 3 | SVM_SELECTOR_S_MASK | SVM_SELECTOR_P_MASK - | SVM_SELECTOR_DB_MASK | SVM_SELECTOR_G_MASK; - u32 code_seg_attr = 9 | SVM_SELECTOR_S_MASK | SVM_SELECTOR_P_MASK - | SVM_SELECTOR_L_MASK | SVM_SELECTOR_G_MASK; - uint64_t efer; - - efer = rdmsr(MSR_EFER); - wrmsr(MSR_EFER, efer | EFER_SVME); - wrmsr(MSR_VM_HSAVE_PA, svm->save_area_gpa); - - memset(vmcb, 0, sizeof(*vmcb)); - asm volatile ("vmsave %0\n\t" : : "a" (vmcb_gpa) : "memory"); - vmcb_set_seg(&save->es, get_es(), 0, -1U, data_seg_attr); - vmcb_set_seg(&save->cs, get_cs(), 0, -1U, code_seg_attr); - vmcb_set_seg(&save->ss, get_ss(), 0, -1U, data_seg_attr); - vmcb_set_seg(&save->ds, get_ds(), 0, -1U, data_seg_attr); - vmcb_set_seg(&save->gdtr, 0, get_gdt().address, get_gdt().size, 0); - vmcb_set_seg(&save->idtr, 0, get_idt().address, get_idt().size, 0); - - ctrl->asid = 1; - save->cpl = 0; - save->efer = rdmsr(MSR_EFER); - asm volatile ("mov %%cr4, %0" : "=r"(save->cr4) : : "memory"); - asm volatile ("mov %%cr3, %0" : "=r"(save->cr3) : : "memory"); - asm volatile ("mov %%cr0, %0" : "=r"(save->cr0) : : "memory"); - asm volatile ("mov %%dr7, %0" : "=r"(save->dr7) : : "memory"); - asm volatile ("mov %%dr6, %0" : "=r"(save->dr6) : : "memory"); - asm volatile ("mov %%cr2, %0" : "=r"(save->cr2) : : "memory"); - save->g_pat = rdmsr(MSR_IA32_CR_PAT); - save->dbgctl = rdmsr(MSR_IA32_DEBUGCTLMSR); - ctrl->intercept = (1ULL << INTERCEPT_VMRUN) | - (1ULL << INTERCEPT_VMMCALL); - ctrl->msrpm_base_pa = svm->msr_gpa; - - vmcb->save.rip = (u64)guest_rip; - vmcb->save.rsp = (u64)guest_rsp; - guest_regs.rdi = (u64)svm; -} - -/* - * save/restore 64-bit general registers except rax, rip, rsp - * which are directly handed through the VMCB guest processor state - */ -#define SAVE_GPR_C \ - "xchg %%rbx, guest_regs+0x20\n\t" \ - "xchg %%rcx, guest_regs+0x10\n\t" \ - "xchg %%rdx, guest_regs+0x18\n\t" \ - "xchg %%rbp, guest_regs+0x30\n\t" \ - "xchg %%rsi, guest_regs+0x38\n\t" \ - "xchg %%rdi, guest_regs+0x40\n\t" \ - "xchg %%r8, guest_regs+0x48\n\t" \ - "xchg %%r9, guest_regs+0x50\n\t" \ - "xchg %%r10, guest_regs+0x58\n\t" \ - "xchg %%r11, guest_regs+0x60\n\t" \ - "xchg %%r12, guest_regs+0x68\n\t" \ - "xchg %%r13, guest_regs+0x70\n\t" \ - "xchg %%r14, guest_regs+0x78\n\t" \ - "xchg %%r15, guest_regs+0x80\n\t" - -#define 
LOAD_GPR_C SAVE_GPR_C - -/* - * selftests do not use interrupts so we dropped clgi/sti/cli/stgi - * for now. registers involved in LOAD/SAVE_GPR_C are eventually - * unmodified so they do not need to be in the clobber list. - */ -void run_guest(struct vmcb *vmcb, uint64_t vmcb_gpa) -{ - asm volatile ( - "vmload %[vmcb_gpa]\n\t" - "mov rflags, %%r15\n\t" // rflags - "mov %%r15, 0x170(%[vmcb])\n\t" - "mov guest_regs, %%r15\n\t" // rax - "mov %%r15, 0x1f8(%[vmcb])\n\t" - LOAD_GPR_C - "vmrun %[vmcb_gpa]\n\t" - SAVE_GPR_C - "mov 0x170(%[vmcb]), %%r15\n\t" // rflags - "mov %%r15, rflags\n\t" - "mov 0x1f8(%[vmcb]), %%r15\n\t" // rax - "mov %%r15, guest_regs\n\t" - "vmsave %[vmcb_gpa]\n\t" - : : [vmcb] "r" (vmcb), [vmcb_gpa] "a" (vmcb_gpa) - : "r15", "memory"); -} - -/* - * Open SEV_DEV_PATH if available, otherwise exit the entire program. - * - * Return: - * The opened file descriptor of /dev/sev. - */ -int open_sev_dev_path_or_exit(void) -{ - return open_path_or_exit(SEV_DEV_PATH, 0); -} diff --git a/tools/testing/selftests/kvm/lib/x86_64/ucall.c b/tools/testing/selftests/kvm/lib/x86_64/ucall.c deleted file mode 100644 index 1265cecc7dd1..000000000000 --- a/tools/testing/selftests/kvm/lib/x86_64/ucall.c +++ /dev/null @@ -1,56 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * ucall support. A ucall is a "hypercall to userspace". - * - * Copyright (C) 2018, Red Hat, Inc. - */ -#include "kvm_util.h" - -#define UCALL_PIO_PORT ((uint16_t)0x1000) - -void ucall_arch_do_ucall(vm_vaddr_t uc) -{ - /* - * FIXME: Revert this hack (the entire commit that added it) once nVMX - * preserves L2 GPRs across a nested VM-Exit. If a ucall from L2, e.g. - * to do a GUEST_SYNC(), lands the vCPU in L1, any and all GPRs can be - * clobbered by L1. Save and restore non-volatile GPRs (clobbering RBP - * in particular is problematic) along with RDX and RDI (which are - * inputs), and clobber volatile GPRs. *sigh* - */ -#define HORRIFIC_L2_UCALL_CLOBBER_HACK \ - "rcx", "rsi", "r8", "r9", "r10", "r11" - - asm volatile("push %%rbp\n\t" - "push %%r15\n\t" - "push %%r14\n\t" - "push %%r13\n\t" - "push %%r12\n\t" - "push %%rbx\n\t" - "push %%rdx\n\t" - "push %%rdi\n\t" - "in %[port], %%al\n\t" - "pop %%rdi\n\t" - "pop %%rdx\n\t" - "pop %%rbx\n\t" - "pop %%r12\n\t" - "pop %%r13\n\t" - "pop %%r14\n\t" - "pop %%r15\n\t" - "pop %%rbp\n\t" - : : [port] "d" (UCALL_PIO_PORT), "D" (uc) : "rax", "memory", - HORRIFIC_L2_UCALL_CLOBBER_HACK); -} - -void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu) -{ - struct kvm_run *run = vcpu->run; - - if (run->exit_reason == KVM_EXIT_IO && run->io.port == UCALL_PIO_PORT) { - struct kvm_regs regs; - - vcpu_regs_get(vcpu, ®s); - return (void *)regs.rdi; - } - return NULL; -} diff --git a/tools/testing/selftests/kvm/lib/x86_64/vmx.c b/tools/testing/selftests/kvm/lib/x86_64/vmx.c deleted file mode 100644 index d7ac122820bf..000000000000 --- a/tools/testing/selftests/kvm/lib/x86_64/vmx.c +++ /dev/null @@ -1,554 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * tools/testing/selftests/kvm/lib/x86_64/vmx.c - * - * Copyright (C) 2018, Google LLC. 
- */ - -#include - -#include "test_util.h" -#include "kvm_util.h" -#include "processor.h" -#include "vmx.h" - -#define PAGE_SHIFT_4K 12 - -#define KVM_EPT_PAGE_TABLE_MIN_PADDR 0x1c0000 - -bool enable_evmcs; - -struct hv_enlightened_vmcs *current_evmcs; -struct hv_vp_assist_page *current_vp_assist; - -struct eptPageTableEntry { - uint64_t readable:1; - uint64_t writable:1; - uint64_t executable:1; - uint64_t memory_type:3; - uint64_t ignore_pat:1; - uint64_t page_size:1; - uint64_t accessed:1; - uint64_t dirty:1; - uint64_t ignored_11_10:2; - uint64_t address:40; - uint64_t ignored_62_52:11; - uint64_t suppress_ve:1; -}; - -struct eptPageTablePointer { - uint64_t memory_type:3; - uint64_t page_walk_length:3; - uint64_t ad_enabled:1; - uint64_t reserved_11_07:5; - uint64_t address:40; - uint64_t reserved_63_52:12; -}; -int vcpu_enable_evmcs(struct kvm_vcpu *vcpu) -{ - uint16_t evmcs_ver; - - vcpu_enable_cap(vcpu, KVM_CAP_HYPERV_ENLIGHTENED_VMCS, - (unsigned long)&evmcs_ver); - - /* KVM should return supported EVMCS version range */ - TEST_ASSERT(((evmcs_ver >> 8) >= (evmcs_ver & 0xff)) && - (evmcs_ver & 0xff) > 0, - "Incorrect EVMCS version range: %x:%x", - evmcs_ver & 0xff, evmcs_ver >> 8); - - return evmcs_ver; -} - -/* Allocate memory regions for nested VMX tests. - * - * Input Args: - * vm - The VM to allocate guest-virtual addresses in. - * - * Output Args: - * p_vmx_gva - The guest virtual address for the struct vmx_pages. - * - * Return: - * Pointer to structure with the addresses of the VMX areas. - */ -struct vmx_pages * -vcpu_alloc_vmx(struct kvm_vm *vm, vm_vaddr_t *p_vmx_gva) -{ - vm_vaddr_t vmx_gva = vm_vaddr_alloc_page(vm); - struct vmx_pages *vmx = addr_gva2hva(vm, vmx_gva); - - /* Setup of a region of guest memory for the vmxon region. */ - vmx->vmxon = (void *)vm_vaddr_alloc_page(vm); - vmx->vmxon_hva = addr_gva2hva(vm, (uintptr_t)vmx->vmxon); - vmx->vmxon_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vmxon); - - /* Setup of a region of guest memory for a vmcs. */ - vmx->vmcs = (void *)vm_vaddr_alloc_page(vm); - vmx->vmcs_hva = addr_gva2hva(vm, (uintptr_t)vmx->vmcs); - vmx->vmcs_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vmcs); - - /* Setup of a region of guest memory for the MSR bitmap. */ - vmx->msr = (void *)vm_vaddr_alloc_page(vm); - vmx->msr_hva = addr_gva2hva(vm, (uintptr_t)vmx->msr); - vmx->msr_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->msr); - memset(vmx->msr_hva, 0, getpagesize()); - - /* Setup of a region of guest memory for the shadow VMCS. */ - vmx->shadow_vmcs = (void *)vm_vaddr_alloc_page(vm); - vmx->shadow_vmcs_hva = addr_gva2hva(vm, (uintptr_t)vmx->shadow_vmcs); - vmx->shadow_vmcs_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->shadow_vmcs); - - /* Setup of a region of guest memory for the VMREAD and VMWRITE bitmaps. */ - vmx->vmread = (void *)vm_vaddr_alloc_page(vm); - vmx->vmread_hva = addr_gva2hva(vm, (uintptr_t)vmx->vmread); - vmx->vmread_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vmread); - memset(vmx->vmread_hva, 0, getpagesize()); - - vmx->vmwrite = (void *)vm_vaddr_alloc_page(vm); - vmx->vmwrite_hva = addr_gva2hva(vm, (uintptr_t)vmx->vmwrite); - vmx->vmwrite_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vmwrite); - memset(vmx->vmwrite_hva, 0, getpagesize()); - - *p_vmx_gva = vmx_gva; - return vmx; -} - -bool prepare_for_vmx_operation(struct vmx_pages *vmx) -{ - uint64_t feature_control; - uint64_t required; - unsigned long cr0; - unsigned long cr4; - - /* - * Ensure bits in CR0 and CR4 are valid in VMX operation: - * - Bit X is 1 in _FIXED0: bit X is fixed to 1 in CRx. 
- * - Bit X is 0 in _FIXED1: bit X is fixed to 0 in CRx. - */ - __asm__ __volatile__("mov %%cr0, %0" : "=r"(cr0) : : "memory"); - cr0 &= rdmsr(MSR_IA32_VMX_CR0_FIXED1); - cr0 |= rdmsr(MSR_IA32_VMX_CR0_FIXED0); - __asm__ __volatile__("mov %0, %%cr0" : : "r"(cr0) : "memory"); - - __asm__ __volatile__("mov %%cr4, %0" : "=r"(cr4) : : "memory"); - cr4 &= rdmsr(MSR_IA32_VMX_CR4_FIXED1); - cr4 |= rdmsr(MSR_IA32_VMX_CR4_FIXED0); - /* Enable VMX operation */ - cr4 |= X86_CR4_VMXE; - __asm__ __volatile__("mov %0, %%cr4" : : "r"(cr4) : "memory"); - - /* - * Configure IA32_FEATURE_CONTROL MSR to allow VMXON: - * Bit 0: Lock bit. If clear, VMXON causes a #GP. - * Bit 2: Enables VMXON outside of SMX operation. If clear, VMXON - * outside of SMX causes a #GP. - */ - required = FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX; - required |= FEAT_CTL_LOCKED; - feature_control = rdmsr(MSR_IA32_FEAT_CTL); - if ((feature_control & required) != required) - wrmsr(MSR_IA32_FEAT_CTL, feature_control | required); - - /* Enter VMX root operation. */ - *(uint32_t *)(vmx->vmxon) = vmcs_revision(); - if (vmxon(vmx->vmxon_gpa)) - return false; - - return true; -} - -bool load_vmcs(struct vmx_pages *vmx) -{ - /* Load a VMCS. */ - *(uint32_t *)(vmx->vmcs) = vmcs_revision(); - if (vmclear(vmx->vmcs_gpa)) - return false; - - if (vmptrld(vmx->vmcs_gpa)) - return false; - - /* Setup shadow VMCS, do not load it yet. */ - *(uint32_t *)(vmx->shadow_vmcs) = vmcs_revision() | 0x80000000ul; - if (vmclear(vmx->shadow_vmcs_gpa)) - return false; - - return true; -} - -static bool ept_vpid_cap_supported(uint64_t mask) -{ - return rdmsr(MSR_IA32_VMX_EPT_VPID_CAP) & mask; -} - -bool ept_1g_pages_supported(void) -{ - return ept_vpid_cap_supported(VMX_EPT_VPID_CAP_1G_PAGES); -} - -/* - * Initialize the control fields to the most basic settings possible. 
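 * When an EPT root has been allocated (vmx->eptp_gpa != 0), the EPT pointer
 * is assembled below with write-back memory type, a page-walk length field
 * of 3 (the field encodes the number of levels minus one, i.e. a 4-level
 * walk) and A/D bits enabled when the CPU supports them;
 * SECONDARY_EXEC_ENABLE_EPT is then set in the secondary execution controls.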
- */ -static inline void init_vmcs_control_fields(struct vmx_pages *vmx) -{ - uint32_t sec_exec_ctl = 0; - - vmwrite(VIRTUAL_PROCESSOR_ID, 0); - vmwrite(POSTED_INTR_NV, 0); - - vmwrite(PIN_BASED_VM_EXEC_CONTROL, rdmsr(MSR_IA32_VMX_TRUE_PINBASED_CTLS)); - - if (vmx->eptp_gpa) { - uint64_t ept_paddr; - struct eptPageTablePointer eptp = { - .memory_type = X86_MEMTYPE_WB, - .page_walk_length = 3, /* + 1 */ - .ad_enabled = ept_vpid_cap_supported(VMX_EPT_VPID_CAP_AD_BITS), - .address = vmx->eptp_gpa >> PAGE_SHIFT_4K, - }; - - memcpy(&ept_paddr, &eptp, sizeof(ept_paddr)); - vmwrite(EPT_POINTER, ept_paddr); - sec_exec_ctl |= SECONDARY_EXEC_ENABLE_EPT; - } - - if (!vmwrite(SECONDARY_VM_EXEC_CONTROL, sec_exec_ctl)) - vmwrite(CPU_BASED_VM_EXEC_CONTROL, - rdmsr(MSR_IA32_VMX_TRUE_PROCBASED_CTLS) | CPU_BASED_ACTIVATE_SECONDARY_CONTROLS); - else { - vmwrite(CPU_BASED_VM_EXEC_CONTROL, rdmsr(MSR_IA32_VMX_TRUE_PROCBASED_CTLS)); - GUEST_ASSERT(!sec_exec_ctl); - } - - vmwrite(EXCEPTION_BITMAP, 0); - vmwrite(PAGE_FAULT_ERROR_CODE_MASK, 0); - vmwrite(PAGE_FAULT_ERROR_CODE_MATCH, -1); /* Never match */ - vmwrite(CR3_TARGET_COUNT, 0); - vmwrite(VM_EXIT_CONTROLS, rdmsr(MSR_IA32_VMX_EXIT_CTLS) | - VM_EXIT_HOST_ADDR_SPACE_SIZE); /* 64-bit host */ - vmwrite(VM_EXIT_MSR_STORE_COUNT, 0); - vmwrite(VM_EXIT_MSR_LOAD_COUNT, 0); - vmwrite(VM_ENTRY_CONTROLS, rdmsr(MSR_IA32_VMX_ENTRY_CTLS) | - VM_ENTRY_IA32E_MODE); /* 64-bit guest */ - vmwrite(VM_ENTRY_MSR_LOAD_COUNT, 0); - vmwrite(VM_ENTRY_INTR_INFO_FIELD, 0); - vmwrite(TPR_THRESHOLD, 0); - - vmwrite(CR0_GUEST_HOST_MASK, 0); - vmwrite(CR4_GUEST_HOST_MASK, 0); - vmwrite(CR0_READ_SHADOW, get_cr0()); - vmwrite(CR4_READ_SHADOW, get_cr4()); - - vmwrite(MSR_BITMAP, vmx->msr_gpa); - vmwrite(VMREAD_BITMAP, vmx->vmread_gpa); - vmwrite(VMWRITE_BITMAP, vmx->vmwrite_gpa); -} - -/* - * Initialize the host state fields based on the current host state, with - * the exception of HOST_RSP and HOST_RIP, which should be set by vmlaunch - * or vmresume. - */ -static inline void init_vmcs_host_state(void) -{ - uint32_t exit_controls = vmreadz(VM_EXIT_CONTROLS); - - vmwrite(HOST_ES_SELECTOR, get_es()); - vmwrite(HOST_CS_SELECTOR, get_cs()); - vmwrite(HOST_SS_SELECTOR, get_ss()); - vmwrite(HOST_DS_SELECTOR, get_ds()); - vmwrite(HOST_FS_SELECTOR, get_fs()); - vmwrite(HOST_GS_SELECTOR, get_gs()); - vmwrite(HOST_TR_SELECTOR, get_tr()); - - if (exit_controls & VM_EXIT_LOAD_IA32_PAT) - vmwrite(HOST_IA32_PAT, rdmsr(MSR_IA32_CR_PAT)); - if (exit_controls & VM_EXIT_LOAD_IA32_EFER) - vmwrite(HOST_IA32_EFER, rdmsr(MSR_EFER)); - if (exit_controls & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL) - vmwrite(HOST_IA32_PERF_GLOBAL_CTRL, - rdmsr(MSR_CORE_PERF_GLOBAL_CTRL)); - - vmwrite(HOST_IA32_SYSENTER_CS, rdmsr(MSR_IA32_SYSENTER_CS)); - - vmwrite(HOST_CR0, get_cr0()); - vmwrite(HOST_CR3, get_cr3()); - vmwrite(HOST_CR4, get_cr4()); - vmwrite(HOST_FS_BASE, rdmsr(MSR_FS_BASE)); - vmwrite(HOST_GS_BASE, rdmsr(MSR_GS_BASE)); - vmwrite(HOST_TR_BASE, - get_desc64_base((struct desc64 *)(get_gdt().address + get_tr()))); - vmwrite(HOST_GDTR_BASE, get_gdt().address); - vmwrite(HOST_IDTR_BASE, get_idt().address); - vmwrite(HOST_IA32_SYSENTER_ESP, rdmsr(MSR_IA32_SYSENTER_ESP)); - vmwrite(HOST_IA32_SYSENTER_EIP, rdmsr(MSR_IA32_SYSENTER_EIP)); -} - -/* - * Initialize the guest state fields essentially as a clone of - * the host state fields. Some host state fields have fixed - * values, and we set the corresponding guest state fields accordingly. 
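 * The guest (L2) starts in 64-bit mode at @rip with its stack at @rsp;
 * segment selectors, CR0/CR3/CR4, the FS/GS/TR/GDTR/IDTR bases and the
 * SYSENTER values are cloned from the corresponding host fields, while the
 * flat code and data segment limits are set to their maximums.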
- */ -static inline void init_vmcs_guest_state(void *rip, void *rsp) -{ - vmwrite(GUEST_ES_SELECTOR, vmreadz(HOST_ES_SELECTOR)); - vmwrite(GUEST_CS_SELECTOR, vmreadz(HOST_CS_SELECTOR)); - vmwrite(GUEST_SS_SELECTOR, vmreadz(HOST_SS_SELECTOR)); - vmwrite(GUEST_DS_SELECTOR, vmreadz(HOST_DS_SELECTOR)); - vmwrite(GUEST_FS_SELECTOR, vmreadz(HOST_FS_SELECTOR)); - vmwrite(GUEST_GS_SELECTOR, vmreadz(HOST_GS_SELECTOR)); - vmwrite(GUEST_LDTR_SELECTOR, 0); - vmwrite(GUEST_TR_SELECTOR, vmreadz(HOST_TR_SELECTOR)); - vmwrite(GUEST_INTR_STATUS, 0); - vmwrite(GUEST_PML_INDEX, 0); - - vmwrite(VMCS_LINK_POINTER, -1ll); - vmwrite(GUEST_IA32_DEBUGCTL, 0); - vmwrite(GUEST_IA32_PAT, vmreadz(HOST_IA32_PAT)); - vmwrite(GUEST_IA32_EFER, vmreadz(HOST_IA32_EFER)); - vmwrite(GUEST_IA32_PERF_GLOBAL_CTRL, - vmreadz(HOST_IA32_PERF_GLOBAL_CTRL)); - - vmwrite(GUEST_ES_LIMIT, -1); - vmwrite(GUEST_CS_LIMIT, -1); - vmwrite(GUEST_SS_LIMIT, -1); - vmwrite(GUEST_DS_LIMIT, -1); - vmwrite(GUEST_FS_LIMIT, -1); - vmwrite(GUEST_GS_LIMIT, -1); - vmwrite(GUEST_LDTR_LIMIT, -1); - vmwrite(GUEST_TR_LIMIT, 0x67); - vmwrite(GUEST_GDTR_LIMIT, 0xffff); - vmwrite(GUEST_IDTR_LIMIT, 0xffff); - vmwrite(GUEST_ES_AR_BYTES, - vmreadz(GUEST_ES_SELECTOR) == 0 ? 0x10000 : 0xc093); - vmwrite(GUEST_CS_AR_BYTES, 0xa09b); - vmwrite(GUEST_SS_AR_BYTES, 0xc093); - vmwrite(GUEST_DS_AR_BYTES, - vmreadz(GUEST_DS_SELECTOR) == 0 ? 0x10000 : 0xc093); - vmwrite(GUEST_FS_AR_BYTES, - vmreadz(GUEST_FS_SELECTOR) == 0 ? 0x10000 : 0xc093); - vmwrite(GUEST_GS_AR_BYTES, - vmreadz(GUEST_GS_SELECTOR) == 0 ? 0x10000 : 0xc093); - vmwrite(GUEST_LDTR_AR_BYTES, 0x10000); - vmwrite(GUEST_TR_AR_BYTES, 0x8b); - vmwrite(GUEST_INTERRUPTIBILITY_INFO, 0); - vmwrite(GUEST_ACTIVITY_STATE, 0); - vmwrite(GUEST_SYSENTER_CS, vmreadz(HOST_IA32_SYSENTER_CS)); - vmwrite(VMX_PREEMPTION_TIMER_VALUE, 0); - - vmwrite(GUEST_CR0, vmreadz(HOST_CR0)); - vmwrite(GUEST_CR3, vmreadz(HOST_CR3)); - vmwrite(GUEST_CR4, vmreadz(HOST_CR4)); - vmwrite(GUEST_ES_BASE, 0); - vmwrite(GUEST_CS_BASE, 0); - vmwrite(GUEST_SS_BASE, 0); - vmwrite(GUEST_DS_BASE, 0); - vmwrite(GUEST_FS_BASE, vmreadz(HOST_FS_BASE)); - vmwrite(GUEST_GS_BASE, vmreadz(HOST_GS_BASE)); - vmwrite(GUEST_LDTR_BASE, 0); - vmwrite(GUEST_TR_BASE, vmreadz(HOST_TR_BASE)); - vmwrite(GUEST_GDTR_BASE, vmreadz(HOST_GDTR_BASE)); - vmwrite(GUEST_IDTR_BASE, vmreadz(HOST_IDTR_BASE)); - vmwrite(GUEST_DR7, 0x400); - vmwrite(GUEST_RSP, (uint64_t)rsp); - vmwrite(GUEST_RIP, (uint64_t)rip); - vmwrite(GUEST_RFLAGS, 2); - vmwrite(GUEST_PENDING_DBG_EXCEPTIONS, 0); - vmwrite(GUEST_SYSENTER_ESP, vmreadz(HOST_IA32_SYSENTER_ESP)); - vmwrite(GUEST_SYSENTER_EIP, vmreadz(HOST_IA32_SYSENTER_EIP)); -} - -void prepare_vmcs(struct vmx_pages *vmx, void *guest_rip, void *guest_rsp) -{ - init_vmcs_control_fields(vmx); - init_vmcs_host_state(); - init_vmcs_guest_state(guest_rip, guest_rsp); -} - -static void nested_create_pte(struct kvm_vm *vm, - struct eptPageTableEntry *pte, - uint64_t nested_paddr, - uint64_t paddr, - int current_level, - int target_level) -{ - if (!pte->readable) { - pte->writable = true; - pte->readable = true; - pte->executable = true; - pte->page_size = (current_level == target_level); - if (pte->page_size) - pte->address = paddr >> vm->page_shift; - else - pte->address = vm_alloc_page_table(vm) >> vm->page_shift; - } else { - /* - * Entry already present. Assert that the caller doesn't want - * a hugepage at this level, and that there isn't a hugepage at - * this level. 
- */ - TEST_ASSERT(current_level != target_level, - "Cannot create hugepage at level: %u, nested_paddr: 0x%lx", - current_level, nested_paddr); - TEST_ASSERT(!pte->page_size, - "Cannot create page table at level: %u, nested_paddr: 0x%lx", - current_level, nested_paddr); - } -} - - -void __nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm, - uint64_t nested_paddr, uint64_t paddr, int target_level) -{ - const uint64_t page_size = PG_LEVEL_SIZE(target_level); - struct eptPageTableEntry *pt = vmx->eptp_hva, *pte; - uint16_t index; - - TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use " - "unknown or unsupported guest mode, mode: 0x%x", vm->mode); - - TEST_ASSERT((nested_paddr >> 48) == 0, - "Nested physical address 0x%lx requires 5-level paging", - nested_paddr); - TEST_ASSERT((nested_paddr % page_size) == 0, - "Nested physical address not on page boundary,\n" - " nested_paddr: 0x%lx page_size: 0x%lx", - nested_paddr, page_size); - TEST_ASSERT((nested_paddr >> vm->page_shift) <= vm->max_gfn, - "Physical address beyond beyond maximum supported,\n" - " nested_paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x", - paddr, vm->max_gfn, vm->page_size); - TEST_ASSERT((paddr % page_size) == 0, - "Physical address not on page boundary,\n" - " paddr: 0x%lx page_size: 0x%lx", - paddr, page_size); - TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn, - "Physical address beyond beyond maximum supported,\n" - " paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x", - paddr, vm->max_gfn, vm->page_size); - - for (int level = PG_LEVEL_512G; level >= PG_LEVEL_4K; level--) { - index = (nested_paddr >> PG_LEVEL_SHIFT(level)) & 0x1ffu; - pte = &pt[index]; - - nested_create_pte(vm, pte, nested_paddr, paddr, level, target_level); - - if (pte->page_size) - break; - - pt = addr_gpa2hva(vm, pte->address * vm->page_size); - } - - /* - * For now mark these as accessed and dirty because the only - * testcase we have needs that. Can be reconsidered later. - */ - pte->accessed = true; - pte->dirty = true; - -} - -void nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm, - uint64_t nested_paddr, uint64_t paddr) -{ - __nested_pg_map(vmx, vm, nested_paddr, paddr, PG_LEVEL_4K); -} - -/* - * Map a range of EPT guest physical addresses to the VM's physical address - * - * Input Args: - * vm - Virtual Machine - * nested_paddr - Nested guest physical address to map - * paddr - VM Physical Address - * size - The size of the range to map - * level - The level at which to map the range - * - * Output Args: None - * - * Return: None - * - * Within the VM given by vm, creates a nested guest translation for the - * page range starting at nested_paddr to the page range starting at paddr. - */ -void __nested_map(struct vmx_pages *vmx, struct kvm_vm *vm, - uint64_t nested_paddr, uint64_t paddr, uint64_t size, - int level) -{ - size_t page_size = PG_LEVEL_SIZE(level); - size_t npages = size / page_size; - - TEST_ASSERT(nested_paddr + size > nested_paddr, "Vaddr overflow"); - TEST_ASSERT(paddr + size > paddr, "Paddr overflow"); - - while (npages--) { - __nested_pg_map(vmx, vm, nested_paddr, paddr, level); - nested_paddr += page_size; - paddr += page_size; - } -} - -void nested_map(struct vmx_pages *vmx, struct kvm_vm *vm, - uint64_t nested_paddr, uint64_t paddr, uint64_t size) -{ - __nested_map(vmx, vm, nested_paddr, paddr, size, PG_LEVEL_4K); -} - -/* Prepare an identity extended page table that maps all the - * physical pages in VM. 
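 * The loop below walks the memslot's unused_phy_pages sparsebit: a clear bit
 * means the page has already been allocated to the VM, and every such page
 * is identity-mapped (nested_paddr == paddr) with a 4K EPT mapping.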
- */ -void nested_map_memslot(struct vmx_pages *vmx, struct kvm_vm *vm, - uint32_t memslot) -{ - sparsebit_idx_t i, last; - struct userspace_mem_region *region = - memslot2region(vm, memslot); - - i = (region->region.guest_phys_addr >> vm->page_shift) - 1; - last = i + (region->region.memory_size >> vm->page_shift); - for (;;) { - i = sparsebit_next_clear(region->unused_phy_pages, i); - if (i > last) - break; - - nested_map(vmx, vm, - (uint64_t)i << vm->page_shift, - (uint64_t)i << vm->page_shift, - 1 << vm->page_shift); - } -} - -/* Identity map a region with 1GiB Pages. */ -void nested_identity_map_1g(struct vmx_pages *vmx, struct kvm_vm *vm, - uint64_t addr, uint64_t size) -{ - __nested_map(vmx, vm, addr, addr, size, PG_LEVEL_1G); -} - -bool kvm_cpu_has_ept(void) -{ - uint64_t ctrl; - - ctrl = kvm_get_feature_msr(MSR_IA32_VMX_TRUE_PROCBASED_CTLS) >> 32; - if (!(ctrl & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS)) - return false; - - ctrl = kvm_get_feature_msr(MSR_IA32_VMX_PROCBASED_CTLS2) >> 32; - return ctrl & SECONDARY_EXEC_ENABLE_EPT; -} - -void prepare_eptp(struct vmx_pages *vmx, struct kvm_vm *vm, - uint32_t eptp_memslot) -{ - TEST_ASSERT(kvm_cpu_has_ept(), "KVM doesn't support nested EPT"); - - vmx->eptp = (void *)vm_vaddr_alloc_page(vm); - vmx->eptp_hva = addr_gva2hva(vm, (uintptr_t)vmx->eptp); - vmx->eptp_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->eptp); -} - -void prepare_virtualize_apic_accesses(struct vmx_pages *vmx, struct kvm_vm *vm) -{ - vmx->apic_access = (void *)vm_vaddr_alloc_page(vm); - vmx->apic_access_hva = addr_gva2hva(vm, (uintptr_t)vmx->apic_access); - vmx->apic_access_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->apic_access); -} diff --git a/tools/testing/selftests/kvm/s390/cmma_test.c b/tools/testing/selftests/kvm/s390/cmma_test.c new file mode 100644 index 000000000000..e32dd59703a0 --- /dev/null +++ b/tools/testing/selftests/kvm/s390/cmma_test.c @@ -0,0 +1,695 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Test for s390x CMMA migration + * + * Copyright IBM Corp. 2023 + * + * Authors: + * Nico Boehr + */ +#include +#include +#include +#include +#include + +#include "test_util.h" +#include "kvm_util.h" +#include "kselftest.h" +#include "ucall_common.h" +#include "processor.h" + +#define MAIN_PAGE_COUNT 512 + +#define TEST_DATA_PAGE_COUNT 512 +#define TEST_DATA_MEMSLOT 1 +#define TEST_DATA_START_GFN PAGE_SIZE + +#define TEST_DATA_TWO_PAGE_COUNT 256 +#define TEST_DATA_TWO_MEMSLOT 2 +#define TEST_DATA_TWO_START_GFN (2 * PAGE_SIZE) + +static char cmma_value_buf[MAIN_PAGE_COUNT + TEST_DATA_PAGE_COUNT]; + +/** + * Dirty CMMA attributes of exactly one page in the TEST_DATA memslot, + * so use_cmma goes on and the CMMA related ioctls do something. + */ +static void guest_do_one_essa(void) +{ + asm volatile( + /* load TEST_DATA_START_GFN into r1 */ + " llilf 1,%[start_gfn]\n" + /* calculate the address from the gfn */ + " sllg 1,1,12(0)\n" + /* set the first page in TEST_DATA memslot to STABLE */ + " .insn rrf,0xb9ab0000,2,1,1,0\n" + /* hypercall */ + " diag 0,0,0x501\n" + "0: j 0b" + : + : [start_gfn] "L"(TEST_DATA_START_GFN) + : "r1", "r2", "memory", "cc" + ); +} + +/** + * Touch CMMA attributes of all pages in TEST_DATA memslot. Set them to stable + * state. 
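 *
 * The assembly below is roughly equivalent to this sketch, where SET_STABLE
 * stands for the ESSA "set stable" operation and the final diag 0x501 traps
 * back to the host:
 *
 *	for (gfn = TEST_DATA_START_GFN;
 *	     gfn < TEST_DATA_START_GFN + TEST_DATA_PAGE_COUNT; gfn++)
 *		essa(SET_STABLE, gfn << 12);
 *	diag_0x501();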
+ */ +static void guest_dirty_test_data(void) +{ + asm volatile( + /* r1 = TEST_DATA_START_GFN */ + " xgr 1,1\n" + " llilf 1,%[start_gfn]\n" + /* r5 = TEST_DATA_PAGE_COUNT */ + " lghi 5,%[page_count]\n" + /* r5 += r1 */ + "2: agfr 5,1\n" + /* r2 = r1 << PAGE_SHIFT */ + "1: sllg 2,1,12(0)\n" + /* essa(r4, r2, SET_STABLE) */ + " .insn rrf,0xb9ab0000,4,2,1,0\n" + /* i++ */ + " agfi 1,1\n" + /* if r1 < r5 goto 1 */ + " cgrjl 1,5,1b\n" + /* hypercall */ + " diag 0,0,0x501\n" + "0: j 0b" + : + : [start_gfn] "L"(TEST_DATA_START_GFN), + [page_count] "L"(TEST_DATA_PAGE_COUNT) + : + /* the counter in our loop over the pages */ + "r1", + /* the calculated page physical address */ + "r2", + /* ESSA output register */ + "r4", + /* last page */ + "r5", + "cc", "memory" + ); +} + +static void create_main_memslot(struct kvm_vm *vm) +{ + int i; + + vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, 0, 0, MAIN_PAGE_COUNT, 0); + /* set the array of memslots to zero like __vm_create does */ + for (i = 0; i < NR_MEM_REGIONS; i++) + vm->memslots[i] = 0; +} + +static void create_test_memslot(struct kvm_vm *vm) +{ + vm_userspace_mem_region_add(vm, + VM_MEM_SRC_ANONYMOUS, + TEST_DATA_START_GFN << vm->page_shift, + TEST_DATA_MEMSLOT, + TEST_DATA_PAGE_COUNT, + 0 + ); + vm->memslots[MEM_REGION_TEST_DATA] = TEST_DATA_MEMSLOT; +} + +static void create_memslots(struct kvm_vm *vm) +{ + /* + * Our VM has the following memory layout: + * +------+---------------------------+ + * | GFN | Memslot | + * +------+---------------------------+ + * | 0 | | + * | ... | MAIN (Code, Stack, ...) | + * | 511 | | + * +------+---------------------------+ + * | 4096 | | + * | ... | TEST_DATA | + * | 4607 | | + * +------+---------------------------+ + */ + create_main_memslot(vm); + create_test_memslot(vm); +} + +static void finish_vm_setup(struct kvm_vm *vm) +{ + struct userspace_mem_region *slot0; + + kvm_vm_elf_load(vm, program_invocation_name); + + slot0 = memslot2region(vm, 0); + ucall_init(vm, slot0->region.guest_phys_addr + slot0->region.memory_size); + + kvm_arch_vm_post_create(vm); +} + +static struct kvm_vm *create_vm_two_memslots(void) +{ + struct kvm_vm *vm; + + vm = vm_create_barebones(); + + create_memslots(vm); + + finish_vm_setup(vm); + + return vm; +} + +static void enable_cmma(struct kvm_vm *vm) +{ + int r; + + r = __kvm_device_attr_set(vm->fd, KVM_S390_VM_MEM_CTRL, KVM_S390_VM_MEM_ENABLE_CMMA, NULL); + TEST_ASSERT(!r, "enabling cmma failed r=%d errno=%d", r, errno); +} + +static void enable_dirty_tracking(struct kvm_vm *vm) +{ + vm_mem_region_set_flags(vm, 0, KVM_MEM_LOG_DIRTY_PAGES); + vm_mem_region_set_flags(vm, TEST_DATA_MEMSLOT, KVM_MEM_LOG_DIRTY_PAGES); +} + +static int __enable_migration_mode(struct kvm_vm *vm) +{ + return __kvm_device_attr_set(vm->fd, + KVM_S390_VM_MIGRATION, + KVM_S390_VM_MIGRATION_START, + NULL + ); +} + +static void enable_migration_mode(struct kvm_vm *vm) +{ + int r = __enable_migration_mode(vm); + + TEST_ASSERT(!r, "enabling migration mode failed r=%d errno=%d", r, errno); +} + +static bool is_migration_mode_on(struct kvm_vm *vm) +{ + u64 out; + int r; + + r = __kvm_device_attr_get(vm->fd, + KVM_S390_VM_MIGRATION, + KVM_S390_VM_MIGRATION_STATUS, + &out + ); + TEST_ASSERT(!r, "getting migration mode status failed r=%d errno=%d", r, errno); + return out; +} + +static int vm_get_cmma_bits(struct kvm_vm *vm, u64 flags, int *errno_out) +{ + struct kvm_s390_cmma_log args; + int rc; + + errno = 0; + + args = (struct kvm_s390_cmma_log){ + .start_gfn = 0, + .count = sizeof(cmma_value_buf), + .flags 
= flags, + .values = (__u64)&cmma_value_buf[0] + }; + rc = __vm_ioctl(vm, KVM_S390_GET_CMMA_BITS, &args); + + *errno_out = errno; + return rc; +} + +static void test_get_cmma_basic(void) +{ + struct kvm_vm *vm = create_vm_two_memslots(); + struct kvm_vcpu *vcpu; + int rc, errno_out; + + /* GET_CMMA_BITS without CMMA enabled should fail */ + rc = vm_get_cmma_bits(vm, 0, &errno_out); + TEST_ASSERT_EQ(rc, -1); + TEST_ASSERT_EQ(errno_out, ENXIO); + + enable_cmma(vm); + vcpu = vm_vcpu_add(vm, 1, guest_do_one_essa); + + vcpu_run(vcpu); + + /* GET_CMMA_BITS without migration mode and without peeking should fail */ + rc = vm_get_cmma_bits(vm, 0, &errno_out); + TEST_ASSERT_EQ(rc, -1); + TEST_ASSERT_EQ(errno_out, EINVAL); + + /* GET_CMMA_BITS without migration mode and with peeking should work */ + rc = vm_get_cmma_bits(vm, KVM_S390_CMMA_PEEK, &errno_out); + TEST_ASSERT_EQ(rc, 0); + TEST_ASSERT_EQ(errno_out, 0); + + enable_dirty_tracking(vm); + enable_migration_mode(vm); + + /* GET_CMMA_BITS with invalid flags */ + rc = vm_get_cmma_bits(vm, 0xfeedc0fe, &errno_out); + TEST_ASSERT_EQ(rc, -1); + TEST_ASSERT_EQ(errno_out, EINVAL); + + kvm_vm_free(vm); +} + +static void assert_exit_was_hypercall(struct kvm_vcpu *vcpu) +{ + TEST_ASSERT_EQ(vcpu->run->exit_reason, 13); + TEST_ASSERT_EQ(vcpu->run->s390_sieic.icptcode, 4); + TEST_ASSERT_EQ(vcpu->run->s390_sieic.ipa, 0x8300); + TEST_ASSERT_EQ(vcpu->run->s390_sieic.ipb, 0x5010000); +} + +static void test_migration_mode(void) +{ + struct kvm_vm *vm = vm_create_barebones(); + struct kvm_vcpu *vcpu; + u64 orig_psw; + int rc; + + /* enabling migration mode on a VM without memory should fail */ + rc = __enable_migration_mode(vm); + TEST_ASSERT_EQ(rc, -1); + TEST_ASSERT_EQ(errno, EINVAL); + TEST_ASSERT(!is_migration_mode_on(vm), "migration mode should still be off"); + errno = 0; + + create_memslots(vm); + finish_vm_setup(vm); + + enable_cmma(vm); + vcpu = vm_vcpu_add(vm, 1, guest_do_one_essa); + orig_psw = vcpu->run->psw_addr; + + /* + * Execute one essa instruction in the guest. Otherwise the guest will + * not have use_cmm enabled and GET_CMMA_BITS will return no pages. + */ + vcpu_run(vcpu); + assert_exit_was_hypercall(vcpu); + + /* migration mode when memslots have dirty tracking off should fail */ + rc = __enable_migration_mode(vm); + TEST_ASSERT_EQ(rc, -1); + TEST_ASSERT_EQ(errno, EINVAL); + TEST_ASSERT(!is_migration_mode_on(vm), "migration mode should still be off"); + errno = 0; + + /* enable dirty tracking */ + enable_dirty_tracking(vm); + + /* enabling migration mode should work now */ + rc = __enable_migration_mode(vm); + TEST_ASSERT_EQ(rc, 0); + TEST_ASSERT(is_migration_mode_on(vm), "migration mode should be on"); + errno = 0; + + /* execute another ESSA instruction to see this goes fine */ + vcpu->run->psw_addr = orig_psw; + vcpu_run(vcpu); + assert_exit_was_hypercall(vcpu); + + /* + * With migration mode on, create a new memslot with dirty tracking off. + * This should turn off migration mode. + */ + TEST_ASSERT(is_migration_mode_on(vm), "migration mode should be on"); + vm_userspace_mem_region_add(vm, + VM_MEM_SRC_ANONYMOUS, + TEST_DATA_TWO_START_GFN << vm->page_shift, + TEST_DATA_TWO_MEMSLOT, + TEST_DATA_TWO_PAGE_COUNT, + 0 + ); + TEST_ASSERT(!is_migration_mode_on(vm), + "creating memslot without dirty tracking turns off migration mode" + ); + + /* ESSA instructions should still execute fine */ + vcpu->run->psw_addr = orig_psw; + vcpu_run(vcpu); + assert_exit_was_hypercall(vcpu); + + /* + * Turn on dirty tracking on the new memslot. 
+ * It should be possible to turn migration mode back on again. + */ + vm_mem_region_set_flags(vm, TEST_DATA_TWO_MEMSLOT, KVM_MEM_LOG_DIRTY_PAGES); + rc = __enable_migration_mode(vm); + TEST_ASSERT_EQ(rc, 0); + TEST_ASSERT(is_migration_mode_on(vm), "migration mode should be on"); + errno = 0; + + /* + * Turn off dirty tracking again, this time with just a flag change. + * Again, migration mode should turn off. + */ + TEST_ASSERT(is_migration_mode_on(vm), "migration mode should be on"); + vm_mem_region_set_flags(vm, TEST_DATA_TWO_MEMSLOT, 0); + TEST_ASSERT(!is_migration_mode_on(vm), + "disabling dirty tracking should turn off migration mode" + ); + + /* ESSA instructions should still execute fine */ + vcpu->run->psw_addr = orig_psw; + vcpu_run(vcpu); + assert_exit_was_hypercall(vcpu); + + kvm_vm_free(vm); +} + +/** + * Given a VM with the MAIN and TEST_DATA memslot, assert that both slots have + * CMMA attributes of all pages in both memslots and nothing more dirty. + * This has the useful side effect of ensuring nothing is CMMA dirty after this + * function. + */ +static void assert_all_slots_cmma_dirty(struct kvm_vm *vm) +{ + struct kvm_s390_cmma_log args; + + /* + * First iteration - everything should be dirty. + * Start at the main memslot... + */ + args = (struct kvm_s390_cmma_log){ + .start_gfn = 0, + .count = sizeof(cmma_value_buf), + .flags = 0, + .values = (__u64)&cmma_value_buf[0] + }; + memset(cmma_value_buf, 0xff, sizeof(cmma_value_buf)); + vm_ioctl(vm, KVM_S390_GET_CMMA_BITS, &args); + TEST_ASSERT_EQ(args.count, MAIN_PAGE_COUNT); + TEST_ASSERT_EQ(args.remaining, TEST_DATA_PAGE_COUNT); + TEST_ASSERT_EQ(args.start_gfn, 0); + + /* ...and then - after a hole - the TEST_DATA memslot should follow */ + args = (struct kvm_s390_cmma_log){ + .start_gfn = MAIN_PAGE_COUNT, + .count = sizeof(cmma_value_buf), + .flags = 0, + .values = (__u64)&cmma_value_buf[0] + }; + memset(cmma_value_buf, 0xff, sizeof(cmma_value_buf)); + vm_ioctl(vm, KVM_S390_GET_CMMA_BITS, &args); + TEST_ASSERT_EQ(args.count, TEST_DATA_PAGE_COUNT); + TEST_ASSERT_EQ(args.start_gfn, TEST_DATA_START_GFN); + TEST_ASSERT_EQ(args.remaining, 0); + + /* ...and nothing else should be there */ + args = (struct kvm_s390_cmma_log){ + .start_gfn = TEST_DATA_START_GFN + TEST_DATA_PAGE_COUNT, + .count = sizeof(cmma_value_buf), + .flags = 0, + .values = (__u64)&cmma_value_buf[0] + }; + memset(cmma_value_buf, 0xff, sizeof(cmma_value_buf)); + vm_ioctl(vm, KVM_S390_GET_CMMA_BITS, &args); + TEST_ASSERT_EQ(args.count, 0); + TEST_ASSERT_EQ(args.start_gfn, 0); + TEST_ASSERT_EQ(args.remaining, 0); +} + +/** + * Given a VM, assert no pages are CMMA dirty. + */ +static void assert_no_pages_cmma_dirty(struct kvm_vm *vm) +{ + struct kvm_s390_cmma_log args; + + /* If we start from GFN 0 again, nothing should be dirty. */ + args = (struct kvm_s390_cmma_log){ + .start_gfn = 0, + .count = sizeof(cmma_value_buf), + .flags = 0, + .values = (__u64)&cmma_value_buf[0] + }; + memset(cmma_value_buf, 0xff, sizeof(cmma_value_buf)); + vm_ioctl(vm, KVM_S390_GET_CMMA_BITS, &args); + if (args.count || args.remaining || args.start_gfn) + TEST_FAIL("pages are still dirty start_gfn=0x%llx count=%u remaining=%llu", + args.start_gfn, + args.count, + args.remaining + ); +} + +static void test_get_inital_dirty(void) +{ + struct kvm_vm *vm = create_vm_two_memslots(); + struct kvm_vcpu *vcpu; + + enable_cmma(vm); + vcpu = vm_vcpu_add(vm, 1, guest_do_one_essa); + + /* + * Execute one essa instruction in the guest. 
Otherwise the guest will + * not have use_cmm enabled and GET_CMMA_BITS will return no pages. + */ + vcpu_run(vcpu); + assert_exit_was_hypercall(vcpu); + + enable_dirty_tracking(vm); + enable_migration_mode(vm); + + assert_all_slots_cmma_dirty(vm); + + /* Start from the beginning again and make sure nothing else is dirty */ + assert_no_pages_cmma_dirty(vm); + + kvm_vm_free(vm); +} + +static void query_cmma_range(struct kvm_vm *vm, + u64 start_gfn, u64 gfn_count, + struct kvm_s390_cmma_log *res_out) +{ + *res_out = (struct kvm_s390_cmma_log){ + .start_gfn = start_gfn, + .count = gfn_count, + .flags = 0, + .values = (__u64)&cmma_value_buf[0] + }; + memset(cmma_value_buf, 0xff, sizeof(cmma_value_buf)); + vm_ioctl(vm, KVM_S390_GET_CMMA_BITS, res_out); +} + +/** + * Assert the given cmma_log struct that was executed by query_cmma_range() + * indicates the first dirty gfn is at first_dirty_gfn and contains exactly + * dirty_gfn_count CMMA values. + */ +static void assert_cmma_dirty(u64 first_dirty_gfn, + u64 dirty_gfn_count, + const struct kvm_s390_cmma_log *res) +{ + TEST_ASSERT_EQ(res->start_gfn, first_dirty_gfn); + TEST_ASSERT_EQ(res->count, dirty_gfn_count); + for (size_t i = 0; i < dirty_gfn_count; i++) + TEST_ASSERT_EQ(cmma_value_buf[0], 0x0); /* stable state */ + TEST_ASSERT_EQ(cmma_value_buf[dirty_gfn_count], 0xff); /* not touched */ +} + +static void test_get_skip_holes(void) +{ + size_t gfn_offset; + struct kvm_vm *vm = create_vm_two_memslots(); + struct kvm_s390_cmma_log log; + struct kvm_vcpu *vcpu; + u64 orig_psw; + + enable_cmma(vm); + vcpu = vm_vcpu_add(vm, 1, guest_dirty_test_data); + + orig_psw = vcpu->run->psw_addr; + + /* + * Execute some essa instructions in the guest. Otherwise the guest will + * not have use_cmm enabled and GET_CMMA_BITS will return no pages. + */ + vcpu_run(vcpu); + assert_exit_was_hypercall(vcpu); + + enable_dirty_tracking(vm); + enable_migration_mode(vm); + + /* un-dirty all pages */ + assert_all_slots_cmma_dirty(vm); + + /* Then, dirty just the TEST_DATA memslot */ + vcpu->run->psw_addr = orig_psw; + vcpu_run(vcpu); + + gfn_offset = TEST_DATA_START_GFN; + /** + * Query CMMA attributes of one page, starting at page 0. Since the + * main memslot was not touched by the VM, this should yield the first + * page of the TEST_DATA memslot. + * The dirty bitmap should now look like this: + * 0: not dirty + * [0x1, 0x200): dirty + */ + query_cmma_range(vm, 0, 1, &log); + assert_cmma_dirty(gfn_offset, 1, &log); + gfn_offset++; + + /** + * Query CMMA attributes of 32 (0x20) pages past the end of the TEST_DATA + * memslot. This should wrap back to the beginning of the TEST_DATA + * memslot, page 1. + * The dirty bitmap should now look like this: + * [0, 0x21): not dirty + * [0x21, 0x200): dirty + */ + query_cmma_range(vm, TEST_DATA_START_GFN + TEST_DATA_PAGE_COUNT, 0x20, &log); + assert_cmma_dirty(gfn_offset, 0x20, &log); + gfn_offset += 0x20; + + /* Skip 32 pages */ + gfn_offset += 0x20; + + /** + * After skipping 32 pages, query the next 32 (0x20) pages. + * The dirty bitmap should now look like this: + * [0, 0x21): not dirty + * [0x21, 0x41): dirty + * [0x41, 0x61): not dirty + * [0x61, 0x200): dirty + */ + query_cmma_range(vm, gfn_offset, 0x20, &log); + assert_cmma_dirty(gfn_offset, 0x20, &log); + gfn_offset += 0x20; + + /** + * Query 1 page from the beginning of the TEST_DATA memslot. This should + * yield page 0x21. 
+ * The dirty bitmap should now look like this: + * [0, 0x22): not dirty + * [0x22, 0x41): dirty + * [0x41, 0x61): not dirty + * [0x61, 0x200): dirty + */ + query_cmma_range(vm, TEST_DATA_START_GFN, 1, &log); + assert_cmma_dirty(TEST_DATA_START_GFN + 0x21, 1, &log); + gfn_offset++; + + /** + * Query 15 (0xF) pages from page 0x23 in TEST_DATA memslot. + * This should yield pages [0x23, 0x33). + * The dirty bitmap should now look like this: + * [0, 0x22): not dirty + * 0x22: dirty + * [0x23, 0x33): not dirty + * [0x33, 0x41): dirty + * [0x41, 0x61): not dirty + * [0x61, 0x200): dirty + */ + gfn_offset = TEST_DATA_START_GFN + 0x23; + query_cmma_range(vm, gfn_offset, 15, &log); + assert_cmma_dirty(gfn_offset, 15, &log); + + /** + * Query 17 (0x11) pages from page 0x22 in TEST_DATA memslot. + * This should yield page [0x22, 0x33) + * The dirty bitmap should now look like this: + * [0, 0x33): not dirty + * [0x33, 0x41): dirty + * [0x41, 0x61): not dirty + * [0x61, 0x200): dirty + */ + gfn_offset = TEST_DATA_START_GFN + 0x22; + query_cmma_range(vm, gfn_offset, 17, &log); + assert_cmma_dirty(gfn_offset, 17, &log); + + /** + * Query 25 (0x19) pages from page 0x40 in TEST_DATA memslot. + * This should yield page 0x40 and nothing more, since there are more + * than 16 non-dirty pages after page 0x40. + * The dirty bitmap should now look like this: + * [0, 0x33): not dirty + * [0x33, 0x40): dirty + * [0x40, 0x61): not dirty + * [0x61, 0x200): dirty + */ + gfn_offset = TEST_DATA_START_GFN + 0x40; + query_cmma_range(vm, gfn_offset, 25, &log); + assert_cmma_dirty(gfn_offset, 1, &log); + + /** + * Query pages [0x33, 0x40). + * The dirty bitmap should now look like this: + * [0, 0x61): not dirty + * [0x61, 0x200): dirty + */ + gfn_offset = TEST_DATA_START_GFN + 0x33; + query_cmma_range(vm, gfn_offset, 0x40 - 0x33, &log); + assert_cmma_dirty(gfn_offset, 0x40 - 0x33, &log); + + /** + * Query the remaining pages [0x61, 0x200). + */ + gfn_offset = TEST_DATA_START_GFN; + query_cmma_range(vm, gfn_offset, TEST_DATA_PAGE_COUNT - 0x61, &log); + assert_cmma_dirty(TEST_DATA_START_GFN + 0x61, TEST_DATA_PAGE_COUNT - 0x61, &log); + + assert_no_pages_cmma_dirty(vm); +} + +struct testdef { + const char *name; + void (*test)(void); +} testlist[] = { + { "migration mode and dirty tracking", test_migration_mode }, + { "GET_CMMA_BITS: basic calls", test_get_cmma_basic }, + { "GET_CMMA_BITS: all pages are dirty initally", test_get_inital_dirty }, + { "GET_CMMA_BITS: holes are skipped", test_get_skip_holes }, +}; + +/** + * The kernel may support CMMA, but the machine may not (i.e. if running as + * guest-3). + * + * In this case, the CMMA capabilities are all there, but the CMMA-related + * ioctls fail. To find out whether the machine supports CMMA, create a + * temporary VM and then query the CMMA feature of the VM. 
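 * The probe below does exactly that: it checks whether the
 * KVM_S390_VM_MEM_ENABLE_CMMA attribute exists on a bare-bones VM, with
 * __kvm_has_device_attr() returning 0 when the attribute is available.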
+ */ +static int machine_has_cmma(void) +{ + struct kvm_vm *vm = vm_create_barebones(); + int r; + + r = !__kvm_has_device_attr(vm->fd, KVM_S390_VM_MEM_CTRL, KVM_S390_VM_MEM_ENABLE_CMMA); + kvm_vm_free(vm); + + return r; +} + +int main(int argc, char *argv[]) +{ + int idx; + + TEST_REQUIRE(kvm_has_cap(KVM_CAP_SYNC_REGS)); + TEST_REQUIRE(kvm_has_cap(KVM_CAP_S390_CMMA_MIGRATION)); + TEST_REQUIRE(machine_has_cmma()); + + ksft_print_header(); + + ksft_set_plan(ARRAY_SIZE(testlist)); + + for (idx = 0; idx < ARRAY_SIZE(testlist); idx++) { + testlist[idx].test(); + ksft_test_result_pass("%s\n", testlist[idx].name); + } + + ksft_finished(); /* Print results and exit() accordingly */ +} diff --git a/tools/testing/selftests/kvm/s390/config b/tools/testing/selftests/kvm/s390/config new file mode 100644 index 000000000000..23270f2d679f --- /dev/null +++ b/tools/testing/selftests/kvm/s390/config @@ -0,0 +1,2 @@ +CONFIG_KVM=y +CONFIG_KVM_S390_UCONTROL=y diff --git a/tools/testing/selftests/kvm/s390/cpumodel_subfuncs_test.c b/tools/testing/selftests/kvm/s390/cpumodel_subfuncs_test.c new file mode 100644 index 000000000000..27255880dabd --- /dev/null +++ b/tools/testing/selftests/kvm/s390/cpumodel_subfuncs_test.c @@ -0,0 +1,301 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright IBM Corp. 2024 + * + * Authors: + * Hariharan Mari + * + * The tests compare the result of the KVM ioctl for obtaining CPU subfunction data with those + * from an ASM block performing the same CPU subfunction. Currently KVM doesn't mask instruction + * query data reported via the CPU Model, allowing us to directly compare it with the data + * acquired through executing the queries in the test. + */ + +#include +#include +#include +#include +#include "facility.h" + +#include "kvm_util.h" + +#define PLO_FUNCTION_MAX 256 + +/* Query available CPU subfunctions */ +struct kvm_s390_vm_cpu_subfunc cpu_subfunc; + +static void get_cpu_machine_subfuntions(struct kvm_vm *vm, + struct kvm_s390_vm_cpu_subfunc *cpu_subfunc) +{ + int r; + + r = __kvm_device_attr_get(vm->fd, KVM_S390_VM_CPU_MODEL, + KVM_S390_VM_CPU_MACHINE_SUBFUNC, cpu_subfunc); + + TEST_ASSERT(!r, "Get cpu subfunctions failed r=%d errno=%d", r, errno); +} + +static inline int plo_test_bit(unsigned char nr) +{ + unsigned long function = nr | 0x100; + int cc; + + asm volatile(" lgr 0,%[function]\n" + /* Parameter registers are ignored for "test bit" */ + " plo 0,0,0,0(0)\n" + " ipm %0\n" + " srl %0,28\n" + : "=d" (cc) + : [function] "d" (function) + : "cc", "0"); + return cc == 0; +} + +/* Testing Perform Locked Operation (PLO) CPU subfunction's ASM block */ +static void test_plo_asm_block(u8 (*query)[32]) +{ + for (int i = 0; i < PLO_FUNCTION_MAX; ++i) { + if (plo_test_bit(i)) + (*query)[i >> 3] |= 0x80 >> (i & 7); + } +} + +/* Testing Crypto Compute Message Authentication Code (KMAC) CPU subfunction's ASM block */ +static void test_kmac_asm_block(u8 (*query)[16]) +{ + asm volatile(" la %%r1,%[query]\n" + " xgr %%r0,%%r0\n" + " .insn rre,0xb91e0000,0,2\n" + : [query] "=R" (*query) + : + : "cc", "r0", "r1"); +} + +/* Testing Crypto Cipher Message with Chaining (KMC) CPU subfunction's ASM block */ +static void test_kmc_asm_block(u8 (*query)[16]) +{ + asm volatile(" la %%r1,%[query]\n" + " xgr %%r0,%%r0\n" + " .insn rre,0xb92f0000,2,4\n" + : [query] "=R" (*query) + : + : "cc", "r0", "r1"); +} + +/* Testing Crypto Cipher Message (KM) CPU subfunction's ASM block */ +static void test_km_asm_block(u8 (*query)[16]) +{ + asm volatile(" la %%r1,%[query]\n" + " xgr %%r0,%%r0\n" + 
" .insn rre,0xb92e0000,2,4\n" + : [query] "=R" (*query) + : + : "cc", "r0", "r1"); +} + +/* Testing Crypto Compute Intermediate Message Digest (KIMD) CPU subfunction's ASM block */ +static void test_kimd_asm_block(u8 (*query)[16]) +{ + asm volatile(" la %%r1,%[query]\n" + " xgr %%r0,%%r0\n" + " .insn rre,0xb93e0000,0,2\n" + : [query] "=R" (*query) + : + : "cc", "r0", "r1"); +} + +/* Testing Crypto Compute Last Message Digest (KLMD) CPU subfunction's ASM block */ +static void test_klmd_asm_block(u8 (*query)[16]) +{ + asm volatile(" la %%r1,%[query]\n" + " xgr %%r0,%%r0\n" + " .insn rre,0xb93f0000,0,2\n" + : [query] "=R" (*query) + : + : "cc", "r0", "r1"); +} + +/* Testing Crypto Cipher Message with Counter (KMCTR) CPU subfunction's ASM block */ +static void test_kmctr_asm_block(u8 (*query)[16]) +{ + asm volatile(" la %%r1,%[query]\n" + " xgr %%r0,%%r0\n" + " .insn rrf,0xb92d0000,2,4,6,0\n" + : [query] "=R" (*query) + : + : "cc", "r0", "r1"); +} + +/* Testing Crypto Cipher Message with Cipher Feedback (KMF) CPU subfunction's ASM block */ +static void test_kmf_asm_block(u8 (*query)[16]) +{ + asm volatile(" la %%r1,%[query]\n" + " xgr %%r0,%%r0\n" + " .insn rre,0xb92a0000,2,4\n" + : [query] "=R" (*query) + : + : "cc", "r0", "r1"); +} + +/* Testing Crypto Cipher Message with Output Feedback (KMO) CPU subfunction's ASM block */ +static void test_kmo_asm_block(u8 (*query)[16]) +{ + asm volatile(" la %%r1,%[query]\n" + " xgr %%r0,%%r0\n" + " .insn rre,0xb92b0000,2,4\n" + : [query] "=R" (*query) + : + : "cc", "r0", "r1"); +} + +/* Testing Crypto Perform Cryptographic Computation (PCC) CPU subfunction's ASM block */ +static void test_pcc_asm_block(u8 (*query)[16]) +{ + asm volatile(" la %%r1,%[query]\n" + " xgr %%r0,%%r0\n" + " .insn rre,0xb92c0000,0,0\n" + : [query] "=R" (*query) + : + : "cc", "r0", "r1"); +} + +/* Testing Crypto Perform Random Number Operation (PRNO) CPU subfunction's ASM block */ +static void test_prno_asm_block(u8 (*query)[16]) +{ + asm volatile(" la %%r1,%[query]\n" + " xgr %%r0,%%r0\n" + " .insn rre,0xb93c0000,2,4\n" + : [query] "=R" (*query) + : + : "cc", "r0", "r1"); +} + +/* Testing Crypto Cipher Message with Authentication (KMA) CPU subfunction's ASM block */ +static void test_kma_asm_block(u8 (*query)[16]) +{ + asm volatile(" la %%r1,%[query]\n" + " xgr %%r0,%%r0\n" + " .insn rrf,0xb9290000,2,4,6,0\n" + : [query] "=R" (*query) + : + : "cc", "r0", "r1"); +} + +/* Testing Crypto Compute Digital Signature Authentication (KDSA) CPU subfunction's ASM block */ +static void test_kdsa_asm_block(u8 (*query)[16]) +{ + asm volatile(" la %%r1,%[query]\n" + " xgr %%r0,%%r0\n" + " .insn rre,0xb93a0000,0,2\n" + : [query] "=R" (*query) + : + : "cc", "r0", "r1"); +} + +/* Testing Sort Lists (SORTL) CPU subfunction's ASM block */ +static void test_sortl_asm_block(u8 (*query)[32]) +{ + asm volatile(" lghi 0,0\n" + " la 1,%[query]\n" + " .insn rre,0xb9380000,2,4\n" + : [query] "=R" (*query) + : + : "cc", "0", "1"); +} + +/* Testing Deflate Conversion Call (DFLTCC) CPU subfunction's ASM block */ +static void test_dfltcc_asm_block(u8 (*query)[32]) +{ + asm volatile(" lghi 0,0\n" + " la 1,%[query]\n" + " .insn rrf,0xb9390000,2,4,6,0\n" + : [query] "=R" (*query) + : + : "cc", "0", "1"); +} + +/* + * Testing Perform Function with Concurrent Results (PFCR) + * CPU subfunctions's ASM block + */ +static void test_pfcr_asm_block(u8 (*query)[16]) +{ + asm volatile(" lghi 0,0\n" + " .insn rsy,0xeb0000000016,0,0,%[query]\n" + : [query] "=QS" (*query) + : + : "cc", "0"); +} + +typedef void 
(*testfunc_t)(u8 (*array)[]); + +struct testdef { + const char *subfunc_name; + u8 *subfunc_array; + size_t array_size; + testfunc_t test; + int facility_bit; +} testlist[] = { + /* + * PLO was introduced in the very first 64-bit machine generation. + * Hence it is assumed PLO is always installed in Z Arch. + */ + { "PLO", cpu_subfunc.plo, sizeof(cpu_subfunc.plo), test_plo_asm_block, 1 }, + /* MSA - Facility bit 17 */ + { "KMAC", cpu_subfunc.kmac, sizeof(cpu_subfunc.kmac), test_kmac_asm_block, 17 }, + { "KMC", cpu_subfunc.kmc, sizeof(cpu_subfunc.kmc), test_kmc_asm_block, 17 }, + { "KM", cpu_subfunc.km, sizeof(cpu_subfunc.km), test_km_asm_block, 17 }, + { "KIMD", cpu_subfunc.kimd, sizeof(cpu_subfunc.kimd), test_kimd_asm_block, 17 }, + { "KLMD", cpu_subfunc.klmd, sizeof(cpu_subfunc.klmd), test_klmd_asm_block, 17 }, + /* MSA - Facility bit 77 */ + { "KMCTR", cpu_subfunc.kmctr, sizeof(cpu_subfunc.kmctr), test_kmctr_asm_block, 77 }, + { "KMF", cpu_subfunc.kmf, sizeof(cpu_subfunc.kmf), test_kmf_asm_block, 77 }, + { "KMO", cpu_subfunc.kmo, sizeof(cpu_subfunc.kmo), test_kmo_asm_block, 77 }, + { "PCC", cpu_subfunc.pcc, sizeof(cpu_subfunc.pcc), test_pcc_asm_block, 77 }, + /* MSA5 - Facility bit 57 */ + { "PPNO", cpu_subfunc.ppno, sizeof(cpu_subfunc.ppno), test_prno_asm_block, 57 }, + /* MSA8 - Facility bit 146 */ + { "KMA", cpu_subfunc.kma, sizeof(cpu_subfunc.kma), test_kma_asm_block, 146 }, + /* MSA9 - Facility bit 155 */ + { "KDSA", cpu_subfunc.kdsa, sizeof(cpu_subfunc.kdsa), test_kdsa_asm_block, 155 }, + /* SORTL - Facility bit 150 */ + { "SORTL", cpu_subfunc.sortl, sizeof(cpu_subfunc.sortl), test_sortl_asm_block, 150 }, + /* DFLTCC - Facility bit 151 */ + { "DFLTCC", cpu_subfunc.dfltcc, sizeof(cpu_subfunc.dfltcc), test_dfltcc_asm_block, 151 }, + /* Concurrent-function facility - Facility bit 201 */ + { "PFCR", cpu_subfunc.pfcr, sizeof(cpu_subfunc.pfcr), test_pfcr_asm_block, 201 }, +}; + +int main(int argc, char *argv[]) +{ + struct kvm_vm *vm; + int idx; + + ksft_print_header(); + + vm = vm_create(1); + + memset(&cpu_subfunc, 0, sizeof(cpu_subfunc)); + get_cpu_machine_subfuntions(vm, &cpu_subfunc); + + ksft_set_plan(ARRAY_SIZE(testlist)); + for (idx = 0; idx < ARRAY_SIZE(testlist); idx++) { + if (test_facility(testlist[idx].facility_bit)) { + u8 *array = malloc(testlist[idx].array_size); + + testlist[idx].test((u8 (*)[testlist[idx].array_size])array); + + TEST_ASSERT_EQ(memcmp(testlist[idx].subfunc_array, + array, testlist[idx].array_size), 0); + + ksft_test_result_pass("%s\n", testlist[idx].subfunc_name); + free(array); + } else { + ksft_test_result_skip("%s feature is not avaialable\n", + testlist[idx].subfunc_name); + } + } + + kvm_vm_free(vm); + ksft_finished(); +} diff --git a/tools/testing/selftests/kvm/s390/debug_test.c b/tools/testing/selftests/kvm/s390/debug_test.c new file mode 100644 index 000000000000..ad8095968601 --- /dev/null +++ b/tools/testing/selftests/kvm/s390/debug_test.c @@ -0,0 +1,160 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Test KVM debugging features. */ +#include "kvm_util.h" +#include "test_util.h" +#include "sie.h" + +#include + +#define __LC_SVC_NEW_PSW 0x1c0 +#define __LC_PGM_NEW_PSW 0x1d0 +#define IPA0_DIAG 0x8300 +#define PGM_SPECIFICATION 0x06 + +/* Common code for testing single-stepping interruptions. 
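 *
 * Rough idea (see test_step_int_1() below): install int_handler as the
 * respective interruption new PSW in the lowcore, enable
 * KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP and run the guest for one
 * instruction. The expected result is a KVM_EXIT_DEBUG exit with the PSW
 * already switched to the new PSW, i.e. pointing at int_handler.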
*/ +extern char int_handler[]; +asm("int_handler:\n" + "j .\n"); + +static struct kvm_vm *test_step_int_1(struct kvm_vcpu **vcpu, void *guest_code, + size_t new_psw_off, uint64_t *new_psw) +{ + struct kvm_guest_debug debug = {}; + struct kvm_regs regs; + struct kvm_vm *vm; + char *lowcore; + + vm = vm_create_with_one_vcpu(vcpu, guest_code); + lowcore = addr_gpa2hva(vm, 0); + new_psw[0] = (*vcpu)->run->psw_mask; + new_psw[1] = (uint64_t)int_handler; + memcpy(lowcore + new_psw_off, new_psw, 16); + vcpu_regs_get(*vcpu, ®s); + regs.gprs[2] = -1; + vcpu_regs_set(*vcpu, ®s); + debug.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP; + vcpu_guest_debug_set(*vcpu, &debug); + vcpu_run(*vcpu); + + return vm; +} + +static void test_step_int(void *guest_code, size_t new_psw_off) +{ + struct kvm_vcpu *vcpu; + uint64_t new_psw[2]; + struct kvm_vm *vm; + + vm = test_step_int_1(&vcpu, guest_code, new_psw_off, new_psw); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_DEBUG); + TEST_ASSERT_EQ(vcpu->run->psw_mask, new_psw[0]); + TEST_ASSERT_EQ(vcpu->run->psw_addr, new_psw[1]); + kvm_vm_free(vm); +} + +/* Test single-stepping "boring" program interruptions. */ +extern char test_step_pgm_guest_code[]; +asm("test_step_pgm_guest_code:\n" + ".insn rr,0x1d00,%r1,%r0 /* dr %r1,%r0 */\n" + "j .\n"); + +static void test_step_pgm(void) +{ + test_step_int(test_step_pgm_guest_code, __LC_PGM_NEW_PSW); +} + +/* + * Test single-stepping program interruptions caused by DIAG. + * Userspace emulation must not interfere with single-stepping. + */ +extern char test_step_pgm_diag_guest_code[]; +asm("test_step_pgm_diag_guest_code:\n" + "diag %r0,%r0,0\n" + "j .\n"); + +static void test_step_pgm_diag(void) +{ + struct kvm_s390_irq irq = { + .type = KVM_S390_PROGRAM_INT, + .u.pgm.code = PGM_SPECIFICATION, + }; + struct kvm_vcpu *vcpu; + uint64_t new_psw[2]; + struct kvm_vm *vm; + + vm = test_step_int_1(&vcpu, test_step_pgm_diag_guest_code, + __LC_PGM_NEW_PSW, new_psw); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_S390_SIEIC); + TEST_ASSERT_EQ(vcpu->run->s390_sieic.icptcode, ICPT_INST); + TEST_ASSERT_EQ(vcpu->run->s390_sieic.ipa & 0xff00, IPA0_DIAG); + vcpu_ioctl(vcpu, KVM_S390_IRQ, &irq); + vcpu_run(vcpu); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_DEBUG); + TEST_ASSERT_EQ(vcpu->run->psw_mask, new_psw[0]); + TEST_ASSERT_EQ(vcpu->run->psw_addr, new_psw[1]); + kvm_vm_free(vm); +} + +/* + * Test single-stepping program interruptions caused by ISKE. + * CPUSTAT_KSS handling must not interfere with single-stepping. + */ +extern char test_step_pgm_iske_guest_code[]; +asm("test_step_pgm_iske_guest_code:\n" + "iske %r2,%r2\n" + "j .\n"); + +static void test_step_pgm_iske(void) +{ + test_step_int(test_step_pgm_iske_guest_code, __LC_PGM_NEW_PSW); +} + +/* + * Test single-stepping program interruptions caused by LCTL. + * KVM emulation must not interfere with single-stepping. + */ +extern char test_step_pgm_lctl_guest_code[]; +asm("test_step_pgm_lctl_guest_code:\n" + "lctl %c0,%c0,1\n" + "j .\n"); + +static void test_step_pgm_lctl(void) +{ + test_step_int(test_step_pgm_lctl_guest_code, __LC_PGM_NEW_PSW); +} + +/* Test single-stepping supervisor-call interruptions. */ +extern char test_step_svc_guest_code[]; +asm("test_step_svc_guest_code:\n" + "svc 0\n" + "j .\n"); + +static void test_step_svc(void) +{ + test_step_int(test_step_svc_guest_code, __LC_SVC_NEW_PSW); +} + +/* Run all tests above. 
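 *
 * Each entry is reported as one TAP result through the kselftest helpers:
 * ksft_set_plan(ARRAY_SIZE(testlist)) announces the plan and
 * ksft_test_result_pass() reports each test, so adding a test only means
 * adding a line to testlist[].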
*/ +static struct testdef { + const char *name; + void (*test)(void); +} testlist[] = { + { "single-step pgm", test_step_pgm }, + { "single-step pgm caused by diag", test_step_pgm_diag }, + { "single-step pgm caused by iske", test_step_pgm_iske }, + { "single-step pgm caused by lctl", test_step_pgm_lctl }, + { "single-step svc", test_step_svc }, +}; + +int main(int argc, char *argv[]) +{ + int idx; + + ksft_print_header(); + ksft_set_plan(ARRAY_SIZE(testlist)); + for (idx = 0; idx < ARRAY_SIZE(testlist); idx++) { + testlist[idx].test(); + ksft_test_result_pass("%s\n", testlist[idx].name); + } + ksft_finished(); +} diff --git a/tools/testing/selftests/kvm/s390/memop.c b/tools/testing/selftests/kvm/s390/memop.c new file mode 100644 index 000000000000..4374b4cd2a80 --- /dev/null +++ b/tools/testing/selftests/kvm/s390/memop.c @@ -0,0 +1,1187 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Test for s390x KVM_S390_MEM_OP + * + * Copyright (C) 2019, Red Hat, Inc. + */ +#include +#include +#include +#include +#include + +#include + +#include "test_util.h" +#include "kvm_util.h" +#include "kselftest.h" +#include "ucall_common.h" +#include "processor.h" + +enum mop_target { + LOGICAL, + SIDA, + ABSOLUTE, + INVALID, +}; + +enum mop_access_mode { + READ, + WRITE, + CMPXCHG, +}; + +struct mop_desc { + uintptr_t gaddr; + uintptr_t gaddr_v; + uint64_t set_flags; + unsigned int f_check : 1; + unsigned int f_inject : 1; + unsigned int f_key : 1; + unsigned int _gaddr_v : 1; + unsigned int _set_flags : 1; + unsigned int _sida_offset : 1; + unsigned int _ar : 1; + uint32_t size; + enum mop_target target; + enum mop_access_mode mode; + void *buf; + uint32_t sida_offset; + void *old; + uint8_t old_value[16]; + bool *cmpxchg_success; + uint8_t ar; + uint8_t key; +}; + +const uint8_t NO_KEY = 0xff; + +static struct kvm_s390_mem_op ksmo_from_desc(struct mop_desc *desc) +{ + struct kvm_s390_mem_op ksmo = { + .gaddr = (uintptr_t)desc->gaddr, + .size = desc->size, + .buf = ((uintptr_t)desc->buf), + .reserved = "ignored_ignored_ignored_ignored" + }; + + switch (desc->target) { + case LOGICAL: + if (desc->mode == READ) + ksmo.op = KVM_S390_MEMOP_LOGICAL_READ; + if (desc->mode == WRITE) + ksmo.op = KVM_S390_MEMOP_LOGICAL_WRITE; + break; + case SIDA: + if (desc->mode == READ) + ksmo.op = KVM_S390_MEMOP_SIDA_READ; + if (desc->mode == WRITE) + ksmo.op = KVM_S390_MEMOP_SIDA_WRITE; + break; + case ABSOLUTE: + if (desc->mode == READ) + ksmo.op = KVM_S390_MEMOP_ABSOLUTE_READ; + if (desc->mode == WRITE) + ksmo.op = KVM_S390_MEMOP_ABSOLUTE_WRITE; + if (desc->mode == CMPXCHG) { + ksmo.op = KVM_S390_MEMOP_ABSOLUTE_CMPXCHG; + ksmo.old_addr = (uint64_t)desc->old; + memcpy(desc->old_value, desc->old, desc->size); + } + break; + case INVALID: + ksmo.op = -1; + } + if (desc->f_check) + ksmo.flags |= KVM_S390_MEMOP_F_CHECK_ONLY; + if (desc->f_inject) + ksmo.flags |= KVM_S390_MEMOP_F_INJECT_EXCEPTION; + if (desc->_set_flags) + ksmo.flags = desc->set_flags; + if (desc->f_key && desc->key != NO_KEY) { + ksmo.flags |= KVM_S390_MEMOP_F_SKEY_PROTECTION; + ksmo.key = desc->key; + } + if (desc->_ar) + ksmo.ar = desc->ar; + else + ksmo.ar = 0; + if (desc->_sida_offset) + ksmo.sida_offset = desc->sida_offset; + + return ksmo; +} + +struct test_info { + struct kvm_vm *vm; + struct kvm_vcpu *vcpu; +}; + +#define PRINT_MEMOP false +static void print_memop(struct kvm_vcpu *vcpu, const struct kvm_s390_mem_op *ksmo) +{ + if (!PRINT_MEMOP) + return; + + if (!vcpu) + printf("vm memop("); + else + printf("vcpu memop("); + switch (ksmo->op) { + 
case KVM_S390_MEMOP_LOGICAL_READ: + printf("LOGICAL, READ, "); + break; + case KVM_S390_MEMOP_LOGICAL_WRITE: + printf("LOGICAL, WRITE, "); + break; + case KVM_S390_MEMOP_SIDA_READ: + printf("SIDA, READ, "); + break; + case KVM_S390_MEMOP_SIDA_WRITE: + printf("SIDA, WRITE, "); + break; + case KVM_S390_MEMOP_ABSOLUTE_READ: + printf("ABSOLUTE, READ, "); + break; + case KVM_S390_MEMOP_ABSOLUTE_WRITE: + printf("ABSOLUTE, WRITE, "); + break; + case KVM_S390_MEMOP_ABSOLUTE_CMPXCHG: + printf("ABSOLUTE, CMPXCHG, "); + break; + } + printf("gaddr=%llu, size=%u, buf=%llu, ar=%u, key=%u, old_addr=%llx", + ksmo->gaddr, ksmo->size, ksmo->buf, ksmo->ar, ksmo->key, + ksmo->old_addr); + if (ksmo->flags & KVM_S390_MEMOP_F_CHECK_ONLY) + printf(", CHECK_ONLY"); + if (ksmo->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) + printf(", INJECT_EXCEPTION"); + if (ksmo->flags & KVM_S390_MEMOP_F_SKEY_PROTECTION) + printf(", SKEY_PROTECTION"); + puts(")"); +} + +static int err_memop_ioctl(struct test_info info, struct kvm_s390_mem_op *ksmo, + struct mop_desc *desc) +{ + struct kvm_vcpu *vcpu = info.vcpu; + + if (!vcpu) + return __vm_ioctl(info.vm, KVM_S390_MEM_OP, ksmo); + else + return __vcpu_ioctl(vcpu, KVM_S390_MEM_OP, ksmo); +} + +static void memop_ioctl(struct test_info info, struct kvm_s390_mem_op *ksmo, + struct mop_desc *desc) +{ + int r; + + r = err_memop_ioctl(info, ksmo, desc); + if (ksmo->op == KVM_S390_MEMOP_ABSOLUTE_CMPXCHG) { + if (desc->cmpxchg_success) { + int diff = memcmp(desc->old_value, desc->old, desc->size); + *desc->cmpxchg_success = !diff; + } + } + TEST_ASSERT(!r, __KVM_IOCTL_ERROR("KVM_S390_MEM_OP", r)); +} + +#define MEMOP(err, info_p, mop_target_p, access_mode_p, buf_p, size_p, ...) \ +({ \ + struct test_info __info = (info_p); \ + struct mop_desc __desc = { \ + .target = (mop_target_p), \ + .mode = (access_mode_p), \ + .buf = (buf_p), \ + .size = (size_p), \ + __VA_ARGS__ \ + }; \ + struct kvm_s390_mem_op __ksmo; \ + \ + if (__desc._gaddr_v) { \ + if (__desc.target == ABSOLUTE) \ + __desc.gaddr = addr_gva2gpa(__info.vm, __desc.gaddr_v); \ + else \ + __desc.gaddr = __desc.gaddr_v; \ + } \ + __ksmo = ksmo_from_desc(&__desc); \ + print_memop(__info.vcpu, &__ksmo); \ + err##memop_ioctl(__info, &__ksmo, &__desc); \ +}) + +#define MOP(...) MEMOP(, __VA_ARGS__) +#define ERR_MOP(...) MEMOP(err_, __VA_ARGS__) + +#define GADDR(a) .gaddr = ((uintptr_t)a) +#define GADDR_V(v) ._gaddr_v = 1, .gaddr_v = ((uintptr_t)v) +#define CHECK_ONLY .f_check = 1 +#define SET_FLAGS(f) ._set_flags = 1, .set_flags = (f) +#define SIDA_OFFSET(o) ._sida_offset = 1, .sida_offset = (o) +#define AR(a) ._ar = 1, .ar = (a) +#define KEY(a) .f_key = 1, .key = (a) +#define INJECT .f_inject = 1 +#define CMPXCHG_OLD(o) .old = (o) +#define CMPXCHG_SUCCESS(s) .cmpxchg_success = (s) + +#define CHECK_N_DO(f, ...) 
({ f(__VA_ARGS__, CHECK_ONLY); f(__VA_ARGS__); }) + +#define CR0_FETCH_PROTECTION_OVERRIDE (1UL << (63 - 38)) +#define CR0_STORAGE_PROTECTION_OVERRIDE (1UL << (63 - 39)) + +static uint8_t __aligned(PAGE_SIZE) mem1[65536]; +static uint8_t __aligned(PAGE_SIZE) mem2[65536]; + +struct test_default { + struct kvm_vm *kvm_vm; + struct test_info vm; + struct test_info vcpu; + struct kvm_run *run; + int size; +}; + +static struct test_default test_default_init(void *guest_code) +{ + struct kvm_vcpu *vcpu; + struct test_default t; + + t.size = min((size_t)kvm_check_cap(KVM_CAP_S390_MEM_OP), sizeof(mem1)); + t.kvm_vm = vm_create_with_one_vcpu(&vcpu, guest_code); + t.vm = (struct test_info) { t.kvm_vm, NULL }; + t.vcpu = (struct test_info) { t.kvm_vm, vcpu }; + t.run = vcpu->run; + return t; +} + +enum stage { + /* Synced state set by host, e.g. DAT */ + STAGE_INITED, + /* Guest did nothing */ + STAGE_IDLED, + /* Guest set storage keys (specifics up to test case) */ + STAGE_SKEYS_SET, + /* Guest copied memory (locations up to test case) */ + STAGE_COPIED, + /* End of guest code reached */ + STAGE_DONE, +}; + +#define HOST_SYNC(info_p, stage) \ +({ \ + struct test_info __info = (info_p); \ + struct kvm_vcpu *__vcpu = __info.vcpu; \ + struct ucall uc; \ + int __stage = (stage); \ + \ + vcpu_run(__vcpu); \ + get_ucall(__vcpu, &uc); \ + if (uc.cmd == UCALL_ABORT) { \ + REPORT_GUEST_ASSERT(uc); \ + } \ + TEST_ASSERT_EQ(uc.cmd, UCALL_SYNC); \ + TEST_ASSERT_EQ(uc.args[1], __stage); \ +}) \ + +static void prepare_mem12(void) +{ + int i; + + for (i = 0; i < sizeof(mem1); i++) + mem1[i] = rand(); + memset(mem2, 0xaa, sizeof(mem2)); +} + +#define ASSERT_MEM_EQ(p1, p2, size) \ + TEST_ASSERT(!memcmp(p1, p2, size), "Memory contents do not match!") + +static void default_write_read(struct test_info copy_cpu, struct test_info mop_cpu, + enum mop_target mop_target, uint32_t size, uint8_t key) +{ + prepare_mem12(); + CHECK_N_DO(MOP, mop_cpu, mop_target, WRITE, mem1, size, + GADDR_V(mem1), KEY(key)); + HOST_SYNC(copy_cpu, STAGE_COPIED); + CHECK_N_DO(MOP, mop_cpu, mop_target, READ, mem2, size, + GADDR_V(mem2), KEY(key)); + ASSERT_MEM_EQ(mem1, mem2, size); +} + +static void default_read(struct test_info copy_cpu, struct test_info mop_cpu, + enum mop_target mop_target, uint32_t size, uint8_t key) +{ + prepare_mem12(); + CHECK_N_DO(MOP, mop_cpu, mop_target, WRITE, mem1, size, GADDR_V(mem1)); + HOST_SYNC(copy_cpu, STAGE_COPIED); + CHECK_N_DO(MOP, mop_cpu, mop_target, READ, mem2, size, + GADDR_V(mem2), KEY(key)); + ASSERT_MEM_EQ(mem1, mem2, size); +} + +static void default_cmpxchg(struct test_default *test, uint8_t key) +{ + for (int size = 1; size <= 16; size *= 2) { + for (int offset = 0; offset < 16; offset += size) { + uint8_t __aligned(16) new[16] = {}; + uint8_t __aligned(16) old[16]; + bool succ; + + prepare_mem12(); + default_write_read(test->vcpu, test->vcpu, LOGICAL, 16, NO_KEY); + + memcpy(&old, mem1, 16); + MOP(test->vm, ABSOLUTE, CMPXCHG, new + offset, + size, GADDR_V(mem1 + offset), + CMPXCHG_OLD(old + offset), + CMPXCHG_SUCCESS(&succ), KEY(key)); + HOST_SYNC(test->vcpu, STAGE_COPIED); + MOP(test->vm, ABSOLUTE, READ, mem2, 16, GADDR_V(mem2)); + TEST_ASSERT(succ, "exchange of values should succeed"); + memcpy(mem1 + offset, new + offset, size); + ASSERT_MEM_EQ(mem1, mem2, 16); + + memcpy(&old, mem1, 16); + new[offset]++; + old[offset]++; + MOP(test->vm, ABSOLUTE, CMPXCHG, new + offset, + size, GADDR_V(mem1 + offset), + CMPXCHG_OLD(old + offset), + CMPXCHG_SUCCESS(&succ), KEY(key)); + HOST_SYNC(test->vcpu, 
STAGE_COPIED); + MOP(test->vm, ABSOLUTE, READ, mem2, 16, GADDR_V(mem2)); + TEST_ASSERT(!succ, "exchange of values should not succeed"); + ASSERT_MEM_EQ(mem1, mem2, 16); + ASSERT_MEM_EQ(&old, mem1, 16); + } + } +} + +static void guest_copy(void) +{ + GUEST_SYNC(STAGE_INITED); + memcpy(&mem2, &mem1, sizeof(mem2)); + GUEST_SYNC(STAGE_COPIED); +} + +static void test_copy(void) +{ + struct test_default t = test_default_init(guest_copy); + + HOST_SYNC(t.vcpu, STAGE_INITED); + + default_write_read(t.vcpu, t.vcpu, LOGICAL, t.size, NO_KEY); + + kvm_vm_free(t.kvm_vm); +} + +static void test_copy_access_register(void) +{ + struct test_default t = test_default_init(guest_copy); + + HOST_SYNC(t.vcpu, STAGE_INITED); + + prepare_mem12(); + t.run->psw_mask &= ~(3UL << (63 - 17)); + t.run->psw_mask |= 1UL << (63 - 17); /* Enable AR mode */ + + /* + * Primary address space gets used if an access register + * contains zero. The host makes use of AR[1] so is a good + * candidate to ensure the guest AR (of zero) is used. + */ + CHECK_N_DO(MOP, t.vcpu, LOGICAL, WRITE, mem1, t.size, + GADDR_V(mem1), AR(1)); + HOST_SYNC(t.vcpu, STAGE_COPIED); + + CHECK_N_DO(MOP, t.vcpu, LOGICAL, READ, mem2, t.size, + GADDR_V(mem2), AR(1)); + ASSERT_MEM_EQ(mem1, mem2, t.size); + + kvm_vm_free(t.kvm_vm); +} + +static void set_storage_key_range(void *addr, size_t len, uint8_t key) +{ + uintptr_t _addr, abs, i; + int not_mapped = 0; + + _addr = (uintptr_t)addr; + for (i = _addr & PAGE_MASK; i < _addr + len; i += PAGE_SIZE) { + abs = i; + asm volatile ( + "lra %[abs], 0(0,%[abs])\n" + " jz 0f\n" + " llill %[not_mapped],1\n" + " j 1f\n" + "0: sske %[key], %[abs]\n" + "1:" + : [abs] "+&a" (abs), [not_mapped] "+r" (not_mapped) + : [key] "r" (key) + : "cc" + ); + GUEST_ASSERT_EQ(not_mapped, 0); + } +} + +static void guest_copy_key(void) +{ + set_storage_key_range(mem1, sizeof(mem1), 0x90); + set_storage_key_range(mem2, sizeof(mem2), 0x90); + GUEST_SYNC(STAGE_SKEYS_SET); + + for (;;) { + memcpy(&mem2, &mem1, sizeof(mem2)); + GUEST_SYNC(STAGE_COPIED); + } +} + +static void test_copy_key(void) +{ + struct test_default t = test_default_init(guest_copy_key); + + HOST_SYNC(t.vcpu, STAGE_SKEYS_SET); + + /* vm, no key */ + default_write_read(t.vcpu, t.vm, ABSOLUTE, t.size, NO_KEY); + + /* vm/vcpu, machting key or key 0 */ + default_write_read(t.vcpu, t.vcpu, LOGICAL, t.size, 0); + default_write_read(t.vcpu, t.vcpu, LOGICAL, t.size, 9); + default_write_read(t.vcpu, t.vm, ABSOLUTE, t.size, 0); + default_write_read(t.vcpu, t.vm, ABSOLUTE, t.size, 9); + /* + * There used to be different code paths for key handling depending on + * if the region crossed a page boundary. + * There currently are not, but the more tests the merrier. 
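+	 * The t.size transfers above typically span several pages of the 64 KiB
+	 * mem1/mem2 buffers, while the single-byte transfers below stay within
+	 * one page, so both the crossing and the non-crossing case get exercised.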
+ */ + default_write_read(t.vcpu, t.vcpu, LOGICAL, 1, 0); + default_write_read(t.vcpu, t.vcpu, LOGICAL, 1, 9); + default_write_read(t.vcpu, t.vm, ABSOLUTE, 1, 0); + default_write_read(t.vcpu, t.vm, ABSOLUTE, 1, 9); + + /* vm/vcpu, mismatching keys on read, but no fetch protection */ + default_read(t.vcpu, t.vcpu, LOGICAL, t.size, 2); + default_read(t.vcpu, t.vm, ABSOLUTE, t.size, 2); + + kvm_vm_free(t.kvm_vm); +} + +static void test_cmpxchg_key(void) +{ + struct test_default t = test_default_init(guest_copy_key); + + HOST_SYNC(t.vcpu, STAGE_SKEYS_SET); + + default_cmpxchg(&t, NO_KEY); + default_cmpxchg(&t, 0); + default_cmpxchg(&t, 9); + + kvm_vm_free(t.kvm_vm); +} + +static __uint128_t cut_to_size(int size, __uint128_t val) +{ + switch (size) { + case 1: + return (uint8_t)val; + case 2: + return (uint16_t)val; + case 4: + return (uint32_t)val; + case 8: + return (uint64_t)val; + case 16: + return val; + } + GUEST_FAIL("Invalid size = %u", size); + return 0; +} + +static bool popcount_eq(__uint128_t a, __uint128_t b) +{ + unsigned int count_a, count_b; + + count_a = __builtin_popcountl((uint64_t)(a >> 64)) + + __builtin_popcountl((uint64_t)a); + count_b = __builtin_popcountl((uint64_t)(b >> 64)) + + __builtin_popcountl((uint64_t)b); + return count_a == count_b; +} + +static __uint128_t rotate(int size, __uint128_t val, int amount) +{ + unsigned int bits = size * 8; + + amount = (amount + bits) % bits; + val = cut_to_size(size, val); + if (!amount) + return val; + return (val << (bits - amount)) | (val >> amount); +} + +const unsigned int max_block = 16; + +static void choose_block(bool guest, int i, int *size, int *offset) +{ + unsigned int rand; + + rand = i; + if (guest) { + rand = rand * 19 + 11; + *size = 1 << ((rand % 3) + 2); + rand = rand * 19 + 11; + *offset = (rand % max_block) & ~(*size - 1); + } else { + rand = rand * 17 + 5; + *size = 1 << (rand % 5); + rand = rand * 17 + 5; + *offset = (rand % max_block) & ~(*size - 1); + } +} + +static __uint128_t permutate_bits(bool guest, int i, int size, __uint128_t old) +{ + unsigned int rand; + int amount; + bool swap; + + rand = i; + rand = rand * 3 + 1; + if (guest) + rand = rand * 3 + 1; + swap = rand % 2 == 0; + if (swap) { + int i, j; + __uint128_t new; + uint8_t byte0, byte1; + + rand = rand * 3 + 1; + i = rand % size; + rand = rand * 3 + 1; + j = rand % size; + if (i == j) + return old; + new = rotate(16, old, i * 8); + byte0 = new & 0xff; + new &= ~0xff; + new = rotate(16, new, -i * 8); + new = rotate(16, new, j * 8); + byte1 = new & 0xff; + new = (new & ~0xff) | byte0; + new = rotate(16, new, -j * 8); + new = rotate(16, new, i * 8); + new = new | byte1; + new = rotate(16, new, -i * 8); + return new; + } + rand = rand * 3 + 1; + amount = rand % (size * 8); + return rotate(size, old, amount); +} + +static bool _cmpxchg(int size, void *target, __uint128_t *old_addr, __uint128_t new) +{ + bool ret; + + switch (size) { + case 4: { + uint32_t old = *old_addr; + + asm volatile ("cs %[old],%[new],%[address]" + : [old] "+d" (old), + [address] "+Q" (*(uint32_t *)(target)) + : [new] "d" ((uint32_t)new) + : "cc" + ); + ret = old == (uint32_t)*old_addr; + *old_addr = old; + return ret; + } + case 8: { + uint64_t old = *old_addr; + + asm volatile ("csg %[old],%[new],%[address]" + : [old] "+d" (old), + [address] "+Q" (*(uint64_t *)(target)) + : [new] "d" ((uint64_t)new) + : "cc" + ); + ret = old == (uint64_t)*old_addr; + *old_addr = old; + return ret; + } + case 16: { + __uint128_t old = *old_addr; + + asm volatile ("cdsg 
%[old],%[new],%[address]" + : [old] "+d" (old), + [address] "+Q" (*(__uint128_t *)(target)) + : [new] "d" (new) + : "cc" + ); + ret = old == *old_addr; + *old_addr = old; + return ret; + } + } + GUEST_FAIL("Invalid size = %u", size); + return 0; +} + +const unsigned int cmpxchg_iter_outer = 100, cmpxchg_iter_inner = 10000; + +static void guest_cmpxchg_key(void) +{ + int size, offset; + __uint128_t old, new; + + set_storage_key_range(mem1, max_block, 0x10); + set_storage_key_range(mem2, max_block, 0x10); + GUEST_SYNC(STAGE_SKEYS_SET); + + for (int i = 0; i < cmpxchg_iter_outer; i++) { + do { + old = 1; + } while (!_cmpxchg(16, mem1, &old, 0)); + for (int j = 0; j < cmpxchg_iter_inner; j++) { + choose_block(true, i + j, &size, &offset); + do { + new = permutate_bits(true, i + j, size, old); + } while (!_cmpxchg(size, mem2 + offset, &old, new)); + } + } + + GUEST_SYNC(STAGE_DONE); +} + +static void *run_guest(void *data) +{ + struct test_info *info = data; + + HOST_SYNC(*info, STAGE_DONE); + return NULL; +} + +static char *quad_to_char(__uint128_t *quad, int size) +{ + return ((char *)quad) + (sizeof(*quad) - size); +} + +static void test_cmpxchg_key_concurrent(void) +{ + struct test_default t = test_default_init(guest_cmpxchg_key); + int size, offset; + __uint128_t old, new; + bool success; + pthread_t thread; + + HOST_SYNC(t.vcpu, STAGE_SKEYS_SET); + prepare_mem12(); + MOP(t.vcpu, LOGICAL, WRITE, mem1, max_block, GADDR_V(mem2)); + pthread_create(&thread, NULL, run_guest, &t.vcpu); + + for (int i = 0; i < cmpxchg_iter_outer; i++) { + do { + old = 0; + new = 1; + MOP(t.vm, ABSOLUTE, CMPXCHG, &new, + sizeof(new), GADDR_V(mem1), + CMPXCHG_OLD(&old), + CMPXCHG_SUCCESS(&success), KEY(1)); + } while (!success); + for (int j = 0; j < cmpxchg_iter_inner; j++) { + choose_block(false, i + j, &size, &offset); + do { + new = permutate_bits(false, i + j, size, old); + MOP(t.vm, ABSOLUTE, CMPXCHG, quad_to_char(&new, size), + size, GADDR_V(mem2 + offset), + CMPXCHG_OLD(quad_to_char(&old, size)), + CMPXCHG_SUCCESS(&success), KEY(1)); + } while (!success); + } + } + + pthread_join(thread, NULL); + + MOP(t.vcpu, LOGICAL, READ, mem2, max_block, GADDR_V(mem2)); + TEST_ASSERT(popcount_eq(*(__uint128_t *)mem1, *(__uint128_t *)mem2), + "Must retain number of set bits"); + + kvm_vm_free(t.kvm_vm); +} + +static void guest_copy_key_fetch_prot(void) +{ + /* + * For some reason combining the first sync with override enablement + * results in an exception when calling HOST_SYNC. + */ + GUEST_SYNC(STAGE_INITED); + /* Storage protection override applies to both store and fetch. 
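+	 * Key 0x98 is access-control value 9 with the fetch-protection bit set.
+	 * Storage-protection override (CR0 bit 39) permits accesses with a
+	 * mismatching access key as long as the storage key's access-control
+	 * value is 9, which is what test_copy_key_storage_prot_override() below
+	 * relies on.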
*/ + set_storage_key_range(mem1, sizeof(mem1), 0x98); + set_storage_key_range(mem2, sizeof(mem2), 0x98); + GUEST_SYNC(STAGE_SKEYS_SET); + + for (;;) { + memcpy(&mem2, &mem1, sizeof(mem2)); + GUEST_SYNC(STAGE_COPIED); + } +} + +static void test_copy_key_storage_prot_override(void) +{ + struct test_default t = test_default_init(guest_copy_key_fetch_prot); + + HOST_SYNC(t.vcpu, STAGE_INITED); + t.run->s.regs.crs[0] |= CR0_STORAGE_PROTECTION_OVERRIDE; + t.run->kvm_dirty_regs = KVM_SYNC_CRS; + HOST_SYNC(t.vcpu, STAGE_SKEYS_SET); + + /* vcpu, mismatching keys, storage protection override in effect */ + default_write_read(t.vcpu, t.vcpu, LOGICAL, t.size, 2); + + kvm_vm_free(t.kvm_vm); +} + +static void test_copy_key_fetch_prot(void) +{ + struct test_default t = test_default_init(guest_copy_key_fetch_prot); + + HOST_SYNC(t.vcpu, STAGE_INITED); + HOST_SYNC(t.vcpu, STAGE_SKEYS_SET); + + /* vm/vcpu, matching key, fetch protection in effect */ + default_read(t.vcpu, t.vcpu, LOGICAL, t.size, 9); + default_read(t.vcpu, t.vm, ABSOLUTE, t.size, 9); + + kvm_vm_free(t.kvm_vm); +} + +#define ERR_PROT_MOP(...) \ +({ \ + int rv; \ + \ + rv = ERR_MOP(__VA_ARGS__); \ + TEST_ASSERT(rv == 4, "Should result in protection exception"); \ +}) + +static void guest_error_key(void) +{ + GUEST_SYNC(STAGE_INITED); + set_storage_key_range(mem1, PAGE_SIZE, 0x18); + set_storage_key_range(mem1 + PAGE_SIZE, sizeof(mem1) - PAGE_SIZE, 0x98); + GUEST_SYNC(STAGE_SKEYS_SET); + GUEST_SYNC(STAGE_IDLED); +} + +static void test_errors_key(void) +{ + struct test_default t = test_default_init(guest_error_key); + + HOST_SYNC(t.vcpu, STAGE_INITED); + HOST_SYNC(t.vcpu, STAGE_SKEYS_SET); + + /* vm/vcpu, mismatching keys, fetch protection in effect */ + CHECK_N_DO(ERR_PROT_MOP, t.vcpu, LOGICAL, WRITE, mem1, t.size, GADDR_V(mem1), KEY(2)); + CHECK_N_DO(ERR_PROT_MOP, t.vcpu, LOGICAL, READ, mem2, t.size, GADDR_V(mem1), KEY(2)); + CHECK_N_DO(ERR_PROT_MOP, t.vm, ABSOLUTE, WRITE, mem1, t.size, GADDR_V(mem1), KEY(2)); + CHECK_N_DO(ERR_PROT_MOP, t.vm, ABSOLUTE, READ, mem2, t.size, GADDR_V(mem1), KEY(2)); + + kvm_vm_free(t.kvm_vm); +} + +static void test_errors_cmpxchg_key(void) +{ + struct test_default t = test_default_init(guest_copy_key_fetch_prot); + int i; + + HOST_SYNC(t.vcpu, STAGE_INITED); + HOST_SYNC(t.vcpu, STAGE_SKEYS_SET); + + for (i = 1; i <= 16; i *= 2) { + __uint128_t old = 0; + + ERR_PROT_MOP(t.vm, ABSOLUTE, CMPXCHG, mem2, i, GADDR_V(mem2), + CMPXCHG_OLD(&old), KEY(2)); + } + + kvm_vm_free(t.kvm_vm); +} + +static void test_termination(void) +{ + struct test_default t = test_default_init(guest_error_key); + uint64_t prefix; + uint64_t teid; + uint64_t teid_mask = BIT(63 - 56) | BIT(63 - 60) | BIT(63 - 61); + uint64_t psw[2]; + + HOST_SYNC(t.vcpu, STAGE_INITED); + HOST_SYNC(t.vcpu, STAGE_SKEYS_SET); + + /* vcpu, mismatching keys after first page */ + ERR_PROT_MOP(t.vcpu, LOGICAL, WRITE, mem1, t.size, GADDR_V(mem1), KEY(1), INJECT); + /* + * The memop injected a program exception and the test needs to check the + * Translation-Exception Identification (TEID). It is necessary to run + * the guest in order to be able to read the TEID from guest memory. + * Set the guest program new PSW, so the guest state is not clobbered. 
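+	 * 464 is the absolute lowcore offset of the program new PSW and 168 that
+	 * of the translation-exception identification, so storing the interrupted
+	 * PSW at prefix + 464 lets the guest continue where it was when the
+	 * injected program interruption is delivered, and the host can then read
+	 * the TEID from prefix + 168.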
+ */ + prefix = t.run->s.regs.prefix; + psw[0] = t.run->psw_mask; + psw[1] = t.run->psw_addr; + MOP(t.vm, ABSOLUTE, WRITE, psw, sizeof(psw), GADDR(prefix + 464)); + HOST_SYNC(t.vcpu, STAGE_IDLED); + MOP(t.vm, ABSOLUTE, READ, &teid, sizeof(teid), GADDR(prefix + 168)); + /* Bits 56, 60, 61 form a code, 0 being the only one allowing for termination */ + TEST_ASSERT_EQ(teid & teid_mask, 0); + + kvm_vm_free(t.kvm_vm); +} + +static void test_errors_key_storage_prot_override(void) +{ + struct test_default t = test_default_init(guest_copy_key_fetch_prot); + + HOST_SYNC(t.vcpu, STAGE_INITED); + t.run->s.regs.crs[0] |= CR0_STORAGE_PROTECTION_OVERRIDE; + t.run->kvm_dirty_regs = KVM_SYNC_CRS; + HOST_SYNC(t.vcpu, STAGE_SKEYS_SET); + + /* vm, mismatching keys, storage protection override not applicable to vm */ + CHECK_N_DO(ERR_PROT_MOP, t.vm, ABSOLUTE, WRITE, mem1, t.size, GADDR_V(mem1), KEY(2)); + CHECK_N_DO(ERR_PROT_MOP, t.vm, ABSOLUTE, READ, mem2, t.size, GADDR_V(mem2), KEY(2)); + + kvm_vm_free(t.kvm_vm); +} + +const uint64_t last_page_addr = -PAGE_SIZE; + +static void guest_copy_key_fetch_prot_override(void) +{ + int i; + char *page_0 = 0; + + GUEST_SYNC(STAGE_INITED); + set_storage_key_range(0, PAGE_SIZE, 0x18); + set_storage_key_range((void *)last_page_addr, PAGE_SIZE, 0x0); + asm volatile ("sske %[key],%[addr]\n" :: [addr] "r"(0L), [key] "r"(0x18) : "cc"); + GUEST_SYNC(STAGE_SKEYS_SET); + + for (;;) { + for (i = 0; i < PAGE_SIZE; i++) + page_0[i] = mem1[i]; + GUEST_SYNC(STAGE_COPIED); + } +} + +static void test_copy_key_fetch_prot_override(void) +{ + struct test_default t = test_default_init(guest_copy_key_fetch_prot_override); + vm_vaddr_t guest_0_page, guest_last_page; + + guest_0_page = vm_vaddr_alloc(t.kvm_vm, PAGE_SIZE, 0); + guest_last_page = vm_vaddr_alloc(t.kvm_vm, PAGE_SIZE, last_page_addr); + if (guest_0_page != 0 || guest_last_page != last_page_addr) { + print_skip("did not allocate guest pages at required positions"); + goto out; + } + + HOST_SYNC(t.vcpu, STAGE_INITED); + t.run->s.regs.crs[0] |= CR0_FETCH_PROTECTION_OVERRIDE; + t.run->kvm_dirty_regs = KVM_SYNC_CRS; + HOST_SYNC(t.vcpu, STAGE_SKEYS_SET); + + /* vcpu, mismatching keys on fetch, fetch protection override applies */ + prepare_mem12(); + MOP(t.vcpu, LOGICAL, WRITE, mem1, PAGE_SIZE, GADDR_V(mem1)); + HOST_SYNC(t.vcpu, STAGE_COPIED); + CHECK_N_DO(MOP, t.vcpu, LOGICAL, READ, mem2, 2048, GADDR_V(guest_0_page), KEY(2)); + ASSERT_MEM_EQ(mem1, mem2, 2048); + + /* + * vcpu, mismatching keys on fetch, fetch protection override applies, + * wraparound + */ + prepare_mem12(); + MOP(t.vcpu, LOGICAL, WRITE, mem1, 2 * PAGE_SIZE, GADDR_V(guest_last_page)); + HOST_SYNC(t.vcpu, STAGE_COPIED); + CHECK_N_DO(MOP, t.vcpu, LOGICAL, READ, mem2, PAGE_SIZE + 2048, + GADDR_V(guest_last_page), KEY(2)); + ASSERT_MEM_EQ(mem1, mem2, 2048); + +out: + kvm_vm_free(t.kvm_vm); +} + +static void test_errors_key_fetch_prot_override_not_enabled(void) +{ + struct test_default t = test_default_init(guest_copy_key_fetch_prot_override); + vm_vaddr_t guest_0_page, guest_last_page; + + guest_0_page = vm_vaddr_alloc(t.kvm_vm, PAGE_SIZE, 0); + guest_last_page = vm_vaddr_alloc(t.kvm_vm, PAGE_SIZE, last_page_addr); + if (guest_0_page != 0 || guest_last_page != last_page_addr) { + print_skip("did not allocate guest pages at required positions"); + goto out; + } + HOST_SYNC(t.vcpu, STAGE_INITED); + HOST_SYNC(t.vcpu, STAGE_SKEYS_SET); + + /* vcpu, mismatching keys on fetch, fetch protection override not enabled */ + CHECK_N_DO(ERR_PROT_MOP, t.vcpu, LOGICAL, READ, mem2, 
2048, GADDR_V(0), KEY(2)); + +out: + kvm_vm_free(t.kvm_vm); +} + +static void test_errors_key_fetch_prot_override_enabled(void) +{ + struct test_default t = test_default_init(guest_copy_key_fetch_prot_override); + vm_vaddr_t guest_0_page, guest_last_page; + + guest_0_page = vm_vaddr_alloc(t.kvm_vm, PAGE_SIZE, 0); + guest_last_page = vm_vaddr_alloc(t.kvm_vm, PAGE_SIZE, last_page_addr); + if (guest_0_page != 0 || guest_last_page != last_page_addr) { + print_skip("did not allocate guest pages at required positions"); + goto out; + } + HOST_SYNC(t.vcpu, STAGE_INITED); + t.run->s.regs.crs[0] |= CR0_FETCH_PROTECTION_OVERRIDE; + t.run->kvm_dirty_regs = KVM_SYNC_CRS; + HOST_SYNC(t.vcpu, STAGE_SKEYS_SET); + + /* + * vcpu, mismatching keys on fetch, + * fetch protection override does not apply because memory range exceeded + */ + CHECK_N_DO(ERR_PROT_MOP, t.vcpu, LOGICAL, READ, mem2, 2048 + 1, GADDR_V(0), KEY(2)); + CHECK_N_DO(ERR_PROT_MOP, t.vcpu, LOGICAL, READ, mem2, PAGE_SIZE + 2048 + 1, + GADDR_V(guest_last_page), KEY(2)); + /* vm, fetch protected override does not apply */ + CHECK_N_DO(ERR_PROT_MOP, t.vm, ABSOLUTE, READ, mem2, 2048, GADDR(0), KEY(2)); + CHECK_N_DO(ERR_PROT_MOP, t.vm, ABSOLUTE, READ, mem2, 2048, GADDR_V(guest_0_page), KEY(2)); + +out: + kvm_vm_free(t.kvm_vm); +} + +static void guest_idle(void) +{ + GUEST_SYNC(STAGE_INITED); /* for consistency's sake */ + for (;;) + GUEST_SYNC(STAGE_IDLED); +} + +static void _test_errors_common(struct test_info info, enum mop_target target, int size) +{ + int rv; + + /* Bad size: */ + rv = ERR_MOP(info, target, WRITE, mem1, -1, GADDR_V(mem1)); + TEST_ASSERT(rv == -1 && errno == E2BIG, "ioctl allows insane sizes"); + + /* Zero size: */ + rv = ERR_MOP(info, target, WRITE, mem1, 0, GADDR_V(mem1)); + TEST_ASSERT(rv == -1 && (errno == EINVAL || errno == ENOMEM), + "ioctl allows 0 as size"); + + /* Bad flags: */ + rv = ERR_MOP(info, target, WRITE, mem1, size, GADDR_V(mem1), SET_FLAGS(-1)); + TEST_ASSERT(rv == -1 && errno == EINVAL, "ioctl allows all flags"); + + /* Bad guest address: */ + rv = ERR_MOP(info, target, WRITE, mem1, size, GADDR((void *)~0xfffUL), CHECK_ONLY); + TEST_ASSERT(rv > 0, "ioctl does not report bad guest memory address with CHECK_ONLY"); + rv = ERR_MOP(info, target, WRITE, mem1, size, GADDR((void *)~0xfffUL)); + TEST_ASSERT(rv > 0, "ioctl does not report bad guest memory address on write"); + + /* Bad host address: */ + rv = ERR_MOP(info, target, WRITE, 0, size, GADDR_V(mem1)); + TEST_ASSERT(rv == -1 && errno == EFAULT, + "ioctl does not report bad host memory address"); + + /* Bad key: */ + rv = ERR_MOP(info, target, WRITE, mem1, size, GADDR_V(mem1), KEY(17)); + TEST_ASSERT(rv == -1 && errno == EINVAL, "ioctl allows invalid key"); +} + +static void test_errors(void) +{ + struct test_default t = test_default_init(guest_idle); + int rv; + + HOST_SYNC(t.vcpu, STAGE_INITED); + + _test_errors_common(t.vcpu, LOGICAL, t.size); + _test_errors_common(t.vm, ABSOLUTE, t.size); + + /* Bad operation: */ + rv = ERR_MOP(t.vcpu, INVALID, WRITE, mem1, t.size, GADDR_V(mem1)); + TEST_ASSERT(rv == -1 && errno == EINVAL, "ioctl allows bad operations"); + /* virtual addresses are not translated when passing INVALID */ + rv = ERR_MOP(t.vm, INVALID, WRITE, mem1, PAGE_SIZE, GADDR(0)); + TEST_ASSERT(rv == -1 && errno == EINVAL, "ioctl allows bad operations"); + + /* Bad access register: */ + t.run->psw_mask &= ~(3UL << (63 - 17)); + t.run->psw_mask |= 1UL << (63 - 17); /* Enable AR mode */ + HOST_SYNC(t.vcpu, STAGE_IDLED); /* To sync new state to SIE block 
*/ + rv = ERR_MOP(t.vcpu, LOGICAL, WRITE, mem1, t.size, GADDR_V(mem1), AR(17)); + TEST_ASSERT(rv == -1 && errno == EINVAL, "ioctl allows ARs > 15"); + t.run->psw_mask &= ~(3UL << (63 - 17)); /* Disable AR mode */ + HOST_SYNC(t.vcpu, STAGE_IDLED); /* Run to sync new state */ + + /* Check that the SIDA calls are rejected for non-protected guests */ + rv = ERR_MOP(t.vcpu, SIDA, READ, mem1, 8, GADDR(0), SIDA_OFFSET(0x1c0)); + TEST_ASSERT(rv == -1 && errno == EINVAL, + "ioctl does not reject SIDA_READ in non-protected mode"); + rv = ERR_MOP(t.vcpu, SIDA, WRITE, mem1, 8, GADDR(0), SIDA_OFFSET(0x1c0)); + TEST_ASSERT(rv == -1 && errno == EINVAL, + "ioctl does not reject SIDA_WRITE in non-protected mode"); + + kvm_vm_free(t.kvm_vm); +} + +static void test_errors_cmpxchg(void) +{ + struct test_default t = test_default_init(guest_idle); + __uint128_t old; + int rv, i, power = 1; + + HOST_SYNC(t.vcpu, STAGE_INITED); + + for (i = 0; i < 32; i++) { + if (i == power) { + power *= 2; + continue; + } + rv = ERR_MOP(t.vm, ABSOLUTE, CMPXCHG, mem1, i, GADDR_V(mem1), + CMPXCHG_OLD(&old)); + TEST_ASSERT(rv == -1 && errno == EINVAL, + "ioctl allows bad size for cmpxchg"); + } + for (i = 1; i <= 16; i *= 2) { + rv = ERR_MOP(t.vm, ABSOLUTE, CMPXCHG, mem1, i, GADDR((void *)~0xfffUL), + CMPXCHG_OLD(&old)); + TEST_ASSERT(rv > 0, "ioctl allows bad guest address for cmpxchg"); + } + for (i = 2; i <= 16; i *= 2) { + rv = ERR_MOP(t.vm, ABSOLUTE, CMPXCHG, mem1, i, GADDR_V(mem1 + 1), + CMPXCHG_OLD(&old)); + TEST_ASSERT(rv == -1 && errno == EINVAL, + "ioctl allows bad alignment for cmpxchg"); + } + + kvm_vm_free(t.kvm_vm); +} + +int main(int argc, char *argv[]) +{ + int extension_cap, idx; + + TEST_REQUIRE(kvm_has_cap(KVM_CAP_S390_MEM_OP)); + extension_cap = kvm_check_cap(KVM_CAP_S390_MEM_OP_EXTENSION); + + struct testdef { + const char *name; + void (*test)(void); + bool requirements_met; + } testlist[] = { + { + .name = "simple copy", + .test = test_copy, + .requirements_met = true, + }, + { + .name = "generic error checks", + .test = test_errors, + .requirements_met = true, + }, + { + .name = "copy with storage keys", + .test = test_copy_key, + .requirements_met = extension_cap > 0, + }, + { + .name = "cmpxchg with storage keys", + .test = test_cmpxchg_key, + .requirements_met = extension_cap & 0x2, + }, + { + .name = "concurrently cmpxchg with storage keys", + .test = test_cmpxchg_key_concurrent, + .requirements_met = extension_cap & 0x2, + }, + { + .name = "copy with key storage protection override", + .test = test_copy_key_storage_prot_override, + .requirements_met = extension_cap > 0, + }, + { + .name = "copy with key fetch protection", + .test = test_copy_key_fetch_prot, + .requirements_met = extension_cap > 0, + }, + { + .name = "copy with key fetch protection override", + .test = test_copy_key_fetch_prot_override, + .requirements_met = extension_cap > 0, + }, + { + .name = "copy with access register mode", + .test = test_copy_access_register, + .requirements_met = true, + }, + { + .name = "error checks with key", + .test = test_errors_key, + .requirements_met = extension_cap > 0, + }, + { + .name = "error checks for cmpxchg with key", + .test = test_errors_cmpxchg_key, + .requirements_met = extension_cap & 0x2, + }, + { + .name = "error checks for cmpxchg", + .test = test_errors_cmpxchg, + .requirements_met = extension_cap & 0x2, + }, + { + .name = "termination", + .test = test_termination, + .requirements_met = extension_cap > 0, + }, + { + .name = "error checks with key storage protection override", + .test = 
test_errors_key_storage_prot_override, + .requirements_met = extension_cap > 0, + }, + { + .name = "error checks without key fetch prot override", + .test = test_errors_key_fetch_prot_override_not_enabled, + .requirements_met = extension_cap > 0, + }, + { + .name = "error checks with key fetch prot override", + .test = test_errors_key_fetch_prot_override_enabled, + .requirements_met = extension_cap > 0, + }, + }; + + ksft_print_header(); + ksft_set_plan(ARRAY_SIZE(testlist)); + + for (idx = 0; idx < ARRAY_SIZE(testlist); idx++) { + if (testlist[idx].requirements_met) { + testlist[idx].test(); + ksft_test_result_pass("%s\n", testlist[idx].name); + } else { + ksft_test_result_skip("%s - requirements not met (kernel has extension cap %#x)\n", + testlist[idx].name, extension_cap); + } + } + + ksft_finished(); /* Print results and exit() accordingly */ +} diff --git a/tools/testing/selftests/kvm/s390/resets.c b/tools/testing/selftests/kvm/s390/resets.c new file mode 100644 index 000000000000..b58f75b381e5 --- /dev/null +++ b/tools/testing/selftests/kvm/s390/resets.c @@ -0,0 +1,313 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Test for s390x CPU resets + * + * Copyright (C) 2020, IBM + */ + +#include +#include +#include +#include + +#include "test_util.h" +#include "kvm_util.h" +#include "kselftest.h" + +#define LOCAL_IRQS 32 + +#define ARBITRARY_NON_ZERO_VCPU_ID 3 + +struct kvm_s390_irq buf[ARBITRARY_NON_ZERO_VCPU_ID + LOCAL_IRQS]; + +static uint8_t regs_null[512]; + +static void guest_code_initial(void) +{ + /* set several CRs to "safe" value */ + unsigned long cr2_59 = 0x10; /* enable guarded storage */ + unsigned long cr8_63 = 0x1; /* monitor mask = 1 */ + unsigned long cr10 = 1; /* PER START */ + unsigned long cr11 = -1; /* PER END */ + + + /* Dirty registers */ + asm volatile ( + " lghi 2,0x11\n" /* Round toward 0 */ + " sfpc 2\n" /* set fpc to !=0 */ + " lctlg 2,2,%0\n" + " lctlg 8,8,%1\n" + " lctlg 10,10,%2\n" + " lctlg 11,11,%3\n" + /* now clobber some general purpose regs */ + " llihh 0,0xffff\n" + " llihl 1,0x5555\n" + " llilh 2,0xaaaa\n" + " llill 3,0x0000\n" + /* now clobber a floating point reg */ + " lghi 4,0x1\n" + " cdgbr 0,4\n" + /* now clobber an access reg */ + " sar 9,4\n" + /* We embed diag 501 here to control register content */ + " diag 0,0,0x501\n" + : + : "m" (cr2_59), "m" (cr8_63), "m" (cr10), "m" (cr11) + /* no clobber list as this should not return */ + ); +} + +static void test_one_reg(struct kvm_vcpu *vcpu, uint64_t id, uint64_t value) +{ + uint64_t eval_reg; + + eval_reg = vcpu_get_reg(vcpu, id); + TEST_ASSERT(eval_reg == value, "value == 0x%lx", value); +} + +static void assert_noirq(struct kvm_vcpu *vcpu) +{ + struct kvm_s390_irq_state irq_state; + int irqs; + + irq_state.len = sizeof(buf); + irq_state.buf = (unsigned long)buf; + irqs = __vcpu_ioctl(vcpu, KVM_S390_GET_IRQ_STATE, &irq_state); + /* + * irqs contains the number of retrieved interrupts. Any interrupt + * (notably, the emergency call interrupt we have injected) should + * be cleared by the resets, so this should be 0. 
+ */ + TEST_ASSERT(irqs >= 0, "Could not fetch IRQs: errno %d", errno); + TEST_ASSERT(!irqs, "IRQ pending"); +} + +static void assert_clear(struct kvm_vcpu *vcpu) +{ + struct kvm_sync_regs *sync_regs = &vcpu->run->s.regs; + struct kvm_sregs sregs; + struct kvm_regs regs; + struct kvm_fpu fpu; + + vcpu_regs_get(vcpu, ®s); + TEST_ASSERT(!memcmp(®s.gprs, regs_null, sizeof(regs.gprs)), "grs == 0"); + + vcpu_sregs_get(vcpu, &sregs); + TEST_ASSERT(!memcmp(&sregs.acrs, regs_null, sizeof(sregs.acrs)), "acrs == 0"); + + vcpu_fpu_get(vcpu, &fpu); + TEST_ASSERT(!memcmp(&fpu.fprs, regs_null, sizeof(fpu.fprs)), "fprs == 0"); + + /* sync regs */ + TEST_ASSERT(!memcmp(sync_regs->gprs, regs_null, sizeof(sync_regs->gprs)), + "gprs0-15 == 0 (sync_regs)"); + + TEST_ASSERT(!memcmp(sync_regs->acrs, regs_null, sizeof(sync_regs->acrs)), + "acrs0-15 == 0 (sync_regs)"); + + TEST_ASSERT(!memcmp(sync_regs->vrs, regs_null, sizeof(sync_regs->vrs)), + "vrs0-15 == 0 (sync_regs)"); +} + +static void assert_initial_noclear(struct kvm_vcpu *vcpu) +{ + struct kvm_sync_regs *sync_regs = &vcpu->run->s.regs; + + TEST_ASSERT(sync_regs->gprs[0] == 0xffff000000000000UL, + "gpr0 == 0xffff000000000000 (sync_regs)"); + TEST_ASSERT(sync_regs->gprs[1] == 0x0000555500000000UL, + "gpr1 == 0x0000555500000000 (sync_regs)"); + TEST_ASSERT(sync_regs->gprs[2] == 0x00000000aaaa0000UL, + "gpr2 == 0x00000000aaaa0000 (sync_regs)"); + TEST_ASSERT(sync_regs->gprs[3] == 0x0000000000000000UL, + "gpr3 == 0x0000000000000000 (sync_regs)"); + TEST_ASSERT(sync_regs->fprs[0] == 0x3ff0000000000000UL, + "fpr0 == 0f1 (sync_regs)"); + TEST_ASSERT(sync_regs->acrs[9] == 1, "ar9 == 1 (sync_regs)"); +} + +static void assert_initial(struct kvm_vcpu *vcpu) +{ + struct kvm_sync_regs *sync_regs = &vcpu->run->s.regs; + struct kvm_sregs sregs; + struct kvm_fpu fpu; + + /* KVM_GET_SREGS */ + vcpu_sregs_get(vcpu, &sregs); + TEST_ASSERT(sregs.crs[0] == 0xE0UL, "cr0 == 0xE0 (KVM_GET_SREGS)"); + TEST_ASSERT(sregs.crs[14] == 0xC2000000UL, + "cr14 == 0xC2000000 (KVM_GET_SREGS)"); + TEST_ASSERT(!memcmp(&sregs.crs[1], regs_null, sizeof(sregs.crs[1]) * 12), + "cr1-13 == 0 (KVM_GET_SREGS)"); + TEST_ASSERT(sregs.crs[15] == 0, "cr15 == 0 (KVM_GET_SREGS)"); + + /* sync regs */ + TEST_ASSERT(sync_regs->crs[0] == 0xE0UL, "cr0 == 0xE0 (sync_regs)"); + TEST_ASSERT(sync_regs->crs[14] == 0xC2000000UL, + "cr14 == 0xC2000000 (sync_regs)"); + TEST_ASSERT(!memcmp(&sync_regs->crs[1], regs_null, 8 * 12), + "cr1-13 == 0 (sync_regs)"); + TEST_ASSERT(sync_regs->crs[15] == 0, "cr15 == 0 (sync_regs)"); + TEST_ASSERT(sync_regs->fpc == 0, "fpc == 0 (sync_regs)"); + TEST_ASSERT(sync_regs->todpr == 0, "todpr == 0 (sync_regs)"); + TEST_ASSERT(sync_regs->cputm == 0, "cputm == 0 (sync_regs)"); + TEST_ASSERT(sync_regs->ckc == 0, "ckc == 0 (sync_regs)"); + TEST_ASSERT(sync_regs->pp == 0, "pp == 0 (sync_regs)"); + TEST_ASSERT(sync_regs->gbea == 1, "gbea == 1 (sync_regs)"); + + /* kvm_run */ + TEST_ASSERT(vcpu->run->psw_addr == 0, "psw_addr == 0 (kvm_run)"); + TEST_ASSERT(vcpu->run->psw_mask == 0, "psw_mask == 0 (kvm_run)"); + + vcpu_fpu_get(vcpu, &fpu); + TEST_ASSERT(!fpu.fpc, "fpc == 0"); + + test_one_reg(vcpu, KVM_REG_S390_GBEA, 1); + test_one_reg(vcpu, KVM_REG_S390_PP, 0); + test_one_reg(vcpu, KVM_REG_S390_TODPR, 0); + test_one_reg(vcpu, KVM_REG_S390_CPU_TIMER, 0); + test_one_reg(vcpu, KVM_REG_S390_CLOCK_COMP, 0); +} + +static void assert_normal_noclear(struct kvm_vcpu *vcpu) +{ + struct kvm_sync_regs *sync_regs = &vcpu->run->s.regs; + + TEST_ASSERT(sync_regs->crs[2] == 0x10, "cr2 == 10 (sync_regs)"); + 
TEST_ASSERT(sync_regs->crs[8] == 1, "cr10 == 1 (sync_regs)"); + TEST_ASSERT(sync_regs->crs[10] == 1, "cr10 == 1 (sync_regs)"); + TEST_ASSERT(sync_regs->crs[11] == -1, "cr11 == -1 (sync_regs)"); +} + +static void assert_normal(struct kvm_vcpu *vcpu) +{ + test_one_reg(vcpu, KVM_REG_S390_PFTOKEN, KVM_S390_PFAULT_TOKEN_INVALID); + TEST_ASSERT(vcpu->run->s.regs.pft == KVM_S390_PFAULT_TOKEN_INVALID, + "pft == 0xff..... (sync_regs)"); + assert_noirq(vcpu); +} + +static void inject_irq(struct kvm_vcpu *vcpu) +{ + struct kvm_s390_irq_state irq_state; + struct kvm_s390_irq *irq = &buf[0]; + int irqs; + + /* Inject IRQ */ + irq_state.len = sizeof(struct kvm_s390_irq); + irq_state.buf = (unsigned long)buf; + irq->type = KVM_S390_INT_EMERGENCY; + irq->u.emerg.code = vcpu->id; + irqs = __vcpu_ioctl(vcpu, KVM_S390_SET_IRQ_STATE, &irq_state); + TEST_ASSERT(irqs >= 0, "Error injecting EMERGENCY IRQ errno %d", errno); +} + +static struct kvm_vm *create_vm(struct kvm_vcpu **vcpu) +{ + struct kvm_vm *vm; + + vm = vm_create(1); + + *vcpu = vm_vcpu_add(vm, ARBITRARY_NON_ZERO_VCPU_ID, guest_code_initial); + + return vm; +} + +static void test_normal(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + + ksft_print_msg("Testing normal reset\n"); + vm = create_vm(&vcpu); + + vcpu_run(vcpu); + + inject_irq(vcpu); + + vcpu_ioctl(vcpu, KVM_S390_NORMAL_RESET, NULL); + + /* must clears */ + assert_normal(vcpu); + /* must not clears */ + assert_normal_noclear(vcpu); + assert_initial_noclear(vcpu); + + kvm_vm_free(vm); +} + +static void test_initial(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + + ksft_print_msg("Testing initial reset\n"); + vm = create_vm(&vcpu); + + vcpu_run(vcpu); + + inject_irq(vcpu); + + vcpu_ioctl(vcpu, KVM_S390_INITIAL_RESET, NULL); + + /* must clears */ + assert_normal(vcpu); + assert_initial(vcpu); + /* must not clears */ + assert_initial_noclear(vcpu); + + kvm_vm_free(vm); +} + +static void test_clear(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + + ksft_print_msg("Testing clear reset\n"); + vm = create_vm(&vcpu); + + vcpu_run(vcpu); + + inject_irq(vcpu); + + vcpu_ioctl(vcpu, KVM_S390_CLEAR_RESET, NULL); + + /* must clears */ + assert_normal(vcpu); + assert_initial(vcpu); + assert_clear(vcpu); + + kvm_vm_free(vm); +} + +struct testdef { + const char *name; + void (*test)(void); + bool needs_cap; +} testlist[] = { + { "initial", test_initial, false }, + { "normal", test_normal, true }, + { "clear", test_clear, true }, +}; + +int main(int argc, char *argv[]) +{ + bool has_s390_vcpu_resets = kvm_check_cap(KVM_CAP_S390_VCPU_RESETS); + int idx; + + ksft_print_header(); + ksft_set_plan(ARRAY_SIZE(testlist)); + + for (idx = 0; idx < ARRAY_SIZE(testlist); idx++) { + if (!testlist[idx].needs_cap || has_s390_vcpu_resets) { + testlist[idx].test(); + ksft_test_result_pass("%s\n", testlist[idx].name); + } else { + ksft_test_result_skip("%s - no VCPU_RESETS capability\n", + testlist[idx].name); + } + } + + ksft_finished(); /* Print results and exit() accordingly */ +} diff --git a/tools/testing/selftests/kvm/s390/shared_zeropage_test.c b/tools/testing/selftests/kvm/s390/shared_zeropage_test.c new file mode 100644 index 000000000000..bba0d9a6dcc8 --- /dev/null +++ b/tools/testing/selftests/kvm/s390/shared_zeropage_test.c @@ -0,0 +1,111 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Test shared zeropage handling (with/without storage keys) + * + * Copyright (C) 2024, Red Hat, Inc. 
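+ *
+ * s390 storage keys are incompatible with the shared zeropage, so KVM is
+ * expected to unshare existing zeropages and stop handing out new ones as
+ * soon as the guest issues a storage key instruction. The test observes
+ * this through /proc/self/pagemap (PAGEMAP_SCAN with PAGE_IS_PFNZERO).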
+ */ +#include + +#include + +#include "test_util.h" +#include "kvm_util.h" +#include "kselftest.h" +#include "ucall_common.h" + +static void set_storage_key(void *addr, uint8_t skey) +{ + asm volatile("sske %0,%1" : : "d" (skey), "a" (addr)); +} + +static void guest_code(void) +{ + /* Issue some storage key instruction. */ + set_storage_key((void *)0, 0x98); + GUEST_DONE(); +} + +/* + * Returns 1 if the shared zeropage is mapped, 0 if something else is mapped. + * Returns < 0 on error or if nothing is mapped. + */ +static int maps_shared_zeropage(int pagemap_fd, void *addr) +{ + struct page_region region; + struct pm_scan_arg arg = { + .start = (uintptr_t)addr, + .end = (uintptr_t)addr + 4096, + .vec = (uintptr_t)®ion, + .vec_len = 1, + .size = sizeof(struct pm_scan_arg), + .category_mask = PAGE_IS_PFNZERO, + .category_anyof_mask = PAGE_IS_PRESENT, + .return_mask = PAGE_IS_PFNZERO, + }; + return ioctl(pagemap_fd, PAGEMAP_SCAN, &arg); +} + +int main(int argc, char *argv[]) +{ + char *mem, *page0, *page1, *page2, tmp; + const size_t pagesize = getpagesize(); + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + struct ucall uc; + int pagemap_fd; + + ksft_print_header(); + ksft_set_plan(3); + + /* + * We'll use memory that is not mapped into the VM for simplicity. + * Shared zeropages are enabled/disabled per-process. + */ + mem = mmap(0, 3 * pagesize, PROT_READ, MAP_PRIVATE | MAP_ANON, -1, 0); + TEST_ASSERT(mem != MAP_FAILED, "mmap() failed"); + + /* Disable THP. Ignore errors on older kernels. */ + madvise(mem, 3 * pagesize, MADV_NOHUGEPAGE); + + page0 = mem; + page1 = page0 + pagesize; + page2 = page1 + pagesize; + + /* Can we even detect shared zeropages? */ + pagemap_fd = open("/proc/self/pagemap", O_RDONLY); + TEST_REQUIRE(pagemap_fd >= 0); + + tmp = *page0; + asm volatile("" : "+r" (tmp)); + TEST_REQUIRE(maps_shared_zeropage(pagemap_fd, page0) == 1); + + vm = vm_create_with_one_vcpu(&vcpu, guest_code); + + /* Verify that we get the shared zeropage after VM creation. */ + tmp = *page1; + asm volatile("" : "+r" (tmp)); + ksft_test_result(maps_shared_zeropage(pagemap_fd, page1) == 1, + "Shared zeropages should be enabled\n"); + + /* + * Let our VM execute a storage key instruction that should + * unshare all shared zeropages. + */ + vcpu_run(vcpu); + get_ucall(vcpu, &uc); + TEST_ASSERT_EQ(uc.cmd, UCALL_DONE); + + /* Verify that we don't have a shared zeropage anymore. */ + ksft_test_result(!maps_shared_zeropage(pagemap_fd, page1), + "Shared zeropage should be gone\n"); + + /* Verify that we don't get any new shared zeropages. */ + tmp = *page2; + asm volatile("" : "+r" (tmp)); + ksft_test_result(!maps_shared_zeropage(pagemap_fd, page2), + "Shared zeropages should be disabled\n"); + + kvm_vm_free(vm); + + ksft_finished(); +} diff --git a/tools/testing/selftests/kvm/s390/sync_regs_test.c b/tools/testing/selftests/kvm/s390/sync_regs_test.c new file mode 100644 index 000000000000..53def355ccba --- /dev/null +++ b/tools/testing/selftests/kvm/s390/sync_regs_test.c @@ -0,0 +1,238 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Test for s390x KVM_CAP_SYNC_REGS + * + * Based on the same test for x86: + * Copyright (C) 2018, Google LLC. + * + * Adaptions for s390x: + * Copyright (C) 2019, Red Hat, Inc. + * + * Test expected behavior of the KVM_CAP_SYNC_REGS functionality. 
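+ *
+ * With KVM_CAP_SYNC_REGS, the register sets requested in
+ * kvm_run->kvm_valid_regs are mirrored into kvm_run->s.regs on each exit,
+ * and the sets flagged in kvm_run->kvm_dirty_regs are loaded back from
+ * there on the next KVM_RUN, avoiding separate KVM_GET/SET_(S)REGS ioctls.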
+ */ +#include +#include +#include +#include +#include + +#include "test_util.h" +#include "kvm_util.h" +#include "diag318_test_handler.h" +#include "kselftest.h" + +static void guest_code(void) +{ + /* + * We embed diag 501 here instead of doing a ucall to avoid that + * the compiler has messed with r11 at the time of the ucall. + */ + asm volatile ( + "0: diag 0,0,0x501\n" + " ahi 11,1\n" + " j 0b\n" + ); +} + +#define REG_COMPARE(reg) \ + TEST_ASSERT(left->reg == right->reg, \ + "Register " #reg \ + " values did not match: 0x%llx, 0x%llx", \ + left->reg, right->reg) + +#define REG_COMPARE32(reg) \ + TEST_ASSERT(left->reg == right->reg, \ + "Register " #reg \ + " values did not match: 0x%x, 0x%x", \ + left->reg, right->reg) + + +static void compare_regs(struct kvm_regs *left, struct kvm_sync_regs *right) +{ + int i; + + for (i = 0; i < 16; i++) + REG_COMPARE(gprs[i]); +} + +static void compare_sregs(struct kvm_sregs *left, struct kvm_sync_regs *right) +{ + int i; + + for (i = 0; i < 16; i++) + REG_COMPARE32(acrs[i]); + + for (i = 0; i < 16; i++) + REG_COMPARE(crs[i]); +} + +#undef REG_COMPARE + +#define TEST_SYNC_FIELDS (KVM_SYNC_GPRS|KVM_SYNC_ACRS|KVM_SYNC_CRS|KVM_SYNC_DIAG318) +#define INVALID_SYNC_FIELD 0x80000000 + +void test_read_invalid(struct kvm_vcpu *vcpu) +{ + struct kvm_run *run = vcpu->run; + int rv; + + /* Request reading invalid register set from VCPU. */ + run->kvm_valid_regs = INVALID_SYNC_FIELD; + rv = _vcpu_run(vcpu); + TEST_ASSERT(rv < 0 && errno == EINVAL, + "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d", + rv); + run->kvm_valid_regs = 0; + + run->kvm_valid_regs = INVALID_SYNC_FIELD | TEST_SYNC_FIELDS; + rv = _vcpu_run(vcpu); + TEST_ASSERT(rv < 0 && errno == EINVAL, + "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d", + rv); + run->kvm_valid_regs = 0; +} + +void test_set_invalid(struct kvm_vcpu *vcpu) +{ + struct kvm_run *run = vcpu->run; + int rv; + + /* Request setting invalid register set into VCPU. */ + run->kvm_dirty_regs = INVALID_SYNC_FIELD; + rv = _vcpu_run(vcpu); + TEST_ASSERT(rv < 0 && errno == EINVAL, + "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d", + rv); + run->kvm_dirty_regs = 0; + + run->kvm_dirty_regs = INVALID_SYNC_FIELD | TEST_SYNC_FIELDS; + rv = _vcpu_run(vcpu); + TEST_ASSERT(rv < 0 && errno == EINVAL, + "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d", + rv); + run->kvm_dirty_regs = 0; +} + +void test_req_and_verify_all_valid_regs(struct kvm_vcpu *vcpu) +{ + struct kvm_run *run = vcpu->run; + struct kvm_sregs sregs; + struct kvm_regs regs; + int rv; + + /* Request and verify all valid register sets. 
*/ + run->kvm_valid_regs = TEST_SYNC_FIELDS; + rv = _vcpu_run(vcpu); + TEST_ASSERT(rv == 0, "vcpu_run failed: %d", rv); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_S390_SIEIC); + TEST_ASSERT(run->s390_sieic.icptcode == 4 && + (run->s390_sieic.ipa >> 8) == 0x83 && + (run->s390_sieic.ipb >> 16) == 0x501, + "Unexpected interception code: ic=%u, ipa=0x%x, ipb=0x%x", + run->s390_sieic.icptcode, run->s390_sieic.ipa, + run->s390_sieic.ipb); + + vcpu_regs_get(vcpu, ®s); + compare_regs(®s, &run->s.regs); + + vcpu_sregs_get(vcpu, &sregs); + compare_sregs(&sregs, &run->s.regs); +} + +void test_set_and_verify_various_reg_values(struct kvm_vcpu *vcpu) +{ + struct kvm_run *run = vcpu->run; + struct kvm_sregs sregs; + struct kvm_regs regs; + int rv; + + /* Set and verify various register values */ + run->s.regs.gprs[11] = 0xBAD1DEA; + run->s.regs.acrs[0] = 1 << 11; + + run->kvm_valid_regs = TEST_SYNC_FIELDS; + run->kvm_dirty_regs = KVM_SYNC_GPRS | KVM_SYNC_ACRS; + + if (get_diag318_info() > 0) { + run->s.regs.diag318 = get_diag318_info(); + run->kvm_dirty_regs |= KVM_SYNC_DIAG318; + } + + rv = _vcpu_run(vcpu); + TEST_ASSERT(rv == 0, "vcpu_run failed: %d", rv); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_S390_SIEIC); + TEST_ASSERT(run->s.regs.gprs[11] == 0xBAD1DEA + 1, + "r11 sync regs value incorrect 0x%llx.", + run->s.regs.gprs[11]); + TEST_ASSERT(run->s.regs.acrs[0] == 1 << 11, + "acr0 sync regs value incorrect 0x%x.", + run->s.regs.acrs[0]); + TEST_ASSERT(run->s.regs.diag318 == get_diag318_info(), + "diag318 sync regs value incorrect 0x%llx.", + run->s.regs.diag318); + + vcpu_regs_get(vcpu, ®s); + compare_regs(®s, &run->s.regs); + + vcpu_sregs_get(vcpu, &sregs); + compare_sregs(&sregs, &run->s.regs); +} + +void test_clear_kvm_dirty_regs_bits(struct kvm_vcpu *vcpu) +{ + struct kvm_run *run = vcpu->run; + int rv; + + /* Clear kvm_dirty_regs bits, verify new s.regs values are + * overwritten with existing guest values. 
+ */ + run->kvm_valid_regs = TEST_SYNC_FIELDS; + run->kvm_dirty_regs = 0; + run->s.regs.gprs[11] = 0xDEADBEEF; + run->s.regs.diag318 = 0x4B1D; + rv = _vcpu_run(vcpu); + TEST_ASSERT(rv == 0, "vcpu_run failed: %d", rv); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_S390_SIEIC); + TEST_ASSERT(run->s.regs.gprs[11] != 0xDEADBEEF, + "r11 sync regs value incorrect 0x%llx.", + run->s.regs.gprs[11]); + TEST_ASSERT(run->s.regs.diag318 != 0x4B1D, + "diag318 sync regs value incorrect 0x%llx.", + run->s.regs.diag318); +} + +struct testdef { + const char *name; + void (*test)(struct kvm_vcpu *vcpu); +} testlist[] = { + { "read invalid", test_read_invalid }, + { "set invalid", test_set_invalid }, + { "request+verify all valid regs", test_req_and_verify_all_valid_regs }, + { "set+verify various regs", test_set_and_verify_various_reg_values }, + { "clear kvm_dirty_regs bits", test_clear_kvm_dirty_regs_bits }, +}; + +int main(int argc, char *argv[]) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + int idx; + + TEST_REQUIRE(kvm_has_cap(KVM_CAP_SYNC_REGS)); + + ksft_print_header(); + + ksft_set_plan(ARRAY_SIZE(testlist)); + + /* Create VM */ + vm = vm_create_with_one_vcpu(&vcpu, guest_code); + + for (idx = 0; idx < ARRAY_SIZE(testlist); idx++) { + testlist[idx].test(vcpu); + ksft_test_result_pass("%s\n", testlist[idx].name); + } + + kvm_vm_free(vm); + + ksft_finished(); /* Print results and exit() accordingly */ +} diff --git a/tools/testing/selftests/kvm/s390/tprot.c b/tools/testing/selftests/kvm/s390/tprot.c new file mode 100644 index 000000000000..12d5e1cb62e3 --- /dev/null +++ b/tools/testing/selftests/kvm/s390/tprot.c @@ -0,0 +1,244 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Test TEST PROTECTION emulation. + * + * Copyright IBM Corp. 2021 + */ +#include +#include "test_util.h" +#include "kvm_util.h" +#include "kselftest.h" +#include "ucall_common.h" +#include "processor.h" + +#define CR0_FETCH_PROTECTION_OVERRIDE (1UL << (63 - 38)) +#define CR0_STORAGE_PROTECTION_OVERRIDE (1UL << (63 - 39)) + +static __aligned(PAGE_SIZE) uint8_t pages[2][PAGE_SIZE]; +static uint8_t *const page_store_prot = pages[0]; +static uint8_t *const page_fetch_prot = pages[1]; + +/* Nonzero return value indicates that address not mapped */ +static int set_storage_key(void *addr, uint8_t key) +{ + int not_mapped = 0; + + asm volatile ( + "lra %[addr], 0(0,%[addr])\n" + " jz 0f\n" + " llill %[not_mapped],1\n" + " j 1f\n" + "0: sske %[key], %[addr]\n" + "1:" + : [addr] "+&a" (addr), [not_mapped] "+r" (not_mapped) + : [key] "r" (key) + : "cc" + ); + return -not_mapped; +} + +enum permission { + READ_WRITE = 0, + READ = 1, + RW_PROTECTED = 2, + TRANSL_UNAVAIL = 3, +}; + +static enum permission test_protection(void *addr, uint8_t key) +{ + uint64_t mask; + + asm volatile ( + "tprot %[addr], 0(%[key])\n" + " ipm %[mask]\n" + : [mask] "=r" (mask) + : [addr] "Q" (*(char *)addr), + [key] "a" (key) + : "cc" + ); + + return (enum permission)(mask >> 28); +} + +enum stage { + STAGE_INIT_SIMPLE, + TEST_SIMPLE, + STAGE_INIT_FETCH_PROT_OVERRIDE, + TEST_FETCH_PROT_OVERRIDE, + TEST_STORAGE_PROT_OVERRIDE, + STAGE_END /* must be the last entry (it's the amount of tests) */ +}; + +struct test { + enum stage stage; + void *addr; + uint8_t key; + enum permission expected; +} tests[] = { + /* + * We perform each test in the array by executing TEST PROTECTION on + * the specified addr with the specified key and checking if the returned + * permissions match the expected value. 
+ * Both guest and host cooperate to set up the required test conditions. + * A central condition is that the page targeted by addr has to be DAT + * protected in the host mappings, in order for KVM to emulate the + * TEST PROTECTION instruction. + * Since the page tables are shared, the host uses mprotect to achieve + * this. + * + * Test resulting in RW_PROTECTED/TRANSL_UNAVAIL will be interpreted + * by SIE, not KVM, but there is no harm in testing them also. + * See Enhanced Suppression-on-Protection Facilities in the + * Interpretive-Execution Mode + */ + /* + * guest: set storage key of page_store_prot to 1 + * storage key of page_fetch_prot to 9 and enable + * protection for it + * STAGE_INIT_SIMPLE + * host: write protect both via mprotect + */ + /* access key 0 matches any storage key -> RW */ + { TEST_SIMPLE, page_store_prot, 0x00, READ_WRITE }, + /* access key matches storage key -> RW */ + { TEST_SIMPLE, page_store_prot, 0x10, READ_WRITE }, + /* mismatched keys, but no fetch protection -> RO */ + { TEST_SIMPLE, page_store_prot, 0x20, READ }, + /* access key 0 matches any storage key -> RW */ + { TEST_SIMPLE, page_fetch_prot, 0x00, READ_WRITE }, + /* access key matches storage key -> RW */ + { TEST_SIMPLE, page_fetch_prot, 0x90, READ_WRITE }, + /* mismatched keys, fetch protection -> inaccessible */ + { TEST_SIMPLE, page_fetch_prot, 0x10, RW_PROTECTED }, + /* page 0 not mapped yet -> translation not available */ + { TEST_SIMPLE, (void *)0x00, 0x10, TRANSL_UNAVAIL }, + /* + * host: try to map page 0 + * guest: set storage key of page 0 to 9 and enable fetch protection + * STAGE_INIT_FETCH_PROT_OVERRIDE + * host: write protect page 0 + * enable fetch protection override + */ + /* mismatched keys, fetch protection, but override applies -> RO */ + { TEST_FETCH_PROT_OVERRIDE, (void *)0x00, 0x10, READ }, + /* mismatched keys, fetch protection, override applies to 0-2048 only -> inaccessible */ + { TEST_FETCH_PROT_OVERRIDE, (void *)2049, 0x10, RW_PROTECTED }, + /* + * host: enable storage protection override + */ + /* mismatched keys, but override applies (storage key 9) -> RW */ + { TEST_STORAGE_PROT_OVERRIDE, page_fetch_prot, 0x10, READ_WRITE }, + /* mismatched keys, no fetch protection, override doesn't apply -> RO */ + { TEST_STORAGE_PROT_OVERRIDE, page_store_prot, 0x20, READ }, + /* mismatched keys, but override applies (storage key 9) -> RW */ + { TEST_STORAGE_PROT_OVERRIDE, (void *)2049, 0x10, READ_WRITE }, + /* end marker */ + { STAGE_END, 0, 0, 0 }, +}; + +static enum stage perform_next_stage(int *i, bool mapped_0) +{ + enum stage stage = tests[*i].stage; + enum permission result; + bool skip; + + for (; tests[*i].stage == stage; (*i)++) { + /* + * Some fetch protection override tests require that page 0 + * be mapped, however, when the hosts tries to map that page via + * vm_vaddr_alloc, it may happen that some other page gets mapped + * instead. 
+ * In order to skip these tests we detect this inside the guest + */ + skip = tests[*i].addr < (void *)PAGE_SIZE && + tests[*i].expected != TRANSL_UNAVAIL && + !mapped_0; + if (!skip) { + result = test_protection(tests[*i].addr, tests[*i].key); + __GUEST_ASSERT(result == tests[*i].expected, + "Wanted %u, got %u, for i = %u", + tests[*i].expected, result, *i); + } + } + return stage; +} + +static void guest_code(void) +{ + bool mapped_0; + int i = 0; + + GUEST_ASSERT_EQ(set_storage_key(page_store_prot, 0x10), 0); + GUEST_ASSERT_EQ(set_storage_key(page_fetch_prot, 0x98), 0); + GUEST_SYNC(STAGE_INIT_SIMPLE); + GUEST_SYNC(perform_next_stage(&i, false)); + + /* Fetch-protection override */ + mapped_0 = !set_storage_key((void *)0, 0x98); + GUEST_SYNC(STAGE_INIT_FETCH_PROT_OVERRIDE); + GUEST_SYNC(perform_next_stage(&i, mapped_0)); + + /* Storage-protection override */ + GUEST_SYNC(perform_next_stage(&i, mapped_0)); +} + +#define HOST_SYNC_NO_TAP(vcpup, stage) \ +({ \ + struct kvm_vcpu *__vcpu = (vcpup); \ + struct ucall uc; \ + int __stage = (stage); \ + \ + vcpu_run(__vcpu); \ + get_ucall(__vcpu, &uc); \ + if (uc.cmd == UCALL_ABORT) \ + REPORT_GUEST_ASSERT(uc); \ + TEST_ASSERT_EQ(uc.cmd, UCALL_SYNC); \ + TEST_ASSERT_EQ(uc.args[1], __stage); \ +}) + +#define HOST_SYNC(vcpu, stage) \ +({ \ + HOST_SYNC_NO_TAP(vcpu, stage); \ + ksft_test_result_pass("" #stage "\n"); \ +}) + +int main(int argc, char *argv[]) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + struct kvm_run *run; + vm_vaddr_t guest_0_page; + + ksft_print_header(); + ksft_set_plan(STAGE_END); + + vm = vm_create_with_one_vcpu(&vcpu, guest_code); + run = vcpu->run; + + HOST_SYNC(vcpu, STAGE_INIT_SIMPLE); + mprotect(addr_gva2hva(vm, (vm_vaddr_t)pages), PAGE_SIZE * 2, PROT_READ); + HOST_SYNC(vcpu, TEST_SIMPLE); + + guest_0_page = vm_vaddr_alloc(vm, PAGE_SIZE, 0); + if (guest_0_page != 0) { + /* Use NO_TAP so we don't get a PASS print */ + HOST_SYNC_NO_TAP(vcpu, STAGE_INIT_FETCH_PROT_OVERRIDE); + ksft_test_result_skip("STAGE_INIT_FETCH_PROT_OVERRIDE - " + "Did not allocate page at 0\n"); + } else { + HOST_SYNC(vcpu, STAGE_INIT_FETCH_PROT_OVERRIDE); + } + if (guest_0_page == 0) + mprotect(addr_gva2hva(vm, (vm_vaddr_t)0), PAGE_SIZE, PROT_READ); + run->s.regs.crs[0] |= CR0_FETCH_PROTECTION_OVERRIDE; + run->kvm_dirty_regs = KVM_SYNC_CRS; + HOST_SYNC(vcpu, TEST_FETCH_PROT_OVERRIDE); + + run->s.regs.crs[0] |= CR0_STORAGE_PROTECTION_OVERRIDE; + run->kvm_dirty_regs = KVM_SYNC_CRS; + HOST_SYNC(vcpu, TEST_STORAGE_PROT_OVERRIDE); + + kvm_vm_free(vm); + + ksft_finished(); /* Print results and exit() accordingly */ +} diff --git a/tools/testing/selftests/kvm/s390/ucontrol_test.c b/tools/testing/selftests/kvm/s390/ucontrol_test.c new file mode 100644 index 000000000000..0c112319dab1 --- /dev/null +++ b/tools/testing/selftests/kvm/s390/ucontrol_test.c @@ -0,0 +1,638 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Test code for the s390x kvm ucontrol interface + * + * Copyright IBM Corp. 
2024 + * + * Authors: + * Christoph Schlameuss + */ +#include "debug_print.h" +#include "kselftest_harness.h" +#include "kvm_util.h" +#include "processor.h" +#include "sie.h" + +#include +#include + +#define PGM_SEGMENT_TRANSLATION 0x10 + +#define VM_MEM_SIZE (4 * SZ_1M) +#define VM_MEM_EXT_SIZE (2 * SZ_1M) +#define VM_MEM_MAX_M ((VM_MEM_SIZE + VM_MEM_EXT_SIZE) / SZ_1M) + +/* so directly declare capget to check caps without libcap */ +int capget(cap_user_header_t header, cap_user_data_t data); + +/** + * In order to create user controlled virtual machines on S390, + * check KVM_CAP_S390_UCONTROL and use the flag KVM_VM_S390_UCONTROL + * as privileged user (SYS_ADMIN). + */ +void require_ucontrol_admin(void) +{ + struct __user_cap_data_struct data[_LINUX_CAPABILITY_U32S_3]; + struct __user_cap_header_struct hdr = { + .version = _LINUX_CAPABILITY_VERSION_3, + }; + int rc; + + rc = capget(&hdr, data); + TEST_ASSERT_EQ(0, rc); + TEST_REQUIRE((data->effective & CAP_TO_MASK(CAP_SYS_ADMIN)) > 0); + + TEST_REQUIRE(kvm_has_cap(KVM_CAP_S390_UCONTROL)); +} + +/* Test program setting some registers and looping */ +extern char test_gprs_asm[]; +asm("test_gprs_asm:\n" + "xgr %r0, %r0\n" + "lgfi %r1,1\n" + "lgfi %r2,2\n" + "lgfi %r3,3\n" + "lgfi %r4,4\n" + "lgfi %r5,5\n" + "lgfi %r6,6\n" + "lgfi %r7,7\n" + "0:\n" + " diag 0,0,0x44\n" + " ahi %r0,1\n" + " j 0b\n" +); + +/* Test program manipulating memory */ +extern char test_mem_asm[]; +asm("test_mem_asm:\n" + "xgr %r0, %r0\n" + + "0:\n" + " ahi %r0,1\n" + " st %r1,0(%r5,%r6)\n" + + " xgr %r1,%r1\n" + " l %r1,0(%r5,%r6)\n" + " ahi %r0,1\n" + " diag 0,0,0x44\n" + + " j 0b\n" +); + +/* Test program manipulating storage keys */ +extern char test_skey_asm[]; +asm("test_skey_asm:\n" + "xgr %r0, %r0\n" + + "0:\n" + " ahi %r0,1\n" + " st %r1,0(%r5,%r6)\n" + + " iske %r1,%r6\n" + " ahi %r0,1\n" + " diag 0,0,0x44\n" + + " sske %r1,%r6\n" + " xgr %r1,%r1\n" + " iske %r1,%r6\n" + " ahi %r0,1\n" + " diag 0,0,0x44\n" + + " rrbe %r1,%r6\n" + " iske %r1,%r6\n" + " ahi %r0,1\n" + " diag 0,0,0x44\n" + + " j 0b\n" +); + +FIXTURE(uc_kvm) +{ + struct kvm_s390_sie_block *sie_block; + struct kvm_run *run; + uintptr_t base_gpa; + uintptr_t code_gpa; + uintptr_t base_hva; + uintptr_t code_hva; + int kvm_run_size; + vm_paddr_t pgd; + void *vm_mem; + int vcpu_fd; + int kvm_fd; + int vm_fd; +}; + +/** + * create VM with single vcpu, map kvm_run and SIE control block for easy access + */ +FIXTURE_SETUP(uc_kvm) +{ + struct kvm_s390_vm_cpu_processor info; + int rc; + + require_ucontrol_admin(); + + self->kvm_fd = open_kvm_dev_path_or_exit(); + self->vm_fd = ioctl(self->kvm_fd, KVM_CREATE_VM, KVM_VM_S390_UCONTROL); + ASSERT_GE(self->vm_fd, 0); + + kvm_device_attr_get(self->vm_fd, KVM_S390_VM_CPU_MODEL, + KVM_S390_VM_CPU_PROCESSOR, &info); + TH_LOG("create VM 0x%llx", info.cpuid); + + self->vcpu_fd = ioctl(self->vm_fd, KVM_CREATE_VCPU, 0); + ASSERT_GE(self->vcpu_fd, 0); + + self->kvm_run_size = ioctl(self->kvm_fd, KVM_GET_VCPU_MMAP_SIZE, NULL); + ASSERT_GE(self->kvm_run_size, sizeof(struct kvm_run)) + TH_LOG(KVM_IOCTL_ERROR(KVM_GET_VCPU_MMAP_SIZE, self->kvm_run_size)); + self->run = (struct kvm_run *)mmap(NULL, self->kvm_run_size, + PROT_READ | PROT_WRITE, MAP_SHARED, self->vcpu_fd, 0); + ASSERT_NE(self->run, MAP_FAILED); + /** + * For virtual cpus that have been created with S390 user controlled + * virtual machines, the resulting vcpu fd can be memory mapped at page + * offset KVM_S390_SIE_PAGE_OFFSET in order to obtain a memory map of + * the virtual cpu's hardware control block. 
+ */ + self->sie_block = (struct kvm_s390_sie_block *)mmap(NULL, PAGE_SIZE, + PROT_READ | PROT_WRITE, MAP_SHARED, + self->vcpu_fd, KVM_S390_SIE_PAGE_OFFSET << PAGE_SHIFT); + ASSERT_NE(self->sie_block, MAP_FAILED); + + TH_LOG("VM created %p %p", self->run, self->sie_block); + + self->base_gpa = 0; + self->code_gpa = self->base_gpa + (3 * SZ_1M); + + self->vm_mem = aligned_alloc(SZ_1M, VM_MEM_MAX_M * SZ_1M); + ASSERT_NE(NULL, self->vm_mem) TH_LOG("malloc failed %u", errno); + self->base_hva = (uintptr_t)self->vm_mem; + self->code_hva = self->base_hva - self->base_gpa + self->code_gpa; + struct kvm_s390_ucas_mapping map = { + .user_addr = self->base_hva, + .vcpu_addr = self->base_gpa, + .length = VM_MEM_SIZE, + }; + TH_LOG("ucas map %p %p 0x%llx", + (void *)map.user_addr, (void *)map.vcpu_addr, map.length); + rc = ioctl(self->vcpu_fd, KVM_S390_UCAS_MAP, &map); + ASSERT_EQ(0, rc) TH_LOG("ucas map result %d not expected, %s", + rc, strerror(errno)); + + TH_LOG("page in %p", (void *)self->base_gpa); + rc = ioctl(self->vcpu_fd, KVM_S390_VCPU_FAULT, self->base_gpa); + ASSERT_EQ(0, rc) TH_LOG("vcpu fault (%p) result %d not expected, %s", + (void *)self->base_hva, rc, strerror(errno)); + + self->sie_block->cpuflags &= ~CPUSTAT_STOPPED; +} + +FIXTURE_TEARDOWN(uc_kvm) +{ + munmap(self->sie_block, PAGE_SIZE); + munmap(self->run, self->kvm_run_size); + close(self->vcpu_fd); + close(self->vm_fd); + close(self->kvm_fd); + free(self->vm_mem); +} + +TEST_F(uc_kvm, uc_sie_assertions) +{ + /* assert interception of Code 08 (Program Interruption) is set */ + EXPECT_EQ(0, self->sie_block->ecb & ECB_SPECI); +} + +TEST_F(uc_kvm, uc_attr_mem_limit) +{ + u64 limit; + struct kvm_device_attr attr = { + .group = KVM_S390_VM_MEM_CTRL, + .attr = KVM_S390_VM_MEM_LIMIT_SIZE, + .addr = (unsigned long)&limit, + }; + int rc; + + rc = ioctl(self->vm_fd, KVM_GET_DEVICE_ATTR, &attr); + EXPECT_EQ(0, rc); + EXPECT_EQ(~0UL, limit); + + /* assert set not supported */ + rc = ioctl(self->vm_fd, KVM_SET_DEVICE_ATTR, &attr); + EXPECT_EQ(-1, rc); + EXPECT_EQ(EINVAL, errno); +} + +TEST_F(uc_kvm, uc_no_dirty_log) +{ + struct kvm_dirty_log dlog; + int rc; + + rc = ioctl(self->vm_fd, KVM_GET_DIRTY_LOG, &dlog); + EXPECT_EQ(-1, rc); + EXPECT_EQ(EINVAL, errno); +} + +/** + * Assert HPAGE CAP cannot be enabled on UCONTROL VM + */ +TEST(uc_cap_hpage) +{ + int rc, kvm_fd, vm_fd, vcpu_fd; + struct kvm_enable_cap cap = { + .cap = KVM_CAP_S390_HPAGE_1M, + }; + + require_ucontrol_admin(); + + kvm_fd = open_kvm_dev_path_or_exit(); + vm_fd = ioctl(kvm_fd, KVM_CREATE_VM, KVM_VM_S390_UCONTROL); + ASSERT_GE(vm_fd, 0); + + /* assert hpages are not supported on ucontrol vm */ + rc = ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_S390_HPAGE_1M); + EXPECT_EQ(0, rc); + + /* Test that KVM_CAP_S390_HPAGE_1M can't be enabled for a ucontrol vm */ + rc = ioctl(vm_fd, KVM_ENABLE_CAP, cap); + EXPECT_EQ(-1, rc); + EXPECT_EQ(EINVAL, errno); + + /* assert HPAGE CAP is rejected after vCPU creation */ + vcpu_fd = ioctl(vm_fd, KVM_CREATE_VCPU, 0); + ASSERT_GE(vcpu_fd, 0); + rc = ioctl(vm_fd, KVM_ENABLE_CAP, cap); + EXPECT_EQ(-1, rc); + EXPECT_EQ(EBUSY, errno); + + close(vcpu_fd); + close(vm_fd); + close(kvm_fd); +} + +/* calculate host virtual addr from guest physical addr */ +static void *gpa2hva(FIXTURE_DATA(uc_kvm) *self, u64 gpa) +{ + return (void *)(self->base_hva - self->base_gpa + gpa); +} + +/* map / make additional memory available */ +static int uc_map_ext(FIXTURE_DATA(uc_kvm) *self, u64 vcpu_addr, u64 length) +{ + struct kvm_s390_ucas_mapping map = { + .user_addr = 
(u64)gpa2hva(self, vcpu_addr), + .vcpu_addr = vcpu_addr, + .length = length, + }; + pr_info("ucas map %p %p 0x%llx", + (void *)map.user_addr, (void *)map.vcpu_addr, map.length); + return ioctl(self->vcpu_fd, KVM_S390_UCAS_MAP, &map); +} + +/* unmap previously mapped memory */ +static int uc_unmap_ext(FIXTURE_DATA(uc_kvm) *self, u64 vcpu_addr, u64 length) +{ + struct kvm_s390_ucas_mapping map = { + .user_addr = (u64)gpa2hva(self, vcpu_addr), + .vcpu_addr = vcpu_addr, + .length = length, + }; + pr_info("ucas unmap %p %p 0x%llx", + (void *)map.user_addr, (void *)map.vcpu_addr, map.length); + return ioctl(self->vcpu_fd, KVM_S390_UCAS_UNMAP, &map); +} + +/* handle ucontrol exit by mapping the accessed segment */ +static void uc_handle_exit_ucontrol(FIXTURE_DATA(uc_kvm) *self) +{ + struct kvm_run *run = self->run; + u64 seg_addr; + int rc; + + TEST_ASSERT_EQ(KVM_EXIT_S390_UCONTROL, run->exit_reason); + switch (run->s390_ucontrol.pgm_code) { + case PGM_SEGMENT_TRANSLATION: + seg_addr = run->s390_ucontrol.trans_exc_code & ~(SZ_1M - 1); + pr_info("ucontrol pic segment translation 0x%llx, mapping segment 0x%lx\n", + run->s390_ucontrol.trans_exc_code, seg_addr); + /* map / make additional memory available */ + rc = uc_map_ext(self, seg_addr, SZ_1M); + TEST_ASSERT_EQ(0, rc); + break; + default: + TEST_FAIL("UNEXPECTED PGM CODE %d", run->s390_ucontrol.pgm_code); + } +} + +/* + * Handle the SIEIC exit + * * fail on codes not expected in the test cases + * Returns if interception is handled / execution can be continued + */ +static void uc_skey_enable(FIXTURE_DATA(uc_kvm) *self) +{ + struct kvm_s390_sie_block *sie_block = self->sie_block; + + /* disable KSS */ + sie_block->cpuflags &= ~CPUSTAT_KSS; + /* disable skey inst interception */ + sie_block->ictl &= ~(ICTL_ISKE | ICTL_SSKE | ICTL_RRBE); +} + +/* + * Handle the instruction intercept + * Returns if interception is handled / execution can be continued + */ +static bool uc_handle_insn_ic(FIXTURE_DATA(uc_kvm) *self) +{ + struct kvm_s390_sie_block *sie_block = self->sie_block; + int ilen = insn_length(sie_block->ipa >> 8); + struct kvm_run *run = self->run; + + switch (run->s390_sieic.ipa) { + case 0xB229: /* ISKE */ + case 0xB22b: /* SSKE */ + case 0xB22a: /* RRBE */ + uc_skey_enable(self); + + /* rewind to reexecute intercepted instruction */ + run->psw_addr = run->psw_addr - ilen; + pr_info("rewind guest addr to 0x%.16llx\n", run->psw_addr); + return true; + default: + return false; + } +} + +/* + * Handle the SIEIC exit + * * fail on codes not expected in the test cases + * Returns if interception is handled / execution can be continued + */ +static bool uc_handle_sieic(FIXTURE_DATA(uc_kvm) *self) +{ + struct kvm_s390_sie_block *sie_block = self->sie_block; + struct kvm_run *run = self->run; + + /* check SIE interception code */ + pr_info("sieic: 0x%.2x 0x%.4x 0x%.8x\n", + run->s390_sieic.icptcode, + run->s390_sieic.ipa, + run->s390_sieic.ipb); + switch (run->s390_sieic.icptcode) { + case ICPT_INST: + /* end execution in caller on intercepted instruction */ + pr_info("sie instruction interception\n"); + return uc_handle_insn_ic(self); + case ICPT_KSS: + uc_skey_enable(self); + return true; + case ICPT_OPEREXC: + /* operation exception */ + TEST_FAIL("sie exception on %.4x%.8x", sie_block->ipa, sie_block->ipb); + default: + TEST_FAIL("UNEXPECTED SIEIC CODE %d", run->s390_sieic.icptcode); + } + return true; +} + +/* verify VM state on exit */ +static bool uc_handle_exit(FIXTURE_DATA(uc_kvm) *self) +{ + struct kvm_run *run = self->run; + + switch 
(run->exit_reason) { + case KVM_EXIT_S390_UCONTROL: + /** check program interruption code + * handle page fault --> ucas map + */ + uc_handle_exit_ucontrol(self); + break; + case KVM_EXIT_S390_SIEIC: + return uc_handle_sieic(self); + default: + pr_info("exit_reason %2d not handled\n", run->exit_reason); + } + return true; +} + +/* run the VM until interrupted */ +static int uc_run_once(FIXTURE_DATA(uc_kvm) *self) +{ + int rc; + + rc = ioctl(self->vcpu_fd, KVM_RUN, NULL); + print_run(self->run, self->sie_block); + print_regs(self->run); + pr_debug("run %d / %d %s\n", rc, errno, strerror(errno)); + return rc; +} + +static void uc_assert_diag44(FIXTURE_DATA(uc_kvm) *self) +{ + struct kvm_s390_sie_block *sie_block = self->sie_block; + + /* assert vm was interrupted by diag 0x0044 */ + TEST_ASSERT_EQ(KVM_EXIT_S390_SIEIC, self->run->exit_reason); + TEST_ASSERT_EQ(ICPT_INST, sie_block->icptcode); + TEST_ASSERT_EQ(0x8300, sie_block->ipa); + TEST_ASSERT_EQ(0x440000, sie_block->ipb); +} + +TEST_F(uc_kvm, uc_no_user_region) +{ + struct kvm_userspace_memory_region region = { + .slot = 1, + .guest_phys_addr = self->code_gpa, + .memory_size = VM_MEM_EXT_SIZE, + .userspace_addr = (uintptr_t)self->code_hva, + }; + struct kvm_userspace_memory_region2 region2 = { + .slot = 1, + .guest_phys_addr = self->code_gpa, + .memory_size = VM_MEM_EXT_SIZE, + .userspace_addr = (uintptr_t)self->code_hva, + }; + + ASSERT_EQ(-1, ioctl(self->vm_fd, KVM_SET_USER_MEMORY_REGION, ®ion)); + ASSERT_EQ(EINVAL, errno); + + ASSERT_EQ(-1, ioctl(self->vm_fd, KVM_SET_USER_MEMORY_REGION2, ®ion2)); + ASSERT_EQ(EINVAL, errno); +} + +TEST_F(uc_kvm, uc_map_unmap) +{ + struct kvm_sync_regs *sync_regs = &self->run->s.regs; + struct kvm_run *run = self->run; + const u64 disp = 1; + int rc; + + /* copy test_mem_asm to code_hva / code_gpa */ + TH_LOG("copy code %p to vm mapped memory %p / %p", + &test_mem_asm, (void *)self->code_hva, (void *)self->code_gpa); + memcpy((void *)self->code_hva, &test_mem_asm, PAGE_SIZE); + + /* DAT disabled + 64 bit mode */ + run->psw_mask = 0x0000000180000000ULL; + run->psw_addr = self->code_gpa; + + /* set register content for test_mem_asm to access not mapped memory*/ + sync_regs->gprs[1] = 0x55; + sync_regs->gprs[5] = self->base_gpa; + sync_regs->gprs[6] = VM_MEM_SIZE + disp; + run->kvm_dirty_regs |= KVM_SYNC_GPRS; + + /* run and expect to fail with ucontrol pic segment translation */ + ASSERT_EQ(0, uc_run_once(self)); + ASSERT_EQ(1, sync_regs->gprs[0]); + ASSERT_EQ(KVM_EXIT_S390_UCONTROL, run->exit_reason); + + ASSERT_EQ(PGM_SEGMENT_TRANSLATION, run->s390_ucontrol.pgm_code); + ASSERT_EQ(self->base_gpa + VM_MEM_SIZE, run->s390_ucontrol.trans_exc_code); + + /* fail to map memory with not segment aligned address */ + rc = uc_map_ext(self, self->base_gpa + VM_MEM_SIZE + disp, VM_MEM_EXT_SIZE); + ASSERT_GT(0, rc) + TH_LOG("ucas map for non segment address should fail but didn't; " + "result %d not expected, %s", rc, strerror(errno)); + + /* map / make additional memory available */ + rc = uc_map_ext(self, self->base_gpa + VM_MEM_SIZE, VM_MEM_EXT_SIZE); + ASSERT_EQ(0, rc) + TH_LOG("ucas map result %d not expected, %s", rc, strerror(errno)); + ASSERT_EQ(0, uc_run_once(self)); + ASSERT_EQ(false, uc_handle_exit(self)); + uc_assert_diag44(self); + + /* assert registers and memory are in expected state */ + ASSERT_EQ(2, sync_regs->gprs[0]); + ASSERT_EQ(0x55, sync_regs->gprs[1]); + ASSERT_EQ(0x55, *(u32 *)gpa2hva(self, self->base_gpa + VM_MEM_SIZE + disp)); + + /* unmap and run loop again */ + rc = uc_unmap_ext(self, 
self->base_gpa + VM_MEM_SIZE, VM_MEM_EXT_SIZE); + ASSERT_EQ(0, rc) + TH_LOG("ucas unmap result %d not expected, %s", rc, strerror(errno)); + ASSERT_EQ(0, uc_run_once(self)); + ASSERT_EQ(3, sync_regs->gprs[0]); + ASSERT_EQ(KVM_EXIT_S390_UCONTROL, run->exit_reason); + ASSERT_EQ(PGM_SEGMENT_TRANSLATION, run->s390_ucontrol.pgm_code); + /* handle ucontrol exit and remap memory after previous map and unmap */ + ASSERT_EQ(true, uc_handle_exit(self)); +} + +TEST_F(uc_kvm, uc_gprs) +{ + struct kvm_sync_regs *sync_regs = &self->run->s.regs; + struct kvm_run *run = self->run; + struct kvm_regs regs = {}; + + /* Set registers to values that are different from the ones that we expect below */ + for (int i = 0; i < 8; i++) + sync_regs->gprs[i] = 8; + run->kvm_dirty_regs |= KVM_SYNC_GPRS; + + /* copy test_gprs_asm to code_hva / code_gpa */ + TH_LOG("copy code %p to vm mapped memory %p / %p", + &test_gprs_asm, (void *)self->code_hva, (void *)self->code_gpa); + memcpy((void *)self->code_hva, &test_gprs_asm, PAGE_SIZE); + + /* DAT disabled + 64 bit mode */ + run->psw_mask = 0x0000000180000000ULL; + run->psw_addr = self->code_gpa; + + /* run and expect interception of diag 44 */ + ASSERT_EQ(0, uc_run_once(self)); + ASSERT_EQ(false, uc_handle_exit(self)); + uc_assert_diag44(self); + + /* Retrieve and check guest register values */ + ASSERT_EQ(0, ioctl(self->vcpu_fd, KVM_GET_REGS, ®s)); + for (int i = 0; i < 8; i++) { + ASSERT_EQ(i, regs.gprs[i]); + ASSERT_EQ(i, sync_regs->gprs[i]); + } + + /* run and expect interception of diag 44 again */ + ASSERT_EQ(0, uc_run_once(self)); + ASSERT_EQ(false, uc_handle_exit(self)); + uc_assert_diag44(self); + + /* check continued increment of register 0 value */ + ASSERT_EQ(0, ioctl(self->vcpu_fd, KVM_GET_REGS, ®s)); + ASSERT_EQ(1, regs.gprs[0]); + ASSERT_EQ(1, sync_regs->gprs[0]); +} + +TEST_F(uc_kvm, uc_skey) +{ + struct kvm_s390_sie_block *sie_block = self->sie_block; + struct kvm_sync_regs *sync_regs = &self->run->s.regs; + u64 test_vaddr = VM_MEM_SIZE - (SZ_1M / 2); + struct kvm_run *run = self->run; + const u8 skeyvalue = 0x34; + + /* copy test_skey_asm to code_hva / code_gpa */ + TH_LOG("copy code %p to vm mapped memory %p / %p", + &test_skey_asm, (void *)self->code_hva, (void *)self->code_gpa); + memcpy((void *)self->code_hva, &test_skey_asm, PAGE_SIZE); + + /* set register content for test_skey_asm to access not mapped memory */ + sync_regs->gprs[1] = skeyvalue; + sync_regs->gprs[5] = self->base_gpa; + sync_regs->gprs[6] = test_vaddr; + run->kvm_dirty_regs |= KVM_SYNC_GPRS; + + /* DAT disabled + 64 bit mode */ + run->psw_mask = 0x0000000180000000ULL; + run->psw_addr = self->code_gpa; + + ASSERT_EQ(0, uc_run_once(self)); + ASSERT_EQ(true, uc_handle_exit(self)); + ASSERT_EQ(1, sync_regs->gprs[0]); + + /* ISKE */ + ASSERT_EQ(0, uc_run_once(self)); + + /* + * Bail out and skip the test after uc_skey_enable was executed but iske + * is still intercepted. Instructions are not handled by the kernel. + * Thus there is no need to test this here. + */ + TEST_ASSERT_EQ(0, sie_block->cpuflags & CPUSTAT_KSS); + TEST_ASSERT_EQ(0, sie_block->ictl & (ICTL_ISKE | ICTL_SSKE | ICTL_RRBE)); + TEST_ASSERT_EQ(KVM_EXIT_S390_SIEIC, self->run->exit_reason); + TEST_ASSERT_EQ(ICPT_INST, sie_block->icptcode); + TEST_REQUIRE(sie_block->ipa != 0xb229); + + /* ISKE contd. 
*/ + ASSERT_EQ(false, uc_handle_exit(self)); + ASSERT_EQ(2, sync_regs->gprs[0]); + /* assert initial skey (ACC = 0, R & C = 1) */ + ASSERT_EQ(0x06, sync_regs->gprs[1]); + uc_assert_diag44(self); + + /* SSKE + ISKE */ + sync_regs->gprs[1] = skeyvalue; + run->kvm_dirty_regs |= KVM_SYNC_GPRS; + ASSERT_EQ(0, uc_run_once(self)); + ASSERT_EQ(false, uc_handle_exit(self)); + ASSERT_EQ(3, sync_regs->gprs[0]); + ASSERT_EQ(skeyvalue, sync_regs->gprs[1]); + uc_assert_diag44(self); + + /* RRBE + ISKE */ + sync_regs->gprs[1] = skeyvalue; + run->kvm_dirty_regs |= KVM_SYNC_GPRS; + ASSERT_EQ(0, uc_run_once(self)); + ASSERT_EQ(false, uc_handle_exit(self)); + ASSERT_EQ(4, sync_regs->gprs[0]); + /* assert R reset but rest of skey unchanged */ + ASSERT_EQ(skeyvalue & 0xfa, sync_regs->gprs[1]); + ASSERT_EQ(0, sync_regs->gprs[1] & 0x04); + uc_assert_diag44(self); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/kvm/s390x/cmma_test.c b/tools/testing/selftests/kvm/s390x/cmma_test.c deleted file mode 100644 index e32dd59703a0..000000000000 --- a/tools/testing/selftests/kvm/s390x/cmma_test.c +++ /dev/null @@ -1,695 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Test for s390x CMMA migration - * - * Copyright IBM Corp. 2023 - * - * Authors: - * Nico Boehr - */ -#include -#include -#include -#include -#include - -#include "test_util.h" -#include "kvm_util.h" -#include "kselftest.h" -#include "ucall_common.h" -#include "processor.h" - -#define MAIN_PAGE_COUNT 512 - -#define TEST_DATA_PAGE_COUNT 512 -#define TEST_DATA_MEMSLOT 1 -#define TEST_DATA_START_GFN PAGE_SIZE - -#define TEST_DATA_TWO_PAGE_COUNT 256 -#define TEST_DATA_TWO_MEMSLOT 2 -#define TEST_DATA_TWO_START_GFN (2 * PAGE_SIZE) - -static char cmma_value_buf[MAIN_PAGE_COUNT + TEST_DATA_PAGE_COUNT]; - -/** - * Dirty CMMA attributes of exactly one page in the TEST_DATA memslot, - * so use_cmma goes on and the CMMA related ioctls do something. - */ -static void guest_do_one_essa(void) -{ - asm volatile( - /* load TEST_DATA_START_GFN into r1 */ - " llilf 1,%[start_gfn]\n" - /* calculate the address from the gfn */ - " sllg 1,1,12(0)\n" - /* set the first page in TEST_DATA memslot to STABLE */ - " .insn rrf,0xb9ab0000,2,1,1,0\n" - /* hypercall */ - " diag 0,0,0x501\n" - "0: j 0b" - : - : [start_gfn] "L"(TEST_DATA_START_GFN) - : "r1", "r2", "memory", "cc" - ); -} - -/** - * Touch CMMA attributes of all pages in TEST_DATA memslot. Set them to stable - * state. 
- */ -static void guest_dirty_test_data(void) -{ - asm volatile( - /* r1 = TEST_DATA_START_GFN */ - " xgr 1,1\n" - " llilf 1,%[start_gfn]\n" - /* r5 = TEST_DATA_PAGE_COUNT */ - " lghi 5,%[page_count]\n" - /* r5 += r1 */ - "2: agfr 5,1\n" - /* r2 = r1 << PAGE_SHIFT */ - "1: sllg 2,1,12(0)\n" - /* essa(r4, r2, SET_STABLE) */ - " .insn rrf,0xb9ab0000,4,2,1,0\n" - /* i++ */ - " agfi 1,1\n" - /* if r1 < r5 goto 1 */ - " cgrjl 1,5,1b\n" - /* hypercall */ - " diag 0,0,0x501\n" - "0: j 0b" - : - : [start_gfn] "L"(TEST_DATA_START_GFN), - [page_count] "L"(TEST_DATA_PAGE_COUNT) - : - /* the counter in our loop over the pages */ - "r1", - /* the calculated page physical address */ - "r2", - /* ESSA output register */ - "r4", - /* last page */ - "r5", - "cc", "memory" - ); -} - -static void create_main_memslot(struct kvm_vm *vm) -{ - int i; - - vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, 0, 0, MAIN_PAGE_COUNT, 0); - /* set the array of memslots to zero like __vm_create does */ - for (i = 0; i < NR_MEM_REGIONS; i++) - vm->memslots[i] = 0; -} - -static void create_test_memslot(struct kvm_vm *vm) -{ - vm_userspace_mem_region_add(vm, - VM_MEM_SRC_ANONYMOUS, - TEST_DATA_START_GFN << vm->page_shift, - TEST_DATA_MEMSLOT, - TEST_DATA_PAGE_COUNT, - 0 - ); - vm->memslots[MEM_REGION_TEST_DATA] = TEST_DATA_MEMSLOT; -} - -static void create_memslots(struct kvm_vm *vm) -{ - /* - * Our VM has the following memory layout: - * +------+---------------------------+ - * | GFN | Memslot | - * +------+---------------------------+ - * | 0 | | - * | ... | MAIN (Code, Stack, ...) | - * | 511 | | - * +------+---------------------------+ - * | 4096 | | - * | ... | TEST_DATA | - * | 4607 | | - * +------+---------------------------+ - */ - create_main_memslot(vm); - create_test_memslot(vm); -} - -static void finish_vm_setup(struct kvm_vm *vm) -{ - struct userspace_mem_region *slot0; - - kvm_vm_elf_load(vm, program_invocation_name); - - slot0 = memslot2region(vm, 0); - ucall_init(vm, slot0->region.guest_phys_addr + slot0->region.memory_size); - - kvm_arch_vm_post_create(vm); -} - -static struct kvm_vm *create_vm_two_memslots(void) -{ - struct kvm_vm *vm; - - vm = vm_create_barebones(); - - create_memslots(vm); - - finish_vm_setup(vm); - - return vm; -} - -static void enable_cmma(struct kvm_vm *vm) -{ - int r; - - r = __kvm_device_attr_set(vm->fd, KVM_S390_VM_MEM_CTRL, KVM_S390_VM_MEM_ENABLE_CMMA, NULL); - TEST_ASSERT(!r, "enabling cmma failed r=%d errno=%d", r, errno); -} - -static void enable_dirty_tracking(struct kvm_vm *vm) -{ - vm_mem_region_set_flags(vm, 0, KVM_MEM_LOG_DIRTY_PAGES); - vm_mem_region_set_flags(vm, TEST_DATA_MEMSLOT, KVM_MEM_LOG_DIRTY_PAGES); -} - -static int __enable_migration_mode(struct kvm_vm *vm) -{ - return __kvm_device_attr_set(vm->fd, - KVM_S390_VM_MIGRATION, - KVM_S390_VM_MIGRATION_START, - NULL - ); -} - -static void enable_migration_mode(struct kvm_vm *vm) -{ - int r = __enable_migration_mode(vm); - - TEST_ASSERT(!r, "enabling migration mode failed r=%d errno=%d", r, errno); -} - -static bool is_migration_mode_on(struct kvm_vm *vm) -{ - u64 out; - int r; - - r = __kvm_device_attr_get(vm->fd, - KVM_S390_VM_MIGRATION, - KVM_S390_VM_MIGRATION_STATUS, - &out - ); - TEST_ASSERT(!r, "getting migration mode status failed r=%d errno=%d", r, errno); - return out; -} - -static int vm_get_cmma_bits(struct kvm_vm *vm, u64 flags, int *errno_out) -{ - struct kvm_s390_cmma_log args; - int rc; - - errno = 0; - - args = (struct kvm_s390_cmma_log){ - .start_gfn = 0, - .count = sizeof(cmma_value_buf), - .flags 
= flags, - .values = (__u64)&cmma_value_buf[0] - }; - rc = __vm_ioctl(vm, KVM_S390_GET_CMMA_BITS, &args); - - *errno_out = errno; - return rc; -} - -static void test_get_cmma_basic(void) -{ - struct kvm_vm *vm = create_vm_two_memslots(); - struct kvm_vcpu *vcpu; - int rc, errno_out; - - /* GET_CMMA_BITS without CMMA enabled should fail */ - rc = vm_get_cmma_bits(vm, 0, &errno_out); - TEST_ASSERT_EQ(rc, -1); - TEST_ASSERT_EQ(errno_out, ENXIO); - - enable_cmma(vm); - vcpu = vm_vcpu_add(vm, 1, guest_do_one_essa); - - vcpu_run(vcpu); - - /* GET_CMMA_BITS without migration mode and without peeking should fail */ - rc = vm_get_cmma_bits(vm, 0, &errno_out); - TEST_ASSERT_EQ(rc, -1); - TEST_ASSERT_EQ(errno_out, EINVAL); - - /* GET_CMMA_BITS without migration mode and with peeking should work */ - rc = vm_get_cmma_bits(vm, KVM_S390_CMMA_PEEK, &errno_out); - TEST_ASSERT_EQ(rc, 0); - TEST_ASSERT_EQ(errno_out, 0); - - enable_dirty_tracking(vm); - enable_migration_mode(vm); - - /* GET_CMMA_BITS with invalid flags */ - rc = vm_get_cmma_bits(vm, 0xfeedc0fe, &errno_out); - TEST_ASSERT_EQ(rc, -1); - TEST_ASSERT_EQ(errno_out, EINVAL); - - kvm_vm_free(vm); -} - -static void assert_exit_was_hypercall(struct kvm_vcpu *vcpu) -{ - TEST_ASSERT_EQ(vcpu->run->exit_reason, 13); - TEST_ASSERT_EQ(vcpu->run->s390_sieic.icptcode, 4); - TEST_ASSERT_EQ(vcpu->run->s390_sieic.ipa, 0x8300); - TEST_ASSERT_EQ(vcpu->run->s390_sieic.ipb, 0x5010000); -} - -static void test_migration_mode(void) -{ - struct kvm_vm *vm = vm_create_barebones(); - struct kvm_vcpu *vcpu; - u64 orig_psw; - int rc; - - /* enabling migration mode on a VM without memory should fail */ - rc = __enable_migration_mode(vm); - TEST_ASSERT_EQ(rc, -1); - TEST_ASSERT_EQ(errno, EINVAL); - TEST_ASSERT(!is_migration_mode_on(vm), "migration mode should still be off"); - errno = 0; - - create_memslots(vm); - finish_vm_setup(vm); - - enable_cmma(vm); - vcpu = vm_vcpu_add(vm, 1, guest_do_one_essa); - orig_psw = vcpu->run->psw_addr; - - /* - * Execute one essa instruction in the guest. Otherwise the guest will - * not have use_cmm enabled and GET_CMMA_BITS will return no pages. - */ - vcpu_run(vcpu); - assert_exit_was_hypercall(vcpu); - - /* migration mode when memslots have dirty tracking off should fail */ - rc = __enable_migration_mode(vm); - TEST_ASSERT_EQ(rc, -1); - TEST_ASSERT_EQ(errno, EINVAL); - TEST_ASSERT(!is_migration_mode_on(vm), "migration mode should still be off"); - errno = 0; - - /* enable dirty tracking */ - enable_dirty_tracking(vm); - - /* enabling migration mode should work now */ - rc = __enable_migration_mode(vm); - TEST_ASSERT_EQ(rc, 0); - TEST_ASSERT(is_migration_mode_on(vm), "migration mode should be on"); - errno = 0; - - /* execute another ESSA instruction to see this goes fine */ - vcpu->run->psw_addr = orig_psw; - vcpu_run(vcpu); - assert_exit_was_hypercall(vcpu); - - /* - * With migration mode on, create a new memslot with dirty tracking off. - * This should turn off migration mode. - */ - TEST_ASSERT(is_migration_mode_on(vm), "migration mode should be on"); - vm_userspace_mem_region_add(vm, - VM_MEM_SRC_ANONYMOUS, - TEST_DATA_TWO_START_GFN << vm->page_shift, - TEST_DATA_TWO_MEMSLOT, - TEST_DATA_TWO_PAGE_COUNT, - 0 - ); - TEST_ASSERT(!is_migration_mode_on(vm), - "creating memslot without dirty tracking turns off migration mode" - ); - - /* ESSA instructions should still execute fine */ - vcpu->run->psw_addr = orig_psw; - vcpu_run(vcpu); - assert_exit_was_hypercall(vcpu); - - /* - * Turn on dirty tracking on the new memslot. 
- * It should be possible to turn migration mode back on again. - */ - vm_mem_region_set_flags(vm, TEST_DATA_TWO_MEMSLOT, KVM_MEM_LOG_DIRTY_PAGES); - rc = __enable_migration_mode(vm); - TEST_ASSERT_EQ(rc, 0); - TEST_ASSERT(is_migration_mode_on(vm), "migration mode should be on"); - errno = 0; - - /* - * Turn off dirty tracking again, this time with just a flag change. - * Again, migration mode should turn off. - */ - TEST_ASSERT(is_migration_mode_on(vm), "migration mode should be on"); - vm_mem_region_set_flags(vm, TEST_DATA_TWO_MEMSLOT, 0); - TEST_ASSERT(!is_migration_mode_on(vm), - "disabling dirty tracking should turn off migration mode" - ); - - /* ESSA instructions should still execute fine */ - vcpu->run->psw_addr = orig_psw; - vcpu_run(vcpu); - assert_exit_was_hypercall(vcpu); - - kvm_vm_free(vm); -} - -/** - * Given a VM with the MAIN and TEST_DATA memslot, assert that both slots have - * CMMA attributes of all pages in both memslots and nothing more dirty. - * This has the useful side effect of ensuring nothing is CMMA dirty after this - * function. - */ -static void assert_all_slots_cmma_dirty(struct kvm_vm *vm) -{ - struct kvm_s390_cmma_log args; - - /* - * First iteration - everything should be dirty. - * Start at the main memslot... - */ - args = (struct kvm_s390_cmma_log){ - .start_gfn = 0, - .count = sizeof(cmma_value_buf), - .flags = 0, - .values = (__u64)&cmma_value_buf[0] - }; - memset(cmma_value_buf, 0xff, sizeof(cmma_value_buf)); - vm_ioctl(vm, KVM_S390_GET_CMMA_BITS, &args); - TEST_ASSERT_EQ(args.count, MAIN_PAGE_COUNT); - TEST_ASSERT_EQ(args.remaining, TEST_DATA_PAGE_COUNT); - TEST_ASSERT_EQ(args.start_gfn, 0); - - /* ...and then - after a hole - the TEST_DATA memslot should follow */ - args = (struct kvm_s390_cmma_log){ - .start_gfn = MAIN_PAGE_COUNT, - .count = sizeof(cmma_value_buf), - .flags = 0, - .values = (__u64)&cmma_value_buf[0] - }; - memset(cmma_value_buf, 0xff, sizeof(cmma_value_buf)); - vm_ioctl(vm, KVM_S390_GET_CMMA_BITS, &args); - TEST_ASSERT_EQ(args.count, TEST_DATA_PAGE_COUNT); - TEST_ASSERT_EQ(args.start_gfn, TEST_DATA_START_GFN); - TEST_ASSERT_EQ(args.remaining, 0); - - /* ...and nothing else should be there */ - args = (struct kvm_s390_cmma_log){ - .start_gfn = TEST_DATA_START_GFN + TEST_DATA_PAGE_COUNT, - .count = sizeof(cmma_value_buf), - .flags = 0, - .values = (__u64)&cmma_value_buf[0] - }; - memset(cmma_value_buf, 0xff, sizeof(cmma_value_buf)); - vm_ioctl(vm, KVM_S390_GET_CMMA_BITS, &args); - TEST_ASSERT_EQ(args.count, 0); - TEST_ASSERT_EQ(args.start_gfn, 0); - TEST_ASSERT_EQ(args.remaining, 0); -} - -/** - * Given a VM, assert no pages are CMMA dirty. - */ -static void assert_no_pages_cmma_dirty(struct kvm_vm *vm) -{ - struct kvm_s390_cmma_log args; - - /* If we start from GFN 0 again, nothing should be dirty. */ - args = (struct kvm_s390_cmma_log){ - .start_gfn = 0, - .count = sizeof(cmma_value_buf), - .flags = 0, - .values = (__u64)&cmma_value_buf[0] - }; - memset(cmma_value_buf, 0xff, sizeof(cmma_value_buf)); - vm_ioctl(vm, KVM_S390_GET_CMMA_BITS, &args); - if (args.count || args.remaining || args.start_gfn) - TEST_FAIL("pages are still dirty start_gfn=0x%llx count=%u remaining=%llu", - args.start_gfn, - args.count, - args.remaining - ); -} - -static void test_get_inital_dirty(void) -{ - struct kvm_vm *vm = create_vm_two_memslots(); - struct kvm_vcpu *vcpu; - - enable_cmma(vm); - vcpu = vm_vcpu_add(vm, 1, guest_do_one_essa); - - /* - * Execute one essa instruction in the guest. 
Otherwise the guest will - * not have use_cmm enabled and GET_CMMA_BITS will return no pages. - */ - vcpu_run(vcpu); - assert_exit_was_hypercall(vcpu); - - enable_dirty_tracking(vm); - enable_migration_mode(vm); - - assert_all_slots_cmma_dirty(vm); - - /* Start from the beginning again and make sure nothing else is dirty */ - assert_no_pages_cmma_dirty(vm); - - kvm_vm_free(vm); -} - -static void query_cmma_range(struct kvm_vm *vm, - u64 start_gfn, u64 gfn_count, - struct kvm_s390_cmma_log *res_out) -{ - *res_out = (struct kvm_s390_cmma_log){ - .start_gfn = start_gfn, - .count = gfn_count, - .flags = 0, - .values = (__u64)&cmma_value_buf[0] - }; - memset(cmma_value_buf, 0xff, sizeof(cmma_value_buf)); - vm_ioctl(vm, KVM_S390_GET_CMMA_BITS, res_out); -} - -/** - * Assert the given cmma_log struct that was executed by query_cmma_range() - * indicates the first dirty gfn is at first_dirty_gfn and contains exactly - * dirty_gfn_count CMMA values. - */ -static void assert_cmma_dirty(u64 first_dirty_gfn, - u64 dirty_gfn_count, - const struct kvm_s390_cmma_log *res) -{ - TEST_ASSERT_EQ(res->start_gfn, first_dirty_gfn); - TEST_ASSERT_EQ(res->count, dirty_gfn_count); - for (size_t i = 0; i < dirty_gfn_count; i++) - TEST_ASSERT_EQ(cmma_value_buf[0], 0x0); /* stable state */ - TEST_ASSERT_EQ(cmma_value_buf[dirty_gfn_count], 0xff); /* not touched */ -} - -static void test_get_skip_holes(void) -{ - size_t gfn_offset; - struct kvm_vm *vm = create_vm_two_memslots(); - struct kvm_s390_cmma_log log; - struct kvm_vcpu *vcpu; - u64 orig_psw; - - enable_cmma(vm); - vcpu = vm_vcpu_add(vm, 1, guest_dirty_test_data); - - orig_psw = vcpu->run->psw_addr; - - /* - * Execute some essa instructions in the guest. Otherwise the guest will - * not have use_cmm enabled and GET_CMMA_BITS will return no pages. - */ - vcpu_run(vcpu); - assert_exit_was_hypercall(vcpu); - - enable_dirty_tracking(vm); - enable_migration_mode(vm); - - /* un-dirty all pages */ - assert_all_slots_cmma_dirty(vm); - - /* Then, dirty just the TEST_DATA memslot */ - vcpu->run->psw_addr = orig_psw; - vcpu_run(vcpu); - - gfn_offset = TEST_DATA_START_GFN; - /** - * Query CMMA attributes of one page, starting at page 0. Since the - * main memslot was not touched by the VM, this should yield the first - * page of the TEST_DATA memslot. - * The dirty bitmap should now look like this: - * 0: not dirty - * [0x1, 0x200): dirty - */ - query_cmma_range(vm, 0, 1, &log); - assert_cmma_dirty(gfn_offset, 1, &log); - gfn_offset++; - - /** - * Query CMMA attributes of 32 (0x20) pages past the end of the TEST_DATA - * memslot. This should wrap back to the beginning of the TEST_DATA - * memslot, page 1. - * The dirty bitmap should now look like this: - * [0, 0x21): not dirty - * [0x21, 0x200): dirty - */ - query_cmma_range(vm, TEST_DATA_START_GFN + TEST_DATA_PAGE_COUNT, 0x20, &log); - assert_cmma_dirty(gfn_offset, 0x20, &log); - gfn_offset += 0x20; - - /* Skip 32 pages */ - gfn_offset += 0x20; - - /** - * After skipping 32 pages, query the next 32 (0x20) pages. - * The dirty bitmap should now look like this: - * [0, 0x21): not dirty - * [0x21, 0x41): dirty - * [0x41, 0x61): not dirty - * [0x61, 0x200): dirty - */ - query_cmma_range(vm, gfn_offset, 0x20, &log); - assert_cmma_dirty(gfn_offset, 0x20, &log); - gfn_offset += 0x20; - - /** - * Query 1 page from the beginning of the TEST_DATA memslot. This should - * yield page 0x21. 
- * The dirty bitmap should now look like this: - * [0, 0x22): not dirty - * [0x22, 0x41): dirty - * [0x41, 0x61): not dirty - * [0x61, 0x200): dirty - */ - query_cmma_range(vm, TEST_DATA_START_GFN, 1, &log); - assert_cmma_dirty(TEST_DATA_START_GFN + 0x21, 1, &log); - gfn_offset++; - - /** - * Query 15 (0xF) pages from page 0x23 in TEST_DATA memslot. - * This should yield pages [0x23, 0x33). - * The dirty bitmap should now look like this: - * [0, 0x22): not dirty - * 0x22: dirty - * [0x23, 0x33): not dirty - * [0x33, 0x41): dirty - * [0x41, 0x61): not dirty - * [0x61, 0x200): dirty - */ - gfn_offset = TEST_DATA_START_GFN + 0x23; - query_cmma_range(vm, gfn_offset, 15, &log); - assert_cmma_dirty(gfn_offset, 15, &log); - - /** - * Query 17 (0x11) pages from page 0x22 in TEST_DATA memslot. - * This should yield page [0x22, 0x33) - * The dirty bitmap should now look like this: - * [0, 0x33): not dirty - * [0x33, 0x41): dirty - * [0x41, 0x61): not dirty - * [0x61, 0x200): dirty - */ - gfn_offset = TEST_DATA_START_GFN + 0x22; - query_cmma_range(vm, gfn_offset, 17, &log); - assert_cmma_dirty(gfn_offset, 17, &log); - - /** - * Query 25 (0x19) pages from page 0x40 in TEST_DATA memslot. - * This should yield page 0x40 and nothing more, since there are more - * than 16 non-dirty pages after page 0x40. - * The dirty bitmap should now look like this: - * [0, 0x33): not dirty - * [0x33, 0x40): dirty - * [0x40, 0x61): not dirty - * [0x61, 0x200): dirty - */ - gfn_offset = TEST_DATA_START_GFN + 0x40; - query_cmma_range(vm, gfn_offset, 25, &log); - assert_cmma_dirty(gfn_offset, 1, &log); - - /** - * Query pages [0x33, 0x40). - * The dirty bitmap should now look like this: - * [0, 0x61): not dirty - * [0x61, 0x200): dirty - */ - gfn_offset = TEST_DATA_START_GFN + 0x33; - query_cmma_range(vm, gfn_offset, 0x40 - 0x33, &log); - assert_cmma_dirty(gfn_offset, 0x40 - 0x33, &log); - - /** - * Query the remaining pages [0x61, 0x200). - */ - gfn_offset = TEST_DATA_START_GFN; - query_cmma_range(vm, gfn_offset, TEST_DATA_PAGE_COUNT - 0x61, &log); - assert_cmma_dirty(TEST_DATA_START_GFN + 0x61, TEST_DATA_PAGE_COUNT - 0x61, &log); - - assert_no_pages_cmma_dirty(vm); -} - -struct testdef { - const char *name; - void (*test)(void); -} testlist[] = { - { "migration mode and dirty tracking", test_migration_mode }, - { "GET_CMMA_BITS: basic calls", test_get_cmma_basic }, - { "GET_CMMA_BITS: all pages are dirty initally", test_get_inital_dirty }, - { "GET_CMMA_BITS: holes are skipped", test_get_skip_holes }, -}; - -/** - * The kernel may support CMMA, but the machine may not (i.e. if running as - * guest-3). - * - * In this case, the CMMA capabilities are all there, but the CMMA-related - * ioctls fail. To find out whether the machine supports CMMA, create a - * temporary VM and then query the CMMA feature of the VM. 
- */ -static int machine_has_cmma(void) -{ - struct kvm_vm *vm = vm_create_barebones(); - int r; - - r = !__kvm_has_device_attr(vm->fd, KVM_S390_VM_MEM_CTRL, KVM_S390_VM_MEM_ENABLE_CMMA); - kvm_vm_free(vm); - - return r; -} - -int main(int argc, char *argv[]) -{ - int idx; - - TEST_REQUIRE(kvm_has_cap(KVM_CAP_SYNC_REGS)); - TEST_REQUIRE(kvm_has_cap(KVM_CAP_S390_CMMA_MIGRATION)); - TEST_REQUIRE(machine_has_cmma()); - - ksft_print_header(); - - ksft_set_plan(ARRAY_SIZE(testlist)); - - for (idx = 0; idx < ARRAY_SIZE(testlist); idx++) { - testlist[idx].test(); - ksft_test_result_pass("%s\n", testlist[idx].name); - } - - ksft_finished(); /* Print results and exit() accordingly */ -} diff --git a/tools/testing/selftests/kvm/s390x/config b/tools/testing/selftests/kvm/s390x/config deleted file mode 100644 index 23270f2d679f..000000000000 --- a/tools/testing/selftests/kvm/s390x/config +++ /dev/null @@ -1,2 +0,0 @@ -CONFIG_KVM=y -CONFIG_KVM_S390_UCONTROL=y diff --git a/tools/testing/selftests/kvm/s390x/cpumodel_subfuncs_test.c b/tools/testing/selftests/kvm/s390x/cpumodel_subfuncs_test.c deleted file mode 100644 index 27255880dabd..000000000000 --- a/tools/testing/selftests/kvm/s390x/cpumodel_subfuncs_test.c +++ /dev/null @@ -1,301 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Copyright IBM Corp. 2024 - * - * Authors: - * Hariharan Mari - * - * The tests compare the result of the KVM ioctl for obtaining CPU subfunction data with those - * from an ASM block performing the same CPU subfunction. Currently KVM doesn't mask instruction - * query data reported via the CPU Model, allowing us to directly compare it with the data - * acquired through executing the queries in the test. - */ - -#include -#include -#include -#include -#include "facility.h" - -#include "kvm_util.h" - -#define PLO_FUNCTION_MAX 256 - -/* Query available CPU subfunctions */ -struct kvm_s390_vm_cpu_subfunc cpu_subfunc; - -static void get_cpu_machine_subfuntions(struct kvm_vm *vm, - struct kvm_s390_vm_cpu_subfunc *cpu_subfunc) -{ - int r; - - r = __kvm_device_attr_get(vm->fd, KVM_S390_VM_CPU_MODEL, - KVM_S390_VM_CPU_MACHINE_SUBFUNC, cpu_subfunc); - - TEST_ASSERT(!r, "Get cpu subfunctions failed r=%d errno=%d", r, errno); -} - -static inline int plo_test_bit(unsigned char nr) -{ - unsigned long function = nr | 0x100; - int cc; - - asm volatile(" lgr 0,%[function]\n" - /* Parameter registers are ignored for "test bit" */ - " plo 0,0,0,0(0)\n" - " ipm %0\n" - " srl %0,28\n" - : "=d" (cc) - : [function] "d" (function) - : "cc", "0"); - return cc == 0; -} - -/* Testing Perform Locked Operation (PLO) CPU subfunction's ASM block */ -static void test_plo_asm_block(u8 (*query)[32]) -{ - for (int i = 0; i < PLO_FUNCTION_MAX; ++i) { - if (plo_test_bit(i)) - (*query)[i >> 3] |= 0x80 >> (i & 7); - } -} - -/* Testing Crypto Compute Message Authentication Code (KMAC) CPU subfunction's ASM block */ -static void test_kmac_asm_block(u8 (*query)[16]) -{ - asm volatile(" la %%r1,%[query]\n" - " xgr %%r0,%%r0\n" - " .insn rre,0xb91e0000,0,2\n" - : [query] "=R" (*query) - : - : "cc", "r0", "r1"); -} - -/* Testing Crypto Cipher Message with Chaining (KMC) CPU subfunction's ASM block */ -static void test_kmc_asm_block(u8 (*query)[16]) -{ - asm volatile(" la %%r1,%[query]\n" - " xgr %%r0,%%r0\n" - " .insn rre,0xb92f0000,2,4\n" - : [query] "=R" (*query) - : - : "cc", "r0", "r1"); -} - -/* Testing Crypto Cipher Message (KM) CPU subfunction's ASM block */ -static void test_km_asm_block(u8 (*query)[16]) -{ - asm volatile(" la %%r1,%[query]\n" - " xgr 
%%r0,%%r0\n" - " .insn rre,0xb92e0000,2,4\n" - : [query] "=R" (*query) - : - : "cc", "r0", "r1"); -} - -/* Testing Crypto Compute Intermediate Message Digest (KIMD) CPU subfunction's ASM block */ -static void test_kimd_asm_block(u8 (*query)[16]) -{ - asm volatile(" la %%r1,%[query]\n" - " xgr %%r0,%%r0\n" - " .insn rre,0xb93e0000,0,2\n" - : [query] "=R" (*query) - : - : "cc", "r0", "r1"); -} - -/* Testing Crypto Compute Last Message Digest (KLMD) CPU subfunction's ASM block */ -static void test_klmd_asm_block(u8 (*query)[16]) -{ - asm volatile(" la %%r1,%[query]\n" - " xgr %%r0,%%r0\n" - " .insn rre,0xb93f0000,0,2\n" - : [query] "=R" (*query) - : - : "cc", "r0", "r1"); -} - -/* Testing Crypto Cipher Message with Counter (KMCTR) CPU subfunction's ASM block */ -static void test_kmctr_asm_block(u8 (*query)[16]) -{ - asm volatile(" la %%r1,%[query]\n" - " xgr %%r0,%%r0\n" - " .insn rrf,0xb92d0000,2,4,6,0\n" - : [query] "=R" (*query) - : - : "cc", "r0", "r1"); -} - -/* Testing Crypto Cipher Message with Cipher Feedback (KMF) CPU subfunction's ASM block */ -static void test_kmf_asm_block(u8 (*query)[16]) -{ - asm volatile(" la %%r1,%[query]\n" - " xgr %%r0,%%r0\n" - " .insn rre,0xb92a0000,2,4\n" - : [query] "=R" (*query) - : - : "cc", "r0", "r1"); -} - -/* Testing Crypto Cipher Message with Output Feedback (KMO) CPU subfunction's ASM block */ -static void test_kmo_asm_block(u8 (*query)[16]) -{ - asm volatile(" la %%r1,%[query]\n" - " xgr %%r0,%%r0\n" - " .insn rre,0xb92b0000,2,4\n" - : [query] "=R" (*query) - : - : "cc", "r0", "r1"); -} - -/* Testing Crypto Perform Cryptographic Computation (PCC) CPU subfunction's ASM block */ -static void test_pcc_asm_block(u8 (*query)[16]) -{ - asm volatile(" la %%r1,%[query]\n" - " xgr %%r0,%%r0\n" - " .insn rre,0xb92c0000,0,0\n" - : [query] "=R" (*query) - : - : "cc", "r0", "r1"); -} - -/* Testing Crypto Perform Random Number Operation (PRNO) CPU subfunction's ASM block */ -static void test_prno_asm_block(u8 (*query)[16]) -{ - asm volatile(" la %%r1,%[query]\n" - " xgr %%r0,%%r0\n" - " .insn rre,0xb93c0000,2,4\n" - : [query] "=R" (*query) - : - : "cc", "r0", "r1"); -} - -/* Testing Crypto Cipher Message with Authentication (KMA) CPU subfunction's ASM block */ -static void test_kma_asm_block(u8 (*query)[16]) -{ - asm volatile(" la %%r1,%[query]\n" - " xgr %%r0,%%r0\n" - " .insn rrf,0xb9290000,2,4,6,0\n" - : [query] "=R" (*query) - : - : "cc", "r0", "r1"); -} - -/* Testing Crypto Compute Digital Signature Authentication (KDSA) CPU subfunction's ASM block */ -static void test_kdsa_asm_block(u8 (*query)[16]) -{ - asm volatile(" la %%r1,%[query]\n" - " xgr %%r0,%%r0\n" - " .insn rre,0xb93a0000,0,2\n" - : [query] "=R" (*query) - : - : "cc", "r0", "r1"); -} - -/* Testing Sort Lists (SORTL) CPU subfunction's ASM block */ -static void test_sortl_asm_block(u8 (*query)[32]) -{ - asm volatile(" lghi 0,0\n" - " la 1,%[query]\n" - " .insn rre,0xb9380000,2,4\n" - : [query] "=R" (*query) - : - : "cc", "0", "1"); -} - -/* Testing Deflate Conversion Call (DFLTCC) CPU subfunction's ASM block */ -static void test_dfltcc_asm_block(u8 (*query)[32]) -{ - asm volatile(" lghi 0,0\n" - " la 1,%[query]\n" - " .insn rrf,0xb9390000,2,4,6,0\n" - : [query] "=R" (*query) - : - : "cc", "0", "1"); -} - -/* - * Testing Perform Function with Concurrent Results (PFCR) - * CPU subfunctions's ASM block - */ -static void test_pfcr_asm_block(u8 (*query)[16]) -{ - asm volatile(" lghi 0,0\n" - " .insn rsy,0xeb0000000016,0,0,%[query]\n" - : [query] "=QS" (*query) - : - : "cc", "0"); -} - -typedef void 
(*testfunc_t)(u8 (*array)[]); - -struct testdef { - const char *subfunc_name; - u8 *subfunc_array; - size_t array_size; - testfunc_t test; - int facility_bit; -} testlist[] = { - /* - * PLO was introduced in the very first 64-bit machine generation. - * Hence it is assumed PLO is always installed in Z Arch. - */ - { "PLO", cpu_subfunc.plo, sizeof(cpu_subfunc.plo), test_plo_asm_block, 1 }, - /* MSA - Facility bit 17 */ - { "KMAC", cpu_subfunc.kmac, sizeof(cpu_subfunc.kmac), test_kmac_asm_block, 17 }, - { "KMC", cpu_subfunc.kmc, sizeof(cpu_subfunc.kmc), test_kmc_asm_block, 17 }, - { "KM", cpu_subfunc.km, sizeof(cpu_subfunc.km), test_km_asm_block, 17 }, - { "KIMD", cpu_subfunc.kimd, sizeof(cpu_subfunc.kimd), test_kimd_asm_block, 17 }, - { "KLMD", cpu_subfunc.klmd, sizeof(cpu_subfunc.klmd), test_klmd_asm_block, 17 }, - /* MSA - Facility bit 77 */ - { "KMCTR", cpu_subfunc.kmctr, sizeof(cpu_subfunc.kmctr), test_kmctr_asm_block, 77 }, - { "KMF", cpu_subfunc.kmf, sizeof(cpu_subfunc.kmf), test_kmf_asm_block, 77 }, - { "KMO", cpu_subfunc.kmo, sizeof(cpu_subfunc.kmo), test_kmo_asm_block, 77 }, - { "PCC", cpu_subfunc.pcc, sizeof(cpu_subfunc.pcc), test_pcc_asm_block, 77 }, - /* MSA5 - Facility bit 57 */ - { "PPNO", cpu_subfunc.ppno, sizeof(cpu_subfunc.ppno), test_prno_asm_block, 57 }, - /* MSA8 - Facility bit 146 */ - { "KMA", cpu_subfunc.kma, sizeof(cpu_subfunc.kma), test_kma_asm_block, 146 }, - /* MSA9 - Facility bit 155 */ - { "KDSA", cpu_subfunc.kdsa, sizeof(cpu_subfunc.kdsa), test_kdsa_asm_block, 155 }, - /* SORTL - Facility bit 150 */ - { "SORTL", cpu_subfunc.sortl, sizeof(cpu_subfunc.sortl), test_sortl_asm_block, 150 }, - /* DFLTCC - Facility bit 151 */ - { "DFLTCC", cpu_subfunc.dfltcc, sizeof(cpu_subfunc.dfltcc), test_dfltcc_asm_block, 151 }, - /* Concurrent-function facility - Facility bit 201 */ - { "PFCR", cpu_subfunc.pfcr, sizeof(cpu_subfunc.pfcr), test_pfcr_asm_block, 201 }, -}; - -int main(int argc, char *argv[]) -{ - struct kvm_vm *vm; - int idx; - - ksft_print_header(); - - vm = vm_create(1); - - memset(&cpu_subfunc, 0, sizeof(cpu_subfunc)); - get_cpu_machine_subfuntions(vm, &cpu_subfunc); - - ksft_set_plan(ARRAY_SIZE(testlist)); - for (idx = 0; idx < ARRAY_SIZE(testlist); idx++) { - if (test_facility(testlist[idx].facility_bit)) { - u8 *array = malloc(testlist[idx].array_size); - - testlist[idx].test((u8 (*)[testlist[idx].array_size])array); - - TEST_ASSERT_EQ(memcmp(testlist[idx].subfunc_array, - array, testlist[idx].array_size), 0); - - ksft_test_result_pass("%s\n", testlist[idx].subfunc_name); - free(array); - } else { - ksft_test_result_skip("%s feature is not avaialable\n", - testlist[idx].subfunc_name); - } - } - - kvm_vm_free(vm); - ksft_finished(); -} diff --git a/tools/testing/selftests/kvm/s390x/debug_test.c b/tools/testing/selftests/kvm/s390x/debug_test.c deleted file mode 100644 index ad8095968601..000000000000 --- a/tools/testing/selftests/kvm/s390x/debug_test.c +++ /dev/null @@ -1,160 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* Test KVM debugging features. */ -#include "kvm_util.h" -#include "test_util.h" -#include "sie.h" - -#include - -#define __LC_SVC_NEW_PSW 0x1c0 -#define __LC_PGM_NEW_PSW 0x1d0 -#define IPA0_DIAG 0x8300 -#define PGM_SPECIFICATION 0x06 - -/* Common code for testing single-stepping interruptions. 
*/ -extern char int_handler[]; -asm("int_handler:\n" - "j .\n"); - -static struct kvm_vm *test_step_int_1(struct kvm_vcpu **vcpu, void *guest_code, - size_t new_psw_off, uint64_t *new_psw) -{ - struct kvm_guest_debug debug = {}; - struct kvm_regs regs; - struct kvm_vm *vm; - char *lowcore; - - vm = vm_create_with_one_vcpu(vcpu, guest_code); - lowcore = addr_gpa2hva(vm, 0); - new_psw[0] = (*vcpu)->run->psw_mask; - new_psw[1] = (uint64_t)int_handler; - memcpy(lowcore + new_psw_off, new_psw, 16); - vcpu_regs_get(*vcpu, ®s); - regs.gprs[2] = -1; - vcpu_regs_set(*vcpu, ®s); - debug.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP; - vcpu_guest_debug_set(*vcpu, &debug); - vcpu_run(*vcpu); - - return vm; -} - -static void test_step_int(void *guest_code, size_t new_psw_off) -{ - struct kvm_vcpu *vcpu; - uint64_t new_psw[2]; - struct kvm_vm *vm; - - vm = test_step_int_1(&vcpu, guest_code, new_psw_off, new_psw); - TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_DEBUG); - TEST_ASSERT_EQ(vcpu->run->psw_mask, new_psw[0]); - TEST_ASSERT_EQ(vcpu->run->psw_addr, new_psw[1]); - kvm_vm_free(vm); -} - -/* Test single-stepping "boring" program interruptions. */ -extern char test_step_pgm_guest_code[]; -asm("test_step_pgm_guest_code:\n" - ".insn rr,0x1d00,%r1,%r0 /* dr %r1,%r0 */\n" - "j .\n"); - -static void test_step_pgm(void) -{ - test_step_int(test_step_pgm_guest_code, __LC_PGM_NEW_PSW); -} - -/* - * Test single-stepping program interruptions caused by DIAG. - * Userspace emulation must not interfere with single-stepping. - */ -extern char test_step_pgm_diag_guest_code[]; -asm("test_step_pgm_diag_guest_code:\n" - "diag %r0,%r0,0\n" - "j .\n"); - -static void test_step_pgm_diag(void) -{ - struct kvm_s390_irq irq = { - .type = KVM_S390_PROGRAM_INT, - .u.pgm.code = PGM_SPECIFICATION, - }; - struct kvm_vcpu *vcpu; - uint64_t new_psw[2]; - struct kvm_vm *vm; - - vm = test_step_int_1(&vcpu, test_step_pgm_diag_guest_code, - __LC_PGM_NEW_PSW, new_psw); - TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_S390_SIEIC); - TEST_ASSERT_EQ(vcpu->run->s390_sieic.icptcode, ICPT_INST); - TEST_ASSERT_EQ(vcpu->run->s390_sieic.ipa & 0xff00, IPA0_DIAG); - vcpu_ioctl(vcpu, KVM_S390_IRQ, &irq); - vcpu_run(vcpu); - TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_DEBUG); - TEST_ASSERT_EQ(vcpu->run->psw_mask, new_psw[0]); - TEST_ASSERT_EQ(vcpu->run->psw_addr, new_psw[1]); - kvm_vm_free(vm); -} - -/* - * Test single-stepping program interruptions caused by ISKE. - * CPUSTAT_KSS handling must not interfere with single-stepping. - */ -extern char test_step_pgm_iske_guest_code[]; -asm("test_step_pgm_iske_guest_code:\n" - "iske %r2,%r2\n" - "j .\n"); - -static void test_step_pgm_iske(void) -{ - test_step_int(test_step_pgm_iske_guest_code, __LC_PGM_NEW_PSW); -} - -/* - * Test single-stepping program interruptions caused by LCTL. - * KVM emulation must not interfere with single-stepping. - */ -extern char test_step_pgm_lctl_guest_code[]; -asm("test_step_pgm_lctl_guest_code:\n" - "lctl %c0,%c0,1\n" - "j .\n"); - -static void test_step_pgm_lctl(void) -{ - test_step_int(test_step_pgm_lctl_guest_code, __LC_PGM_NEW_PSW); -} - -/* Test single-stepping supervisor-call interruptions. */ -extern char test_step_svc_guest_code[]; -asm("test_step_svc_guest_code:\n" - "svc 0\n" - "j .\n"); - -static void test_step_svc(void) -{ - test_step_int(test_step_svc_guest_code, __LC_SVC_NEW_PSW); -} - -/* Run all tests above. 
*/ -static struct testdef { - const char *name; - void (*test)(void); -} testlist[] = { - { "single-step pgm", test_step_pgm }, - { "single-step pgm caused by diag", test_step_pgm_diag }, - { "single-step pgm caused by iske", test_step_pgm_iske }, - { "single-step pgm caused by lctl", test_step_pgm_lctl }, - { "single-step svc", test_step_svc }, -}; - -int main(int argc, char *argv[]) -{ - int idx; - - ksft_print_header(); - ksft_set_plan(ARRAY_SIZE(testlist)); - for (idx = 0; idx < ARRAY_SIZE(testlist); idx++) { - testlist[idx].test(); - ksft_test_result_pass("%s\n", testlist[idx].name); - } - ksft_finished(); -} diff --git a/tools/testing/selftests/kvm/s390x/memop.c b/tools/testing/selftests/kvm/s390x/memop.c deleted file mode 100644 index 4374b4cd2a80..000000000000 --- a/tools/testing/selftests/kvm/s390x/memop.c +++ /dev/null @@ -1,1187 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Test for s390x KVM_S390_MEM_OP - * - * Copyright (C) 2019, Red Hat, Inc. - */ -#include -#include -#include -#include -#include - -#include - -#include "test_util.h" -#include "kvm_util.h" -#include "kselftest.h" -#include "ucall_common.h" -#include "processor.h" - -enum mop_target { - LOGICAL, - SIDA, - ABSOLUTE, - INVALID, -}; - -enum mop_access_mode { - READ, - WRITE, - CMPXCHG, -}; - -struct mop_desc { - uintptr_t gaddr; - uintptr_t gaddr_v; - uint64_t set_flags; - unsigned int f_check : 1; - unsigned int f_inject : 1; - unsigned int f_key : 1; - unsigned int _gaddr_v : 1; - unsigned int _set_flags : 1; - unsigned int _sida_offset : 1; - unsigned int _ar : 1; - uint32_t size; - enum mop_target target; - enum mop_access_mode mode; - void *buf; - uint32_t sida_offset; - void *old; - uint8_t old_value[16]; - bool *cmpxchg_success; - uint8_t ar; - uint8_t key; -}; - -const uint8_t NO_KEY = 0xff; - -static struct kvm_s390_mem_op ksmo_from_desc(struct mop_desc *desc) -{ - struct kvm_s390_mem_op ksmo = { - .gaddr = (uintptr_t)desc->gaddr, - .size = desc->size, - .buf = ((uintptr_t)desc->buf), - .reserved = "ignored_ignored_ignored_ignored" - }; - - switch (desc->target) { - case LOGICAL: - if (desc->mode == READ) - ksmo.op = KVM_S390_MEMOP_LOGICAL_READ; - if (desc->mode == WRITE) - ksmo.op = KVM_S390_MEMOP_LOGICAL_WRITE; - break; - case SIDA: - if (desc->mode == READ) - ksmo.op = KVM_S390_MEMOP_SIDA_READ; - if (desc->mode == WRITE) - ksmo.op = KVM_S390_MEMOP_SIDA_WRITE; - break; - case ABSOLUTE: - if (desc->mode == READ) - ksmo.op = KVM_S390_MEMOP_ABSOLUTE_READ; - if (desc->mode == WRITE) - ksmo.op = KVM_S390_MEMOP_ABSOLUTE_WRITE; - if (desc->mode == CMPXCHG) { - ksmo.op = KVM_S390_MEMOP_ABSOLUTE_CMPXCHG; - ksmo.old_addr = (uint64_t)desc->old; - memcpy(desc->old_value, desc->old, desc->size); - } - break; - case INVALID: - ksmo.op = -1; - } - if (desc->f_check) - ksmo.flags |= KVM_S390_MEMOP_F_CHECK_ONLY; - if (desc->f_inject) - ksmo.flags |= KVM_S390_MEMOP_F_INJECT_EXCEPTION; - if (desc->_set_flags) - ksmo.flags = desc->set_flags; - if (desc->f_key && desc->key != NO_KEY) { - ksmo.flags |= KVM_S390_MEMOP_F_SKEY_PROTECTION; - ksmo.key = desc->key; - } - if (desc->_ar) - ksmo.ar = desc->ar; - else - ksmo.ar = 0; - if (desc->_sida_offset) - ksmo.sida_offset = desc->sida_offset; - - return ksmo; -} - -struct test_info { - struct kvm_vm *vm; - struct kvm_vcpu *vcpu; -}; - -#define PRINT_MEMOP false -static void print_memop(struct kvm_vcpu *vcpu, const struct kvm_s390_mem_op *ksmo) -{ - if (!PRINT_MEMOP) - return; - - if (!vcpu) - printf("vm memop("); - else - printf("vcpu memop("); - switch 
(ksmo->op) { - case KVM_S390_MEMOP_LOGICAL_READ: - printf("LOGICAL, READ, "); - break; - case KVM_S390_MEMOP_LOGICAL_WRITE: - printf("LOGICAL, WRITE, "); - break; - case KVM_S390_MEMOP_SIDA_READ: - printf("SIDA, READ, "); - break; - case KVM_S390_MEMOP_SIDA_WRITE: - printf("SIDA, WRITE, "); - break; - case KVM_S390_MEMOP_ABSOLUTE_READ: - printf("ABSOLUTE, READ, "); - break; - case KVM_S390_MEMOP_ABSOLUTE_WRITE: - printf("ABSOLUTE, WRITE, "); - break; - case KVM_S390_MEMOP_ABSOLUTE_CMPXCHG: - printf("ABSOLUTE, CMPXCHG, "); - break; - } - printf("gaddr=%llu, size=%u, buf=%llu, ar=%u, key=%u, old_addr=%llx", - ksmo->gaddr, ksmo->size, ksmo->buf, ksmo->ar, ksmo->key, - ksmo->old_addr); - if (ksmo->flags & KVM_S390_MEMOP_F_CHECK_ONLY) - printf(", CHECK_ONLY"); - if (ksmo->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) - printf(", INJECT_EXCEPTION"); - if (ksmo->flags & KVM_S390_MEMOP_F_SKEY_PROTECTION) - printf(", SKEY_PROTECTION"); - puts(")"); -} - -static int err_memop_ioctl(struct test_info info, struct kvm_s390_mem_op *ksmo, - struct mop_desc *desc) -{ - struct kvm_vcpu *vcpu = info.vcpu; - - if (!vcpu) - return __vm_ioctl(info.vm, KVM_S390_MEM_OP, ksmo); - else - return __vcpu_ioctl(vcpu, KVM_S390_MEM_OP, ksmo); -} - -static void memop_ioctl(struct test_info info, struct kvm_s390_mem_op *ksmo, - struct mop_desc *desc) -{ - int r; - - r = err_memop_ioctl(info, ksmo, desc); - if (ksmo->op == KVM_S390_MEMOP_ABSOLUTE_CMPXCHG) { - if (desc->cmpxchg_success) { - int diff = memcmp(desc->old_value, desc->old, desc->size); - *desc->cmpxchg_success = !diff; - } - } - TEST_ASSERT(!r, __KVM_IOCTL_ERROR("KVM_S390_MEM_OP", r)); -} - -#define MEMOP(err, info_p, mop_target_p, access_mode_p, buf_p, size_p, ...) \ -({ \ - struct test_info __info = (info_p); \ - struct mop_desc __desc = { \ - .target = (mop_target_p), \ - .mode = (access_mode_p), \ - .buf = (buf_p), \ - .size = (size_p), \ - __VA_ARGS__ \ - }; \ - struct kvm_s390_mem_op __ksmo; \ - \ - if (__desc._gaddr_v) { \ - if (__desc.target == ABSOLUTE) \ - __desc.gaddr = addr_gva2gpa(__info.vm, __desc.gaddr_v); \ - else \ - __desc.gaddr = __desc.gaddr_v; \ - } \ - __ksmo = ksmo_from_desc(&__desc); \ - print_memop(__info.vcpu, &__ksmo); \ - err##memop_ioctl(__info, &__ksmo, &__desc); \ -}) - -#define MOP(...) MEMOP(, __VA_ARGS__) -#define ERR_MOP(...) MEMOP(err_, __VA_ARGS__) - -#define GADDR(a) .gaddr = ((uintptr_t)a) -#define GADDR_V(v) ._gaddr_v = 1, .gaddr_v = ((uintptr_t)v) -#define CHECK_ONLY .f_check = 1 -#define SET_FLAGS(f) ._set_flags = 1, .set_flags = (f) -#define SIDA_OFFSET(o) ._sida_offset = 1, .sida_offset = (o) -#define AR(a) ._ar = 1, .ar = (a) -#define KEY(a) .f_key = 1, .key = (a) -#define INJECT .f_inject = 1 -#define CMPXCHG_OLD(o) .old = (o) -#define CMPXCHG_SUCCESS(s) .cmpxchg_success = (s) - -#define CHECK_N_DO(f, ...) 
({ f(__VA_ARGS__, CHECK_ONLY); f(__VA_ARGS__); }) - -#define CR0_FETCH_PROTECTION_OVERRIDE (1UL << (63 - 38)) -#define CR0_STORAGE_PROTECTION_OVERRIDE (1UL << (63 - 39)) - -static uint8_t __aligned(PAGE_SIZE) mem1[65536]; -static uint8_t __aligned(PAGE_SIZE) mem2[65536]; - -struct test_default { - struct kvm_vm *kvm_vm; - struct test_info vm; - struct test_info vcpu; - struct kvm_run *run; - int size; -}; - -static struct test_default test_default_init(void *guest_code) -{ - struct kvm_vcpu *vcpu; - struct test_default t; - - t.size = min((size_t)kvm_check_cap(KVM_CAP_S390_MEM_OP), sizeof(mem1)); - t.kvm_vm = vm_create_with_one_vcpu(&vcpu, guest_code); - t.vm = (struct test_info) { t.kvm_vm, NULL }; - t.vcpu = (struct test_info) { t.kvm_vm, vcpu }; - t.run = vcpu->run; - return t; -} - -enum stage { - /* Synced state set by host, e.g. DAT */ - STAGE_INITED, - /* Guest did nothing */ - STAGE_IDLED, - /* Guest set storage keys (specifics up to test case) */ - STAGE_SKEYS_SET, - /* Guest copied memory (locations up to test case) */ - STAGE_COPIED, - /* End of guest code reached */ - STAGE_DONE, -}; - -#define HOST_SYNC(info_p, stage) \ -({ \ - struct test_info __info = (info_p); \ - struct kvm_vcpu *__vcpu = __info.vcpu; \ - struct ucall uc; \ - int __stage = (stage); \ - \ - vcpu_run(__vcpu); \ - get_ucall(__vcpu, &uc); \ - if (uc.cmd == UCALL_ABORT) { \ - REPORT_GUEST_ASSERT(uc); \ - } \ - TEST_ASSERT_EQ(uc.cmd, UCALL_SYNC); \ - TEST_ASSERT_EQ(uc.args[1], __stage); \ -}) \ - -static void prepare_mem12(void) -{ - int i; - - for (i = 0; i < sizeof(mem1); i++) - mem1[i] = rand(); - memset(mem2, 0xaa, sizeof(mem2)); -} - -#define ASSERT_MEM_EQ(p1, p2, size) \ - TEST_ASSERT(!memcmp(p1, p2, size), "Memory contents do not match!") - -static void default_write_read(struct test_info copy_cpu, struct test_info mop_cpu, - enum mop_target mop_target, uint32_t size, uint8_t key) -{ - prepare_mem12(); - CHECK_N_DO(MOP, mop_cpu, mop_target, WRITE, mem1, size, - GADDR_V(mem1), KEY(key)); - HOST_SYNC(copy_cpu, STAGE_COPIED); - CHECK_N_DO(MOP, mop_cpu, mop_target, READ, mem2, size, - GADDR_V(mem2), KEY(key)); - ASSERT_MEM_EQ(mem1, mem2, size); -} - -static void default_read(struct test_info copy_cpu, struct test_info mop_cpu, - enum mop_target mop_target, uint32_t size, uint8_t key) -{ - prepare_mem12(); - CHECK_N_DO(MOP, mop_cpu, mop_target, WRITE, mem1, size, GADDR_V(mem1)); - HOST_SYNC(copy_cpu, STAGE_COPIED); - CHECK_N_DO(MOP, mop_cpu, mop_target, READ, mem2, size, - GADDR_V(mem2), KEY(key)); - ASSERT_MEM_EQ(mem1, mem2, size); -} - -static void default_cmpxchg(struct test_default *test, uint8_t key) -{ - for (int size = 1; size <= 16; size *= 2) { - for (int offset = 0; offset < 16; offset += size) { - uint8_t __aligned(16) new[16] = {}; - uint8_t __aligned(16) old[16]; - bool succ; - - prepare_mem12(); - default_write_read(test->vcpu, test->vcpu, LOGICAL, 16, NO_KEY); - - memcpy(&old, mem1, 16); - MOP(test->vm, ABSOLUTE, CMPXCHG, new + offset, - size, GADDR_V(mem1 + offset), - CMPXCHG_OLD(old + offset), - CMPXCHG_SUCCESS(&succ), KEY(key)); - HOST_SYNC(test->vcpu, STAGE_COPIED); - MOP(test->vm, ABSOLUTE, READ, mem2, 16, GADDR_V(mem2)); - TEST_ASSERT(succ, "exchange of values should succeed"); - memcpy(mem1 + offset, new + offset, size); - ASSERT_MEM_EQ(mem1, mem2, 16); - - memcpy(&old, mem1, 16); - new[offset]++; - old[offset]++; - MOP(test->vm, ABSOLUTE, CMPXCHG, new + offset, - size, GADDR_V(mem1 + offset), - CMPXCHG_OLD(old + offset), - CMPXCHG_SUCCESS(&succ), KEY(key)); - HOST_SYNC(test->vcpu, 
STAGE_COPIED); - MOP(test->vm, ABSOLUTE, READ, mem2, 16, GADDR_V(mem2)); - TEST_ASSERT(!succ, "exchange of values should not succeed"); - ASSERT_MEM_EQ(mem1, mem2, 16); - ASSERT_MEM_EQ(&old, mem1, 16); - } - } -} - -static void guest_copy(void) -{ - GUEST_SYNC(STAGE_INITED); - memcpy(&mem2, &mem1, sizeof(mem2)); - GUEST_SYNC(STAGE_COPIED); -} - -static void test_copy(void) -{ - struct test_default t = test_default_init(guest_copy); - - HOST_SYNC(t.vcpu, STAGE_INITED); - - default_write_read(t.vcpu, t.vcpu, LOGICAL, t.size, NO_KEY); - - kvm_vm_free(t.kvm_vm); -} - -static void test_copy_access_register(void) -{ - struct test_default t = test_default_init(guest_copy); - - HOST_SYNC(t.vcpu, STAGE_INITED); - - prepare_mem12(); - t.run->psw_mask &= ~(3UL << (63 - 17)); - t.run->psw_mask |= 1UL << (63 - 17); /* Enable AR mode */ - - /* - * Primary address space gets used if an access register - * contains zero. The host makes use of AR[1] so is a good - * candidate to ensure the guest AR (of zero) is used. - */ - CHECK_N_DO(MOP, t.vcpu, LOGICAL, WRITE, mem1, t.size, - GADDR_V(mem1), AR(1)); - HOST_SYNC(t.vcpu, STAGE_COPIED); - - CHECK_N_DO(MOP, t.vcpu, LOGICAL, READ, mem2, t.size, - GADDR_V(mem2), AR(1)); - ASSERT_MEM_EQ(mem1, mem2, t.size); - - kvm_vm_free(t.kvm_vm); -} - -static void set_storage_key_range(void *addr, size_t len, uint8_t key) -{ - uintptr_t _addr, abs, i; - int not_mapped = 0; - - _addr = (uintptr_t)addr; - for (i = _addr & PAGE_MASK; i < _addr + len; i += PAGE_SIZE) { - abs = i; - asm volatile ( - "lra %[abs], 0(0,%[abs])\n" - " jz 0f\n" - " llill %[not_mapped],1\n" - " j 1f\n" - "0: sske %[key], %[abs]\n" - "1:" - : [abs] "+&a" (abs), [not_mapped] "+r" (not_mapped) - : [key] "r" (key) - : "cc" - ); - GUEST_ASSERT_EQ(not_mapped, 0); - } -} - -static void guest_copy_key(void) -{ - set_storage_key_range(mem1, sizeof(mem1), 0x90); - set_storage_key_range(mem2, sizeof(mem2), 0x90); - GUEST_SYNC(STAGE_SKEYS_SET); - - for (;;) { - memcpy(&mem2, &mem1, sizeof(mem2)); - GUEST_SYNC(STAGE_COPIED); - } -} - -static void test_copy_key(void) -{ - struct test_default t = test_default_init(guest_copy_key); - - HOST_SYNC(t.vcpu, STAGE_SKEYS_SET); - - /* vm, no key */ - default_write_read(t.vcpu, t.vm, ABSOLUTE, t.size, NO_KEY); - - /* vm/vcpu, machting key or key 0 */ - default_write_read(t.vcpu, t.vcpu, LOGICAL, t.size, 0); - default_write_read(t.vcpu, t.vcpu, LOGICAL, t.size, 9); - default_write_read(t.vcpu, t.vm, ABSOLUTE, t.size, 0); - default_write_read(t.vcpu, t.vm, ABSOLUTE, t.size, 9); - /* - * There used to be different code paths for key handling depending on - * if the region crossed a page boundary. - * There currently are not, but the more tests the merrier. 
- */ - default_write_read(t.vcpu, t.vcpu, LOGICAL, 1, 0); - default_write_read(t.vcpu, t.vcpu, LOGICAL, 1, 9); - default_write_read(t.vcpu, t.vm, ABSOLUTE, 1, 0); - default_write_read(t.vcpu, t.vm, ABSOLUTE, 1, 9); - - /* vm/vcpu, mismatching keys on read, but no fetch protection */ - default_read(t.vcpu, t.vcpu, LOGICAL, t.size, 2); - default_read(t.vcpu, t.vm, ABSOLUTE, t.size, 2); - - kvm_vm_free(t.kvm_vm); -} - -static void test_cmpxchg_key(void) -{ - struct test_default t = test_default_init(guest_copy_key); - - HOST_SYNC(t.vcpu, STAGE_SKEYS_SET); - - default_cmpxchg(&t, NO_KEY); - default_cmpxchg(&t, 0); - default_cmpxchg(&t, 9); - - kvm_vm_free(t.kvm_vm); -} - -static __uint128_t cut_to_size(int size, __uint128_t val) -{ - switch (size) { - case 1: - return (uint8_t)val; - case 2: - return (uint16_t)val; - case 4: - return (uint32_t)val; - case 8: - return (uint64_t)val; - case 16: - return val; - } - GUEST_FAIL("Invalid size = %u", size); - return 0; -} - -static bool popcount_eq(__uint128_t a, __uint128_t b) -{ - unsigned int count_a, count_b; - - count_a = __builtin_popcountl((uint64_t)(a >> 64)) + - __builtin_popcountl((uint64_t)a); - count_b = __builtin_popcountl((uint64_t)(b >> 64)) + - __builtin_popcountl((uint64_t)b); - return count_a == count_b; -} - -static __uint128_t rotate(int size, __uint128_t val, int amount) -{ - unsigned int bits = size * 8; - - amount = (amount + bits) % bits; - val = cut_to_size(size, val); - if (!amount) - return val; - return (val << (bits - amount)) | (val >> amount); -} - -const unsigned int max_block = 16; - -static void choose_block(bool guest, int i, int *size, int *offset) -{ - unsigned int rand; - - rand = i; - if (guest) { - rand = rand * 19 + 11; - *size = 1 << ((rand % 3) + 2); - rand = rand * 19 + 11; - *offset = (rand % max_block) & ~(*size - 1); - } else { - rand = rand * 17 + 5; - *size = 1 << (rand % 5); - rand = rand * 17 + 5; - *offset = (rand % max_block) & ~(*size - 1); - } -} - -static __uint128_t permutate_bits(bool guest, int i, int size, __uint128_t old) -{ - unsigned int rand; - int amount; - bool swap; - - rand = i; - rand = rand * 3 + 1; - if (guest) - rand = rand * 3 + 1; - swap = rand % 2 == 0; - if (swap) { - int i, j; - __uint128_t new; - uint8_t byte0, byte1; - - rand = rand * 3 + 1; - i = rand % size; - rand = rand * 3 + 1; - j = rand % size; - if (i == j) - return old; - new = rotate(16, old, i * 8); - byte0 = new & 0xff; - new &= ~0xff; - new = rotate(16, new, -i * 8); - new = rotate(16, new, j * 8); - byte1 = new & 0xff; - new = (new & ~0xff) | byte0; - new = rotate(16, new, -j * 8); - new = rotate(16, new, i * 8); - new = new | byte1; - new = rotate(16, new, -i * 8); - return new; - } - rand = rand * 3 + 1; - amount = rand % (size * 8); - return rotate(size, old, amount); -} - -static bool _cmpxchg(int size, void *target, __uint128_t *old_addr, __uint128_t new) -{ - bool ret; - - switch (size) { - case 4: { - uint32_t old = *old_addr; - - asm volatile ("cs %[old],%[new],%[address]" - : [old] "+d" (old), - [address] "+Q" (*(uint32_t *)(target)) - : [new] "d" ((uint32_t)new) - : "cc" - ); - ret = old == (uint32_t)*old_addr; - *old_addr = old; - return ret; - } - case 8: { - uint64_t old = *old_addr; - - asm volatile ("csg %[old],%[new],%[address]" - : [old] "+d" (old), - [address] "+Q" (*(uint64_t *)(target)) - : [new] "d" ((uint64_t)new) - : "cc" - ); - ret = old == (uint64_t)*old_addr; - *old_addr = old; - return ret; - } - case 16: { - __uint128_t old = *old_addr; - - asm volatile ("cdsg 
%[old],%[new],%[address]" - : [old] "+d" (old), - [address] "+Q" (*(__uint128_t *)(target)) - : [new] "d" (new) - : "cc" - ); - ret = old == *old_addr; - *old_addr = old; - return ret; - } - } - GUEST_FAIL("Invalid size = %u", size); - return 0; -} - -const unsigned int cmpxchg_iter_outer = 100, cmpxchg_iter_inner = 10000; - -static void guest_cmpxchg_key(void) -{ - int size, offset; - __uint128_t old, new; - - set_storage_key_range(mem1, max_block, 0x10); - set_storage_key_range(mem2, max_block, 0x10); - GUEST_SYNC(STAGE_SKEYS_SET); - - for (int i = 0; i < cmpxchg_iter_outer; i++) { - do { - old = 1; - } while (!_cmpxchg(16, mem1, &old, 0)); - for (int j = 0; j < cmpxchg_iter_inner; j++) { - choose_block(true, i + j, &size, &offset); - do { - new = permutate_bits(true, i + j, size, old); - } while (!_cmpxchg(size, mem2 + offset, &old, new)); - } - } - - GUEST_SYNC(STAGE_DONE); -} - -static void *run_guest(void *data) -{ - struct test_info *info = data; - - HOST_SYNC(*info, STAGE_DONE); - return NULL; -} - -static char *quad_to_char(__uint128_t *quad, int size) -{ - return ((char *)quad) + (sizeof(*quad) - size); -} - -static void test_cmpxchg_key_concurrent(void) -{ - struct test_default t = test_default_init(guest_cmpxchg_key); - int size, offset; - __uint128_t old, new; - bool success; - pthread_t thread; - - HOST_SYNC(t.vcpu, STAGE_SKEYS_SET); - prepare_mem12(); - MOP(t.vcpu, LOGICAL, WRITE, mem1, max_block, GADDR_V(mem2)); - pthread_create(&thread, NULL, run_guest, &t.vcpu); - - for (int i = 0; i < cmpxchg_iter_outer; i++) { - do { - old = 0; - new = 1; - MOP(t.vm, ABSOLUTE, CMPXCHG, &new, - sizeof(new), GADDR_V(mem1), - CMPXCHG_OLD(&old), - CMPXCHG_SUCCESS(&success), KEY(1)); - } while (!success); - for (int j = 0; j < cmpxchg_iter_inner; j++) { - choose_block(false, i + j, &size, &offset); - do { - new = permutate_bits(false, i + j, size, old); - MOP(t.vm, ABSOLUTE, CMPXCHG, quad_to_char(&new, size), - size, GADDR_V(mem2 + offset), - CMPXCHG_OLD(quad_to_char(&old, size)), - CMPXCHG_SUCCESS(&success), KEY(1)); - } while (!success); - } - } - - pthread_join(thread, NULL); - - MOP(t.vcpu, LOGICAL, READ, mem2, max_block, GADDR_V(mem2)); - TEST_ASSERT(popcount_eq(*(__uint128_t *)mem1, *(__uint128_t *)mem2), - "Must retain number of set bits"); - - kvm_vm_free(t.kvm_vm); -} - -static void guest_copy_key_fetch_prot(void) -{ - /* - * For some reason combining the first sync with override enablement - * results in an exception when calling HOST_SYNC. - */ - GUEST_SYNC(STAGE_INITED); - /* Storage protection override applies to both store and fetch. 
*/ - set_storage_key_range(mem1, sizeof(mem1), 0x98); - set_storage_key_range(mem2, sizeof(mem2), 0x98); - GUEST_SYNC(STAGE_SKEYS_SET); - - for (;;) { - memcpy(&mem2, &mem1, sizeof(mem2)); - GUEST_SYNC(STAGE_COPIED); - } -} - -static void test_copy_key_storage_prot_override(void) -{ - struct test_default t = test_default_init(guest_copy_key_fetch_prot); - - HOST_SYNC(t.vcpu, STAGE_INITED); - t.run->s.regs.crs[0] |= CR0_STORAGE_PROTECTION_OVERRIDE; - t.run->kvm_dirty_regs = KVM_SYNC_CRS; - HOST_SYNC(t.vcpu, STAGE_SKEYS_SET); - - /* vcpu, mismatching keys, storage protection override in effect */ - default_write_read(t.vcpu, t.vcpu, LOGICAL, t.size, 2); - - kvm_vm_free(t.kvm_vm); -} - -static void test_copy_key_fetch_prot(void) -{ - struct test_default t = test_default_init(guest_copy_key_fetch_prot); - - HOST_SYNC(t.vcpu, STAGE_INITED); - HOST_SYNC(t.vcpu, STAGE_SKEYS_SET); - - /* vm/vcpu, matching key, fetch protection in effect */ - default_read(t.vcpu, t.vcpu, LOGICAL, t.size, 9); - default_read(t.vcpu, t.vm, ABSOLUTE, t.size, 9); - - kvm_vm_free(t.kvm_vm); -} - -#define ERR_PROT_MOP(...) \ -({ \ - int rv; \ - \ - rv = ERR_MOP(__VA_ARGS__); \ - TEST_ASSERT(rv == 4, "Should result in protection exception"); \ -}) - -static void guest_error_key(void) -{ - GUEST_SYNC(STAGE_INITED); - set_storage_key_range(mem1, PAGE_SIZE, 0x18); - set_storage_key_range(mem1 + PAGE_SIZE, sizeof(mem1) - PAGE_SIZE, 0x98); - GUEST_SYNC(STAGE_SKEYS_SET); - GUEST_SYNC(STAGE_IDLED); -} - -static void test_errors_key(void) -{ - struct test_default t = test_default_init(guest_error_key); - - HOST_SYNC(t.vcpu, STAGE_INITED); - HOST_SYNC(t.vcpu, STAGE_SKEYS_SET); - - /* vm/vcpu, mismatching keys, fetch protection in effect */ - CHECK_N_DO(ERR_PROT_MOP, t.vcpu, LOGICAL, WRITE, mem1, t.size, GADDR_V(mem1), KEY(2)); - CHECK_N_DO(ERR_PROT_MOP, t.vcpu, LOGICAL, READ, mem2, t.size, GADDR_V(mem1), KEY(2)); - CHECK_N_DO(ERR_PROT_MOP, t.vm, ABSOLUTE, WRITE, mem1, t.size, GADDR_V(mem1), KEY(2)); - CHECK_N_DO(ERR_PROT_MOP, t.vm, ABSOLUTE, READ, mem2, t.size, GADDR_V(mem1), KEY(2)); - - kvm_vm_free(t.kvm_vm); -} - -static void test_errors_cmpxchg_key(void) -{ - struct test_default t = test_default_init(guest_copy_key_fetch_prot); - int i; - - HOST_SYNC(t.vcpu, STAGE_INITED); - HOST_SYNC(t.vcpu, STAGE_SKEYS_SET); - - for (i = 1; i <= 16; i *= 2) { - __uint128_t old = 0; - - ERR_PROT_MOP(t.vm, ABSOLUTE, CMPXCHG, mem2, i, GADDR_V(mem2), - CMPXCHG_OLD(&old), KEY(2)); - } - - kvm_vm_free(t.kvm_vm); -} - -static void test_termination(void) -{ - struct test_default t = test_default_init(guest_error_key); - uint64_t prefix; - uint64_t teid; - uint64_t teid_mask = BIT(63 - 56) | BIT(63 - 60) | BIT(63 - 61); - uint64_t psw[2]; - - HOST_SYNC(t.vcpu, STAGE_INITED); - HOST_SYNC(t.vcpu, STAGE_SKEYS_SET); - - /* vcpu, mismatching keys after first page */ - ERR_PROT_MOP(t.vcpu, LOGICAL, WRITE, mem1, t.size, GADDR_V(mem1), KEY(1), INJECT); - /* - * The memop injected a program exception and the test needs to check the - * Translation-Exception Identification (TEID). It is necessary to run - * the guest in order to be able to read the TEID from guest memory. - * Set the guest program new PSW, so the guest state is not clobbered. 
- */ - prefix = t.run->s.regs.prefix; - psw[0] = t.run->psw_mask; - psw[1] = t.run->psw_addr; - MOP(t.vm, ABSOLUTE, WRITE, psw, sizeof(psw), GADDR(prefix + 464)); - HOST_SYNC(t.vcpu, STAGE_IDLED); - MOP(t.vm, ABSOLUTE, READ, &teid, sizeof(teid), GADDR(prefix + 168)); - /* Bits 56, 60, 61 form a code, 0 being the only one allowing for termination */ - TEST_ASSERT_EQ(teid & teid_mask, 0); - - kvm_vm_free(t.kvm_vm); -} - -static void test_errors_key_storage_prot_override(void) -{ - struct test_default t = test_default_init(guest_copy_key_fetch_prot); - - HOST_SYNC(t.vcpu, STAGE_INITED); - t.run->s.regs.crs[0] |= CR0_STORAGE_PROTECTION_OVERRIDE; - t.run->kvm_dirty_regs = KVM_SYNC_CRS; - HOST_SYNC(t.vcpu, STAGE_SKEYS_SET); - - /* vm, mismatching keys, storage protection override not applicable to vm */ - CHECK_N_DO(ERR_PROT_MOP, t.vm, ABSOLUTE, WRITE, mem1, t.size, GADDR_V(mem1), KEY(2)); - CHECK_N_DO(ERR_PROT_MOP, t.vm, ABSOLUTE, READ, mem2, t.size, GADDR_V(mem2), KEY(2)); - - kvm_vm_free(t.kvm_vm); -} - -const uint64_t last_page_addr = -PAGE_SIZE; - -static void guest_copy_key_fetch_prot_override(void) -{ - int i; - char *page_0 = 0; - - GUEST_SYNC(STAGE_INITED); - set_storage_key_range(0, PAGE_SIZE, 0x18); - set_storage_key_range((void *)last_page_addr, PAGE_SIZE, 0x0); - asm volatile ("sske %[key],%[addr]\n" :: [addr] "r"(0L), [key] "r"(0x18) : "cc"); - GUEST_SYNC(STAGE_SKEYS_SET); - - for (;;) { - for (i = 0; i < PAGE_SIZE; i++) - page_0[i] = mem1[i]; - GUEST_SYNC(STAGE_COPIED); - } -} - -static void test_copy_key_fetch_prot_override(void) -{ - struct test_default t = test_default_init(guest_copy_key_fetch_prot_override); - vm_vaddr_t guest_0_page, guest_last_page; - - guest_0_page = vm_vaddr_alloc(t.kvm_vm, PAGE_SIZE, 0); - guest_last_page = vm_vaddr_alloc(t.kvm_vm, PAGE_SIZE, last_page_addr); - if (guest_0_page != 0 || guest_last_page != last_page_addr) { - print_skip("did not allocate guest pages at required positions"); - goto out; - } - - HOST_SYNC(t.vcpu, STAGE_INITED); - t.run->s.regs.crs[0] |= CR0_FETCH_PROTECTION_OVERRIDE; - t.run->kvm_dirty_regs = KVM_SYNC_CRS; - HOST_SYNC(t.vcpu, STAGE_SKEYS_SET); - - /* vcpu, mismatching keys on fetch, fetch protection override applies */ - prepare_mem12(); - MOP(t.vcpu, LOGICAL, WRITE, mem1, PAGE_SIZE, GADDR_V(mem1)); - HOST_SYNC(t.vcpu, STAGE_COPIED); - CHECK_N_DO(MOP, t.vcpu, LOGICAL, READ, mem2, 2048, GADDR_V(guest_0_page), KEY(2)); - ASSERT_MEM_EQ(mem1, mem2, 2048); - - /* - * vcpu, mismatching keys on fetch, fetch protection override applies, - * wraparound - */ - prepare_mem12(); - MOP(t.vcpu, LOGICAL, WRITE, mem1, 2 * PAGE_SIZE, GADDR_V(guest_last_page)); - HOST_SYNC(t.vcpu, STAGE_COPIED); - CHECK_N_DO(MOP, t.vcpu, LOGICAL, READ, mem2, PAGE_SIZE + 2048, - GADDR_V(guest_last_page), KEY(2)); - ASSERT_MEM_EQ(mem1, mem2, 2048); - -out: - kvm_vm_free(t.kvm_vm); -} - -static void test_errors_key_fetch_prot_override_not_enabled(void) -{ - struct test_default t = test_default_init(guest_copy_key_fetch_prot_override); - vm_vaddr_t guest_0_page, guest_last_page; - - guest_0_page = vm_vaddr_alloc(t.kvm_vm, PAGE_SIZE, 0); - guest_last_page = vm_vaddr_alloc(t.kvm_vm, PAGE_SIZE, last_page_addr); - if (guest_0_page != 0 || guest_last_page != last_page_addr) { - print_skip("did not allocate guest pages at required positions"); - goto out; - } - HOST_SYNC(t.vcpu, STAGE_INITED); - HOST_SYNC(t.vcpu, STAGE_SKEYS_SET); - - /* vcpu, mismatching keys on fetch, fetch protection override not enabled */ - CHECK_N_DO(ERR_PROT_MOP, t.vcpu, LOGICAL, READ, mem2, 
2048, GADDR_V(0), KEY(2)); - -out: - kvm_vm_free(t.kvm_vm); -} - -static void test_errors_key_fetch_prot_override_enabled(void) -{ - struct test_default t = test_default_init(guest_copy_key_fetch_prot_override); - vm_vaddr_t guest_0_page, guest_last_page; - - guest_0_page = vm_vaddr_alloc(t.kvm_vm, PAGE_SIZE, 0); - guest_last_page = vm_vaddr_alloc(t.kvm_vm, PAGE_SIZE, last_page_addr); - if (guest_0_page != 0 || guest_last_page != last_page_addr) { - print_skip("did not allocate guest pages at required positions"); - goto out; - } - HOST_SYNC(t.vcpu, STAGE_INITED); - t.run->s.regs.crs[0] |= CR0_FETCH_PROTECTION_OVERRIDE; - t.run->kvm_dirty_regs = KVM_SYNC_CRS; - HOST_SYNC(t.vcpu, STAGE_SKEYS_SET); - - /* - * vcpu, mismatching keys on fetch, - * fetch protection override does not apply because memory range exceeded - */ - CHECK_N_DO(ERR_PROT_MOP, t.vcpu, LOGICAL, READ, mem2, 2048 + 1, GADDR_V(0), KEY(2)); - CHECK_N_DO(ERR_PROT_MOP, t.vcpu, LOGICAL, READ, mem2, PAGE_SIZE + 2048 + 1, - GADDR_V(guest_last_page), KEY(2)); - /* vm, fetch protected override does not apply */ - CHECK_N_DO(ERR_PROT_MOP, t.vm, ABSOLUTE, READ, mem2, 2048, GADDR(0), KEY(2)); - CHECK_N_DO(ERR_PROT_MOP, t.vm, ABSOLUTE, READ, mem2, 2048, GADDR_V(guest_0_page), KEY(2)); - -out: - kvm_vm_free(t.kvm_vm); -} - -static void guest_idle(void) -{ - GUEST_SYNC(STAGE_INITED); /* for consistency's sake */ - for (;;) - GUEST_SYNC(STAGE_IDLED); -} - -static void _test_errors_common(struct test_info info, enum mop_target target, int size) -{ - int rv; - - /* Bad size: */ - rv = ERR_MOP(info, target, WRITE, mem1, -1, GADDR_V(mem1)); - TEST_ASSERT(rv == -1 && errno == E2BIG, "ioctl allows insane sizes"); - - /* Zero size: */ - rv = ERR_MOP(info, target, WRITE, mem1, 0, GADDR_V(mem1)); - TEST_ASSERT(rv == -1 && (errno == EINVAL || errno == ENOMEM), - "ioctl allows 0 as size"); - - /* Bad flags: */ - rv = ERR_MOP(info, target, WRITE, mem1, size, GADDR_V(mem1), SET_FLAGS(-1)); - TEST_ASSERT(rv == -1 && errno == EINVAL, "ioctl allows all flags"); - - /* Bad guest address: */ - rv = ERR_MOP(info, target, WRITE, mem1, size, GADDR((void *)~0xfffUL), CHECK_ONLY); - TEST_ASSERT(rv > 0, "ioctl does not report bad guest memory address with CHECK_ONLY"); - rv = ERR_MOP(info, target, WRITE, mem1, size, GADDR((void *)~0xfffUL)); - TEST_ASSERT(rv > 0, "ioctl does not report bad guest memory address on write"); - - /* Bad host address: */ - rv = ERR_MOP(info, target, WRITE, 0, size, GADDR_V(mem1)); - TEST_ASSERT(rv == -1 && errno == EFAULT, - "ioctl does not report bad host memory address"); - - /* Bad key: */ - rv = ERR_MOP(info, target, WRITE, mem1, size, GADDR_V(mem1), KEY(17)); - TEST_ASSERT(rv == -1 && errno == EINVAL, "ioctl allows invalid key"); -} - -static void test_errors(void) -{ - struct test_default t = test_default_init(guest_idle); - int rv; - - HOST_SYNC(t.vcpu, STAGE_INITED); - - _test_errors_common(t.vcpu, LOGICAL, t.size); - _test_errors_common(t.vm, ABSOLUTE, t.size); - - /* Bad operation: */ - rv = ERR_MOP(t.vcpu, INVALID, WRITE, mem1, t.size, GADDR_V(mem1)); - TEST_ASSERT(rv == -1 && errno == EINVAL, "ioctl allows bad operations"); - /* virtual addresses are not translated when passing INVALID */ - rv = ERR_MOP(t.vm, INVALID, WRITE, mem1, PAGE_SIZE, GADDR(0)); - TEST_ASSERT(rv == -1 && errno == EINVAL, "ioctl allows bad operations"); - - /* Bad access register: */ - t.run->psw_mask &= ~(3UL << (63 - 17)); - t.run->psw_mask |= 1UL << (63 - 17); /* Enable AR mode */ - HOST_SYNC(t.vcpu, STAGE_IDLED); /* To sync new state to SIE block 
*/ - rv = ERR_MOP(t.vcpu, LOGICAL, WRITE, mem1, t.size, GADDR_V(mem1), AR(17)); - TEST_ASSERT(rv == -1 && errno == EINVAL, "ioctl allows ARs > 15"); - t.run->psw_mask &= ~(3UL << (63 - 17)); /* Disable AR mode */ - HOST_SYNC(t.vcpu, STAGE_IDLED); /* Run to sync new state */ - - /* Check that the SIDA calls are rejected for non-protected guests */ - rv = ERR_MOP(t.vcpu, SIDA, READ, mem1, 8, GADDR(0), SIDA_OFFSET(0x1c0)); - TEST_ASSERT(rv == -1 && errno == EINVAL, - "ioctl does not reject SIDA_READ in non-protected mode"); - rv = ERR_MOP(t.vcpu, SIDA, WRITE, mem1, 8, GADDR(0), SIDA_OFFSET(0x1c0)); - TEST_ASSERT(rv == -1 && errno == EINVAL, - "ioctl does not reject SIDA_WRITE in non-protected mode"); - - kvm_vm_free(t.kvm_vm); -} - -static void test_errors_cmpxchg(void) -{ - struct test_default t = test_default_init(guest_idle); - __uint128_t old; - int rv, i, power = 1; - - HOST_SYNC(t.vcpu, STAGE_INITED); - - for (i = 0; i < 32; i++) { - if (i == power) { - power *= 2; - continue; - } - rv = ERR_MOP(t.vm, ABSOLUTE, CMPXCHG, mem1, i, GADDR_V(mem1), - CMPXCHG_OLD(&old)); - TEST_ASSERT(rv == -1 && errno == EINVAL, - "ioctl allows bad size for cmpxchg"); - } - for (i = 1; i <= 16; i *= 2) { - rv = ERR_MOP(t.vm, ABSOLUTE, CMPXCHG, mem1, i, GADDR((void *)~0xfffUL), - CMPXCHG_OLD(&old)); - TEST_ASSERT(rv > 0, "ioctl allows bad guest address for cmpxchg"); - } - for (i = 2; i <= 16; i *= 2) { - rv = ERR_MOP(t.vm, ABSOLUTE, CMPXCHG, mem1, i, GADDR_V(mem1 + 1), - CMPXCHG_OLD(&old)); - TEST_ASSERT(rv == -1 && errno == EINVAL, - "ioctl allows bad alignment for cmpxchg"); - } - - kvm_vm_free(t.kvm_vm); -} - -int main(int argc, char *argv[]) -{ - int extension_cap, idx; - - TEST_REQUIRE(kvm_has_cap(KVM_CAP_S390_MEM_OP)); - extension_cap = kvm_check_cap(KVM_CAP_S390_MEM_OP_EXTENSION); - - struct testdef { - const char *name; - void (*test)(void); - bool requirements_met; - } testlist[] = { - { - .name = "simple copy", - .test = test_copy, - .requirements_met = true, - }, - { - .name = "generic error checks", - .test = test_errors, - .requirements_met = true, - }, - { - .name = "copy with storage keys", - .test = test_copy_key, - .requirements_met = extension_cap > 0, - }, - { - .name = "cmpxchg with storage keys", - .test = test_cmpxchg_key, - .requirements_met = extension_cap & 0x2, - }, - { - .name = "concurrently cmpxchg with storage keys", - .test = test_cmpxchg_key_concurrent, - .requirements_met = extension_cap & 0x2, - }, - { - .name = "copy with key storage protection override", - .test = test_copy_key_storage_prot_override, - .requirements_met = extension_cap > 0, - }, - { - .name = "copy with key fetch protection", - .test = test_copy_key_fetch_prot, - .requirements_met = extension_cap > 0, - }, - { - .name = "copy with key fetch protection override", - .test = test_copy_key_fetch_prot_override, - .requirements_met = extension_cap > 0, - }, - { - .name = "copy with access register mode", - .test = test_copy_access_register, - .requirements_met = true, - }, - { - .name = "error checks with key", - .test = test_errors_key, - .requirements_met = extension_cap > 0, - }, - { - .name = "error checks for cmpxchg with key", - .test = test_errors_cmpxchg_key, - .requirements_met = extension_cap & 0x2, - }, - { - .name = "error checks for cmpxchg", - .test = test_errors_cmpxchg, - .requirements_met = extension_cap & 0x2, - }, - { - .name = "termination", - .test = test_termination, - .requirements_met = extension_cap > 0, - }, - { - .name = "error checks with key storage protection override", - .test = 
test_errors_key_storage_prot_override, - .requirements_met = extension_cap > 0, - }, - { - .name = "error checks without key fetch prot override", - .test = test_errors_key_fetch_prot_override_not_enabled, - .requirements_met = extension_cap > 0, - }, - { - .name = "error checks with key fetch prot override", - .test = test_errors_key_fetch_prot_override_enabled, - .requirements_met = extension_cap > 0, - }, - }; - - ksft_print_header(); - ksft_set_plan(ARRAY_SIZE(testlist)); - - for (idx = 0; idx < ARRAY_SIZE(testlist); idx++) { - if (testlist[idx].requirements_met) { - testlist[idx].test(); - ksft_test_result_pass("%s\n", testlist[idx].name); - } else { - ksft_test_result_skip("%s - requirements not met (kernel has extension cap %#x)\n", - testlist[idx].name, extension_cap); - } - } - - ksft_finished(); /* Print results and exit() accordingly */ -} diff --git a/tools/testing/selftests/kvm/s390x/resets.c b/tools/testing/selftests/kvm/s390x/resets.c deleted file mode 100644 index b58f75b381e5..000000000000 --- a/tools/testing/selftests/kvm/s390x/resets.c +++ /dev/null @@ -1,313 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Test for s390x CPU resets - * - * Copyright (C) 2020, IBM - */ - -#include -#include -#include -#include - -#include "test_util.h" -#include "kvm_util.h" -#include "kselftest.h" - -#define LOCAL_IRQS 32 - -#define ARBITRARY_NON_ZERO_VCPU_ID 3 - -struct kvm_s390_irq buf[ARBITRARY_NON_ZERO_VCPU_ID + LOCAL_IRQS]; - -static uint8_t regs_null[512]; - -static void guest_code_initial(void) -{ - /* set several CRs to "safe" value */ - unsigned long cr2_59 = 0x10; /* enable guarded storage */ - unsigned long cr8_63 = 0x1; /* monitor mask = 1 */ - unsigned long cr10 = 1; /* PER START */ - unsigned long cr11 = -1; /* PER END */ - - - /* Dirty registers */ - asm volatile ( - " lghi 2,0x11\n" /* Round toward 0 */ - " sfpc 2\n" /* set fpc to !=0 */ - " lctlg 2,2,%0\n" - " lctlg 8,8,%1\n" - " lctlg 10,10,%2\n" - " lctlg 11,11,%3\n" - /* now clobber some general purpose regs */ - " llihh 0,0xffff\n" - " llihl 1,0x5555\n" - " llilh 2,0xaaaa\n" - " llill 3,0x0000\n" - /* now clobber a floating point reg */ - " lghi 4,0x1\n" - " cdgbr 0,4\n" - /* now clobber an access reg */ - " sar 9,4\n" - /* We embed diag 501 here to control register content */ - " diag 0,0,0x501\n" - : - : "m" (cr2_59), "m" (cr8_63), "m" (cr10), "m" (cr11) - /* no clobber list as this should not return */ - ); -} - -static void test_one_reg(struct kvm_vcpu *vcpu, uint64_t id, uint64_t value) -{ - uint64_t eval_reg; - - eval_reg = vcpu_get_reg(vcpu, id); - TEST_ASSERT(eval_reg == value, "value == 0x%lx", value); -} - -static void assert_noirq(struct kvm_vcpu *vcpu) -{ - struct kvm_s390_irq_state irq_state; - int irqs; - - irq_state.len = sizeof(buf); - irq_state.buf = (unsigned long)buf; - irqs = __vcpu_ioctl(vcpu, KVM_S390_GET_IRQ_STATE, &irq_state); - /* - * irqs contains the number of retrieved interrupts. Any interrupt - * (notably, the emergency call interrupt we have injected) should - * be cleared by the resets, so this should be 0. 
- */ - TEST_ASSERT(irqs >= 0, "Could not fetch IRQs: errno %d", errno); - TEST_ASSERT(!irqs, "IRQ pending"); -} - -static void assert_clear(struct kvm_vcpu *vcpu) -{ - struct kvm_sync_regs *sync_regs = &vcpu->run->s.regs; - struct kvm_sregs sregs; - struct kvm_regs regs; - struct kvm_fpu fpu; - - vcpu_regs_get(vcpu, ®s); - TEST_ASSERT(!memcmp(®s.gprs, regs_null, sizeof(regs.gprs)), "grs == 0"); - - vcpu_sregs_get(vcpu, &sregs); - TEST_ASSERT(!memcmp(&sregs.acrs, regs_null, sizeof(sregs.acrs)), "acrs == 0"); - - vcpu_fpu_get(vcpu, &fpu); - TEST_ASSERT(!memcmp(&fpu.fprs, regs_null, sizeof(fpu.fprs)), "fprs == 0"); - - /* sync regs */ - TEST_ASSERT(!memcmp(sync_regs->gprs, regs_null, sizeof(sync_regs->gprs)), - "gprs0-15 == 0 (sync_regs)"); - - TEST_ASSERT(!memcmp(sync_regs->acrs, regs_null, sizeof(sync_regs->acrs)), - "acrs0-15 == 0 (sync_regs)"); - - TEST_ASSERT(!memcmp(sync_regs->vrs, regs_null, sizeof(sync_regs->vrs)), - "vrs0-15 == 0 (sync_regs)"); -} - -static void assert_initial_noclear(struct kvm_vcpu *vcpu) -{ - struct kvm_sync_regs *sync_regs = &vcpu->run->s.regs; - - TEST_ASSERT(sync_regs->gprs[0] == 0xffff000000000000UL, - "gpr0 == 0xffff000000000000 (sync_regs)"); - TEST_ASSERT(sync_regs->gprs[1] == 0x0000555500000000UL, - "gpr1 == 0x0000555500000000 (sync_regs)"); - TEST_ASSERT(sync_regs->gprs[2] == 0x00000000aaaa0000UL, - "gpr2 == 0x00000000aaaa0000 (sync_regs)"); - TEST_ASSERT(sync_regs->gprs[3] == 0x0000000000000000UL, - "gpr3 == 0x0000000000000000 (sync_regs)"); - TEST_ASSERT(sync_regs->fprs[0] == 0x3ff0000000000000UL, - "fpr0 == 0f1 (sync_regs)"); - TEST_ASSERT(sync_regs->acrs[9] == 1, "ar9 == 1 (sync_regs)"); -} - -static void assert_initial(struct kvm_vcpu *vcpu) -{ - struct kvm_sync_regs *sync_regs = &vcpu->run->s.regs; - struct kvm_sregs sregs; - struct kvm_fpu fpu; - - /* KVM_GET_SREGS */ - vcpu_sregs_get(vcpu, &sregs); - TEST_ASSERT(sregs.crs[0] == 0xE0UL, "cr0 == 0xE0 (KVM_GET_SREGS)"); - TEST_ASSERT(sregs.crs[14] == 0xC2000000UL, - "cr14 == 0xC2000000 (KVM_GET_SREGS)"); - TEST_ASSERT(!memcmp(&sregs.crs[1], regs_null, sizeof(sregs.crs[1]) * 12), - "cr1-13 == 0 (KVM_GET_SREGS)"); - TEST_ASSERT(sregs.crs[15] == 0, "cr15 == 0 (KVM_GET_SREGS)"); - - /* sync regs */ - TEST_ASSERT(sync_regs->crs[0] == 0xE0UL, "cr0 == 0xE0 (sync_regs)"); - TEST_ASSERT(sync_regs->crs[14] == 0xC2000000UL, - "cr14 == 0xC2000000 (sync_regs)"); - TEST_ASSERT(!memcmp(&sync_regs->crs[1], regs_null, 8 * 12), - "cr1-13 == 0 (sync_regs)"); - TEST_ASSERT(sync_regs->crs[15] == 0, "cr15 == 0 (sync_regs)"); - TEST_ASSERT(sync_regs->fpc == 0, "fpc == 0 (sync_regs)"); - TEST_ASSERT(sync_regs->todpr == 0, "todpr == 0 (sync_regs)"); - TEST_ASSERT(sync_regs->cputm == 0, "cputm == 0 (sync_regs)"); - TEST_ASSERT(sync_regs->ckc == 0, "ckc == 0 (sync_regs)"); - TEST_ASSERT(sync_regs->pp == 0, "pp == 0 (sync_regs)"); - TEST_ASSERT(sync_regs->gbea == 1, "gbea == 1 (sync_regs)"); - - /* kvm_run */ - TEST_ASSERT(vcpu->run->psw_addr == 0, "psw_addr == 0 (kvm_run)"); - TEST_ASSERT(vcpu->run->psw_mask == 0, "psw_mask == 0 (kvm_run)"); - - vcpu_fpu_get(vcpu, &fpu); - TEST_ASSERT(!fpu.fpc, "fpc == 0"); - - test_one_reg(vcpu, KVM_REG_S390_GBEA, 1); - test_one_reg(vcpu, KVM_REG_S390_PP, 0); - test_one_reg(vcpu, KVM_REG_S390_TODPR, 0); - test_one_reg(vcpu, KVM_REG_S390_CPU_TIMER, 0); - test_one_reg(vcpu, KVM_REG_S390_CLOCK_COMP, 0); -} - -static void assert_normal_noclear(struct kvm_vcpu *vcpu) -{ - struct kvm_sync_regs *sync_regs = &vcpu->run->s.regs; - - TEST_ASSERT(sync_regs->crs[2] == 0x10, "cr2 == 10 (sync_regs)"); - 
TEST_ASSERT(sync_regs->crs[8] == 1, "cr10 == 1 (sync_regs)"); - TEST_ASSERT(sync_regs->crs[10] == 1, "cr10 == 1 (sync_regs)"); - TEST_ASSERT(sync_regs->crs[11] == -1, "cr11 == -1 (sync_regs)"); -} - -static void assert_normal(struct kvm_vcpu *vcpu) -{ - test_one_reg(vcpu, KVM_REG_S390_PFTOKEN, KVM_S390_PFAULT_TOKEN_INVALID); - TEST_ASSERT(vcpu->run->s.regs.pft == KVM_S390_PFAULT_TOKEN_INVALID, - "pft == 0xff..... (sync_regs)"); - assert_noirq(vcpu); -} - -static void inject_irq(struct kvm_vcpu *vcpu) -{ - struct kvm_s390_irq_state irq_state; - struct kvm_s390_irq *irq = &buf[0]; - int irqs; - - /* Inject IRQ */ - irq_state.len = sizeof(struct kvm_s390_irq); - irq_state.buf = (unsigned long)buf; - irq->type = KVM_S390_INT_EMERGENCY; - irq->u.emerg.code = vcpu->id; - irqs = __vcpu_ioctl(vcpu, KVM_S390_SET_IRQ_STATE, &irq_state); - TEST_ASSERT(irqs >= 0, "Error injecting EMERGENCY IRQ errno %d", errno); -} - -static struct kvm_vm *create_vm(struct kvm_vcpu **vcpu) -{ - struct kvm_vm *vm; - - vm = vm_create(1); - - *vcpu = vm_vcpu_add(vm, ARBITRARY_NON_ZERO_VCPU_ID, guest_code_initial); - - return vm; -} - -static void test_normal(void) -{ - struct kvm_vcpu *vcpu; - struct kvm_vm *vm; - - ksft_print_msg("Testing normal reset\n"); - vm = create_vm(&vcpu); - - vcpu_run(vcpu); - - inject_irq(vcpu); - - vcpu_ioctl(vcpu, KVM_S390_NORMAL_RESET, NULL); - - /* must clears */ - assert_normal(vcpu); - /* must not clears */ - assert_normal_noclear(vcpu); - assert_initial_noclear(vcpu); - - kvm_vm_free(vm); -} - -static void test_initial(void) -{ - struct kvm_vcpu *vcpu; - struct kvm_vm *vm; - - ksft_print_msg("Testing initial reset\n"); - vm = create_vm(&vcpu); - - vcpu_run(vcpu); - - inject_irq(vcpu); - - vcpu_ioctl(vcpu, KVM_S390_INITIAL_RESET, NULL); - - /* must clears */ - assert_normal(vcpu); - assert_initial(vcpu); - /* must not clears */ - assert_initial_noclear(vcpu); - - kvm_vm_free(vm); -} - -static void test_clear(void) -{ - struct kvm_vcpu *vcpu; - struct kvm_vm *vm; - - ksft_print_msg("Testing clear reset\n"); - vm = create_vm(&vcpu); - - vcpu_run(vcpu); - - inject_irq(vcpu); - - vcpu_ioctl(vcpu, KVM_S390_CLEAR_RESET, NULL); - - /* must clears */ - assert_normal(vcpu); - assert_initial(vcpu); - assert_clear(vcpu); - - kvm_vm_free(vm); -} - -struct testdef { - const char *name; - void (*test)(void); - bool needs_cap; -} testlist[] = { - { "initial", test_initial, false }, - { "normal", test_normal, true }, - { "clear", test_clear, true }, -}; - -int main(int argc, char *argv[]) -{ - bool has_s390_vcpu_resets = kvm_check_cap(KVM_CAP_S390_VCPU_RESETS); - int idx; - - ksft_print_header(); - ksft_set_plan(ARRAY_SIZE(testlist)); - - for (idx = 0; idx < ARRAY_SIZE(testlist); idx++) { - if (!testlist[idx].needs_cap || has_s390_vcpu_resets) { - testlist[idx].test(); - ksft_test_result_pass("%s\n", testlist[idx].name); - } else { - ksft_test_result_skip("%s - no VCPU_RESETS capability\n", - testlist[idx].name); - } - } - - ksft_finished(); /* Print results and exit() accordingly */ -} diff --git a/tools/testing/selftests/kvm/s390x/shared_zeropage_test.c b/tools/testing/selftests/kvm/s390x/shared_zeropage_test.c deleted file mode 100644 index bba0d9a6dcc8..000000000000 --- a/tools/testing/selftests/kvm/s390x/shared_zeropage_test.c +++ /dev/null @@ -1,111 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Test shared zeropage handling (with/without storage keys) - * - * Copyright (C) 2024, Red Hat, Inc. 
- */ -#include - -#include - -#include "test_util.h" -#include "kvm_util.h" -#include "kselftest.h" -#include "ucall_common.h" - -static void set_storage_key(void *addr, uint8_t skey) -{ - asm volatile("sske %0,%1" : : "d" (skey), "a" (addr)); -} - -static void guest_code(void) -{ - /* Issue some storage key instruction. */ - set_storage_key((void *)0, 0x98); - GUEST_DONE(); -} - -/* - * Returns 1 if the shared zeropage is mapped, 0 if something else is mapped. - * Returns < 0 on error or if nothing is mapped. - */ -static int maps_shared_zeropage(int pagemap_fd, void *addr) -{ - struct page_region region; - struct pm_scan_arg arg = { - .start = (uintptr_t)addr, - .end = (uintptr_t)addr + 4096, - .vec = (uintptr_t)®ion, - .vec_len = 1, - .size = sizeof(struct pm_scan_arg), - .category_mask = PAGE_IS_PFNZERO, - .category_anyof_mask = PAGE_IS_PRESENT, - .return_mask = PAGE_IS_PFNZERO, - }; - return ioctl(pagemap_fd, PAGEMAP_SCAN, &arg); -} - -int main(int argc, char *argv[]) -{ - char *mem, *page0, *page1, *page2, tmp; - const size_t pagesize = getpagesize(); - struct kvm_vcpu *vcpu; - struct kvm_vm *vm; - struct ucall uc; - int pagemap_fd; - - ksft_print_header(); - ksft_set_plan(3); - - /* - * We'll use memory that is not mapped into the VM for simplicity. - * Shared zeropages are enabled/disabled per-process. - */ - mem = mmap(0, 3 * pagesize, PROT_READ, MAP_PRIVATE | MAP_ANON, -1, 0); - TEST_ASSERT(mem != MAP_FAILED, "mmap() failed"); - - /* Disable THP. Ignore errors on older kernels. */ - madvise(mem, 3 * pagesize, MADV_NOHUGEPAGE); - - page0 = mem; - page1 = page0 + pagesize; - page2 = page1 + pagesize; - - /* Can we even detect shared zeropages? */ - pagemap_fd = open("/proc/self/pagemap", O_RDONLY); - TEST_REQUIRE(pagemap_fd >= 0); - - tmp = *page0; - asm volatile("" : "+r" (tmp)); - TEST_REQUIRE(maps_shared_zeropage(pagemap_fd, page0) == 1); - - vm = vm_create_with_one_vcpu(&vcpu, guest_code); - - /* Verify that we get the shared zeropage after VM creation. */ - tmp = *page1; - asm volatile("" : "+r" (tmp)); - ksft_test_result(maps_shared_zeropage(pagemap_fd, page1) == 1, - "Shared zeropages should be enabled\n"); - - /* - * Let our VM execute a storage key instruction that should - * unshare all shared zeropages. - */ - vcpu_run(vcpu); - get_ucall(vcpu, &uc); - TEST_ASSERT_EQ(uc.cmd, UCALL_DONE); - - /* Verify that we don't have a shared zeropage anymore. */ - ksft_test_result(!maps_shared_zeropage(pagemap_fd, page1), - "Shared zeropage should be gone\n"); - - /* Verify that we don't get any new shared zeropages. */ - tmp = *page2; - asm volatile("" : "+r" (tmp)); - ksft_test_result(!maps_shared_zeropage(pagemap_fd, page2), - "Shared zeropages should be disabled\n"); - - kvm_vm_free(vm); - - ksft_finished(); -} diff --git a/tools/testing/selftests/kvm/s390x/sync_regs_test.c b/tools/testing/selftests/kvm/s390x/sync_regs_test.c deleted file mode 100644 index 53def355ccba..000000000000 --- a/tools/testing/selftests/kvm/s390x/sync_regs_test.c +++ /dev/null @@ -1,238 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Test for s390x KVM_CAP_SYNC_REGS - * - * Based on the same test for x86: - * Copyright (C) 2018, Google LLC. - * - * Adaptions for s390x: - * Copyright (C) 2019, Red Hat, Inc. - * - * Test expected behavior of the KVM_CAP_SYNC_REGS functionality. 
- */ -#include -#include -#include -#include -#include - -#include "test_util.h" -#include "kvm_util.h" -#include "diag318_test_handler.h" -#include "kselftest.h" - -static void guest_code(void) -{ - /* - * We embed diag 501 here instead of doing a ucall to avoid that - * the compiler has messed with r11 at the time of the ucall. - */ - asm volatile ( - "0: diag 0,0,0x501\n" - " ahi 11,1\n" - " j 0b\n" - ); -} - -#define REG_COMPARE(reg) \ - TEST_ASSERT(left->reg == right->reg, \ - "Register " #reg \ - " values did not match: 0x%llx, 0x%llx", \ - left->reg, right->reg) - -#define REG_COMPARE32(reg) \ - TEST_ASSERT(left->reg == right->reg, \ - "Register " #reg \ - " values did not match: 0x%x, 0x%x", \ - left->reg, right->reg) - - -static void compare_regs(struct kvm_regs *left, struct kvm_sync_regs *right) -{ - int i; - - for (i = 0; i < 16; i++) - REG_COMPARE(gprs[i]); -} - -static void compare_sregs(struct kvm_sregs *left, struct kvm_sync_regs *right) -{ - int i; - - for (i = 0; i < 16; i++) - REG_COMPARE32(acrs[i]); - - for (i = 0; i < 16; i++) - REG_COMPARE(crs[i]); -} - -#undef REG_COMPARE - -#define TEST_SYNC_FIELDS (KVM_SYNC_GPRS|KVM_SYNC_ACRS|KVM_SYNC_CRS|KVM_SYNC_DIAG318) -#define INVALID_SYNC_FIELD 0x80000000 - -void test_read_invalid(struct kvm_vcpu *vcpu) -{ - struct kvm_run *run = vcpu->run; - int rv; - - /* Request reading invalid register set from VCPU. */ - run->kvm_valid_regs = INVALID_SYNC_FIELD; - rv = _vcpu_run(vcpu); - TEST_ASSERT(rv < 0 && errno == EINVAL, - "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d", - rv); - run->kvm_valid_regs = 0; - - run->kvm_valid_regs = INVALID_SYNC_FIELD | TEST_SYNC_FIELDS; - rv = _vcpu_run(vcpu); - TEST_ASSERT(rv < 0 && errno == EINVAL, - "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d", - rv); - run->kvm_valid_regs = 0; -} - -void test_set_invalid(struct kvm_vcpu *vcpu) -{ - struct kvm_run *run = vcpu->run; - int rv; - - /* Request setting invalid register set into VCPU. */ - run->kvm_dirty_regs = INVALID_SYNC_FIELD; - rv = _vcpu_run(vcpu); - TEST_ASSERT(rv < 0 && errno == EINVAL, - "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d", - rv); - run->kvm_dirty_regs = 0; - - run->kvm_dirty_regs = INVALID_SYNC_FIELD | TEST_SYNC_FIELDS; - rv = _vcpu_run(vcpu); - TEST_ASSERT(rv < 0 && errno == EINVAL, - "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d", - rv); - run->kvm_dirty_regs = 0; -} - -void test_req_and_verify_all_valid_regs(struct kvm_vcpu *vcpu) -{ - struct kvm_run *run = vcpu->run; - struct kvm_sregs sregs; - struct kvm_regs regs; - int rv; - - /* Request and verify all valid register sets. 
*/ - run->kvm_valid_regs = TEST_SYNC_FIELDS; - rv = _vcpu_run(vcpu); - TEST_ASSERT(rv == 0, "vcpu_run failed: %d", rv); - TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_S390_SIEIC); - TEST_ASSERT(run->s390_sieic.icptcode == 4 && - (run->s390_sieic.ipa >> 8) == 0x83 && - (run->s390_sieic.ipb >> 16) == 0x501, - "Unexpected interception code: ic=%u, ipa=0x%x, ipb=0x%x", - run->s390_sieic.icptcode, run->s390_sieic.ipa, - run->s390_sieic.ipb); - - vcpu_regs_get(vcpu, ®s); - compare_regs(®s, &run->s.regs); - - vcpu_sregs_get(vcpu, &sregs); - compare_sregs(&sregs, &run->s.regs); -} - -void test_set_and_verify_various_reg_values(struct kvm_vcpu *vcpu) -{ - struct kvm_run *run = vcpu->run; - struct kvm_sregs sregs; - struct kvm_regs regs; - int rv; - - /* Set and verify various register values */ - run->s.regs.gprs[11] = 0xBAD1DEA; - run->s.regs.acrs[0] = 1 << 11; - - run->kvm_valid_regs = TEST_SYNC_FIELDS; - run->kvm_dirty_regs = KVM_SYNC_GPRS | KVM_SYNC_ACRS; - - if (get_diag318_info() > 0) { - run->s.regs.diag318 = get_diag318_info(); - run->kvm_dirty_regs |= KVM_SYNC_DIAG318; - } - - rv = _vcpu_run(vcpu); - TEST_ASSERT(rv == 0, "vcpu_run failed: %d", rv); - TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_S390_SIEIC); - TEST_ASSERT(run->s.regs.gprs[11] == 0xBAD1DEA + 1, - "r11 sync regs value incorrect 0x%llx.", - run->s.regs.gprs[11]); - TEST_ASSERT(run->s.regs.acrs[0] == 1 << 11, - "acr0 sync regs value incorrect 0x%x.", - run->s.regs.acrs[0]); - TEST_ASSERT(run->s.regs.diag318 == get_diag318_info(), - "diag318 sync regs value incorrect 0x%llx.", - run->s.regs.diag318); - - vcpu_regs_get(vcpu, ®s); - compare_regs(®s, &run->s.regs); - - vcpu_sregs_get(vcpu, &sregs); - compare_sregs(&sregs, &run->s.regs); -} - -void test_clear_kvm_dirty_regs_bits(struct kvm_vcpu *vcpu) -{ - struct kvm_run *run = vcpu->run; - int rv; - - /* Clear kvm_dirty_regs bits, verify new s.regs values are - * overwritten with existing guest values. 
- */ - run->kvm_valid_regs = TEST_SYNC_FIELDS; - run->kvm_dirty_regs = 0; - run->s.regs.gprs[11] = 0xDEADBEEF; - run->s.regs.diag318 = 0x4B1D; - rv = _vcpu_run(vcpu); - TEST_ASSERT(rv == 0, "vcpu_run failed: %d", rv); - TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_S390_SIEIC); - TEST_ASSERT(run->s.regs.gprs[11] != 0xDEADBEEF, - "r11 sync regs value incorrect 0x%llx.", - run->s.regs.gprs[11]); - TEST_ASSERT(run->s.regs.diag318 != 0x4B1D, - "diag318 sync regs value incorrect 0x%llx.", - run->s.regs.diag318); -} - -struct testdef { - const char *name; - void (*test)(struct kvm_vcpu *vcpu); -} testlist[] = { - { "read invalid", test_read_invalid }, - { "set invalid", test_set_invalid }, - { "request+verify all valid regs", test_req_and_verify_all_valid_regs }, - { "set+verify various regs", test_set_and_verify_various_reg_values }, - { "clear kvm_dirty_regs bits", test_clear_kvm_dirty_regs_bits }, -}; - -int main(int argc, char *argv[]) -{ - struct kvm_vcpu *vcpu; - struct kvm_vm *vm; - int idx; - - TEST_REQUIRE(kvm_has_cap(KVM_CAP_SYNC_REGS)); - - ksft_print_header(); - - ksft_set_plan(ARRAY_SIZE(testlist)); - - /* Create VM */ - vm = vm_create_with_one_vcpu(&vcpu, guest_code); - - for (idx = 0; idx < ARRAY_SIZE(testlist); idx++) { - testlist[idx].test(vcpu); - ksft_test_result_pass("%s\n", testlist[idx].name); - } - - kvm_vm_free(vm); - - ksft_finished(); /* Print results and exit() accordingly */ -} diff --git a/tools/testing/selftests/kvm/s390x/tprot.c b/tools/testing/selftests/kvm/s390x/tprot.c deleted file mode 100644 index 12d5e1cb62e3..000000000000 --- a/tools/testing/selftests/kvm/s390x/tprot.c +++ /dev/null @@ -1,244 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Test TEST PROTECTION emulation. - * - * Copyright IBM Corp. 2021 - */ -#include -#include "test_util.h" -#include "kvm_util.h" -#include "kselftest.h" -#include "ucall_common.h" -#include "processor.h" - -#define CR0_FETCH_PROTECTION_OVERRIDE (1UL << (63 - 38)) -#define CR0_STORAGE_PROTECTION_OVERRIDE (1UL << (63 - 39)) - -static __aligned(PAGE_SIZE) uint8_t pages[2][PAGE_SIZE]; -static uint8_t *const page_store_prot = pages[0]; -static uint8_t *const page_fetch_prot = pages[1]; - -/* Nonzero return value indicates that address not mapped */ -static int set_storage_key(void *addr, uint8_t key) -{ - int not_mapped = 0; - - asm volatile ( - "lra %[addr], 0(0,%[addr])\n" - " jz 0f\n" - " llill %[not_mapped],1\n" - " j 1f\n" - "0: sske %[key], %[addr]\n" - "1:" - : [addr] "+&a" (addr), [not_mapped] "+r" (not_mapped) - : [key] "r" (key) - : "cc" - ); - return -not_mapped; -} - -enum permission { - READ_WRITE = 0, - READ = 1, - RW_PROTECTED = 2, - TRANSL_UNAVAIL = 3, -}; - -static enum permission test_protection(void *addr, uint8_t key) -{ - uint64_t mask; - - asm volatile ( - "tprot %[addr], 0(%[key])\n" - " ipm %[mask]\n" - : [mask] "=r" (mask) - : [addr] "Q" (*(char *)addr), - [key] "a" (key) - : "cc" - ); - - return (enum permission)(mask >> 28); -} - -enum stage { - STAGE_INIT_SIMPLE, - TEST_SIMPLE, - STAGE_INIT_FETCH_PROT_OVERRIDE, - TEST_FETCH_PROT_OVERRIDE, - TEST_STORAGE_PROT_OVERRIDE, - STAGE_END /* must be the last entry (it's the amount of tests) */ -}; - -struct test { - enum stage stage; - void *addr; - uint8_t key; - enum permission expected; -} tests[] = { - /* - * We perform each test in the array by executing TEST PROTECTION on - * the specified addr with the specified key and checking if the returned - * permissions match the expected value. 
- * Both guest and host cooperate to set up the required test conditions. - * A central condition is that the page targeted by addr has to be DAT - * protected in the host mappings, in order for KVM to emulate the - * TEST PROTECTION instruction. - * Since the page tables are shared, the host uses mprotect to achieve - * this. - * - * Test resulting in RW_PROTECTED/TRANSL_UNAVAIL will be interpreted - * by SIE, not KVM, but there is no harm in testing them also. - * See Enhanced Suppression-on-Protection Facilities in the - * Interpretive-Execution Mode - */ - /* - * guest: set storage key of page_store_prot to 1 - * storage key of page_fetch_prot to 9 and enable - * protection for it - * STAGE_INIT_SIMPLE - * host: write protect both via mprotect - */ - /* access key 0 matches any storage key -> RW */ - { TEST_SIMPLE, page_store_prot, 0x00, READ_WRITE }, - /* access key matches storage key -> RW */ - { TEST_SIMPLE, page_store_prot, 0x10, READ_WRITE }, - /* mismatched keys, but no fetch protection -> RO */ - { TEST_SIMPLE, page_store_prot, 0x20, READ }, - /* access key 0 matches any storage key -> RW */ - { TEST_SIMPLE, page_fetch_prot, 0x00, READ_WRITE }, - /* access key matches storage key -> RW */ - { TEST_SIMPLE, page_fetch_prot, 0x90, READ_WRITE }, - /* mismatched keys, fetch protection -> inaccessible */ - { TEST_SIMPLE, page_fetch_prot, 0x10, RW_PROTECTED }, - /* page 0 not mapped yet -> translation not available */ - { TEST_SIMPLE, (void *)0x00, 0x10, TRANSL_UNAVAIL }, - /* - * host: try to map page 0 - * guest: set storage key of page 0 to 9 and enable fetch protection - * STAGE_INIT_FETCH_PROT_OVERRIDE - * host: write protect page 0 - * enable fetch protection override - */ - /* mismatched keys, fetch protection, but override applies -> RO */ - { TEST_FETCH_PROT_OVERRIDE, (void *)0x00, 0x10, READ }, - /* mismatched keys, fetch protection, override applies to 0-2048 only -> inaccessible */ - { TEST_FETCH_PROT_OVERRIDE, (void *)2049, 0x10, RW_PROTECTED }, - /* - * host: enable storage protection override - */ - /* mismatched keys, but override applies (storage key 9) -> RW */ - { TEST_STORAGE_PROT_OVERRIDE, page_fetch_prot, 0x10, READ_WRITE }, - /* mismatched keys, no fetch protection, override doesn't apply -> RO */ - { TEST_STORAGE_PROT_OVERRIDE, page_store_prot, 0x20, READ }, - /* mismatched keys, but override applies (storage key 9) -> RW */ - { TEST_STORAGE_PROT_OVERRIDE, (void *)2049, 0x10, READ_WRITE }, - /* end marker */ - { STAGE_END, 0, 0, 0 }, -}; - -static enum stage perform_next_stage(int *i, bool mapped_0) -{ - enum stage stage = tests[*i].stage; - enum permission result; - bool skip; - - for (; tests[*i].stage == stage; (*i)++) { - /* - * Some fetch protection override tests require that page 0 - * be mapped, however, when the hosts tries to map that page via - * vm_vaddr_alloc, it may happen that some other page gets mapped - * instead. 
- * In order to skip these tests we detect this inside the guest - */ - skip = tests[*i].addr < (void *)PAGE_SIZE && - tests[*i].expected != TRANSL_UNAVAIL && - !mapped_0; - if (!skip) { - result = test_protection(tests[*i].addr, tests[*i].key); - __GUEST_ASSERT(result == tests[*i].expected, - "Wanted %u, got %u, for i = %u", - tests[*i].expected, result, *i); - } - } - return stage; -} - -static void guest_code(void) -{ - bool mapped_0; - int i = 0; - - GUEST_ASSERT_EQ(set_storage_key(page_store_prot, 0x10), 0); - GUEST_ASSERT_EQ(set_storage_key(page_fetch_prot, 0x98), 0); - GUEST_SYNC(STAGE_INIT_SIMPLE); - GUEST_SYNC(perform_next_stage(&i, false)); - - /* Fetch-protection override */ - mapped_0 = !set_storage_key((void *)0, 0x98); - GUEST_SYNC(STAGE_INIT_FETCH_PROT_OVERRIDE); - GUEST_SYNC(perform_next_stage(&i, mapped_0)); - - /* Storage-protection override */ - GUEST_SYNC(perform_next_stage(&i, mapped_0)); -} - -#define HOST_SYNC_NO_TAP(vcpup, stage) \ -({ \ - struct kvm_vcpu *__vcpu = (vcpup); \ - struct ucall uc; \ - int __stage = (stage); \ - \ - vcpu_run(__vcpu); \ - get_ucall(__vcpu, &uc); \ - if (uc.cmd == UCALL_ABORT) \ - REPORT_GUEST_ASSERT(uc); \ - TEST_ASSERT_EQ(uc.cmd, UCALL_SYNC); \ - TEST_ASSERT_EQ(uc.args[1], __stage); \ -}) - -#define HOST_SYNC(vcpu, stage) \ -({ \ - HOST_SYNC_NO_TAP(vcpu, stage); \ - ksft_test_result_pass("" #stage "\n"); \ -}) - -int main(int argc, char *argv[]) -{ - struct kvm_vcpu *vcpu; - struct kvm_vm *vm; - struct kvm_run *run; - vm_vaddr_t guest_0_page; - - ksft_print_header(); - ksft_set_plan(STAGE_END); - - vm = vm_create_with_one_vcpu(&vcpu, guest_code); - run = vcpu->run; - - HOST_SYNC(vcpu, STAGE_INIT_SIMPLE); - mprotect(addr_gva2hva(vm, (vm_vaddr_t)pages), PAGE_SIZE * 2, PROT_READ); - HOST_SYNC(vcpu, TEST_SIMPLE); - - guest_0_page = vm_vaddr_alloc(vm, PAGE_SIZE, 0); - if (guest_0_page != 0) { - /* Use NO_TAP so we don't get a PASS print */ - HOST_SYNC_NO_TAP(vcpu, STAGE_INIT_FETCH_PROT_OVERRIDE); - ksft_test_result_skip("STAGE_INIT_FETCH_PROT_OVERRIDE - " - "Did not allocate page at 0\n"); - } else { - HOST_SYNC(vcpu, STAGE_INIT_FETCH_PROT_OVERRIDE); - } - if (guest_0_page == 0) - mprotect(addr_gva2hva(vm, (vm_vaddr_t)0), PAGE_SIZE, PROT_READ); - run->s.regs.crs[0] |= CR0_FETCH_PROTECTION_OVERRIDE; - run->kvm_dirty_regs = KVM_SYNC_CRS; - HOST_SYNC(vcpu, TEST_FETCH_PROT_OVERRIDE); - - run->s.regs.crs[0] |= CR0_STORAGE_PROTECTION_OVERRIDE; - run->kvm_dirty_regs = KVM_SYNC_CRS; - HOST_SYNC(vcpu, TEST_STORAGE_PROT_OVERRIDE); - - kvm_vm_free(vm); - - ksft_finished(); /* Print results and exit() accordingly */ -} diff --git a/tools/testing/selftests/kvm/s390x/ucontrol_test.c b/tools/testing/selftests/kvm/s390x/ucontrol_test.c deleted file mode 100644 index 0c112319dab1..000000000000 --- a/tools/testing/selftests/kvm/s390x/ucontrol_test.c +++ /dev/null @@ -1,638 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Test code for the s390x kvm ucontrol interface - * - * Copyright IBM Corp. 
2024 - * - * Authors: - * Christoph Schlameuss - */ -#include "debug_print.h" -#include "kselftest_harness.h" -#include "kvm_util.h" -#include "processor.h" -#include "sie.h" - -#include -#include - -#define PGM_SEGMENT_TRANSLATION 0x10 - -#define VM_MEM_SIZE (4 * SZ_1M) -#define VM_MEM_EXT_SIZE (2 * SZ_1M) -#define VM_MEM_MAX_M ((VM_MEM_SIZE + VM_MEM_EXT_SIZE) / SZ_1M) - -/* so directly declare capget to check caps without libcap */ -int capget(cap_user_header_t header, cap_user_data_t data); - -/** - * In order to create user controlled virtual machines on S390, - * check KVM_CAP_S390_UCONTROL and use the flag KVM_VM_S390_UCONTROL - * as privileged user (SYS_ADMIN). - */ -void require_ucontrol_admin(void) -{ - struct __user_cap_data_struct data[_LINUX_CAPABILITY_U32S_3]; - struct __user_cap_header_struct hdr = { - .version = _LINUX_CAPABILITY_VERSION_3, - }; - int rc; - - rc = capget(&hdr, data); - TEST_ASSERT_EQ(0, rc); - TEST_REQUIRE((data->effective & CAP_TO_MASK(CAP_SYS_ADMIN)) > 0); - - TEST_REQUIRE(kvm_has_cap(KVM_CAP_S390_UCONTROL)); -} - -/* Test program setting some registers and looping */ -extern char test_gprs_asm[]; -asm("test_gprs_asm:\n" - "xgr %r0, %r0\n" - "lgfi %r1,1\n" - "lgfi %r2,2\n" - "lgfi %r3,3\n" - "lgfi %r4,4\n" - "lgfi %r5,5\n" - "lgfi %r6,6\n" - "lgfi %r7,7\n" - "0:\n" - " diag 0,0,0x44\n" - " ahi %r0,1\n" - " j 0b\n" -); - -/* Test program manipulating memory */ -extern char test_mem_asm[]; -asm("test_mem_asm:\n" - "xgr %r0, %r0\n" - - "0:\n" - " ahi %r0,1\n" - " st %r1,0(%r5,%r6)\n" - - " xgr %r1,%r1\n" - " l %r1,0(%r5,%r6)\n" - " ahi %r0,1\n" - " diag 0,0,0x44\n" - - " j 0b\n" -); - -/* Test program manipulating storage keys */ -extern char test_skey_asm[]; -asm("test_skey_asm:\n" - "xgr %r0, %r0\n" - - "0:\n" - " ahi %r0,1\n" - " st %r1,0(%r5,%r6)\n" - - " iske %r1,%r6\n" - " ahi %r0,1\n" - " diag 0,0,0x44\n" - - " sske %r1,%r6\n" - " xgr %r1,%r1\n" - " iske %r1,%r6\n" - " ahi %r0,1\n" - " diag 0,0,0x44\n" - - " rrbe %r1,%r6\n" - " iske %r1,%r6\n" - " ahi %r0,1\n" - " diag 0,0,0x44\n" - - " j 0b\n" -); - -FIXTURE(uc_kvm) -{ - struct kvm_s390_sie_block *sie_block; - struct kvm_run *run; - uintptr_t base_gpa; - uintptr_t code_gpa; - uintptr_t base_hva; - uintptr_t code_hva; - int kvm_run_size; - vm_paddr_t pgd; - void *vm_mem; - int vcpu_fd; - int kvm_fd; - int vm_fd; -}; - -/** - * create VM with single vcpu, map kvm_run and SIE control block for easy access - */ -FIXTURE_SETUP(uc_kvm) -{ - struct kvm_s390_vm_cpu_processor info; - int rc; - - require_ucontrol_admin(); - - self->kvm_fd = open_kvm_dev_path_or_exit(); - self->vm_fd = ioctl(self->kvm_fd, KVM_CREATE_VM, KVM_VM_S390_UCONTROL); - ASSERT_GE(self->vm_fd, 0); - - kvm_device_attr_get(self->vm_fd, KVM_S390_VM_CPU_MODEL, - KVM_S390_VM_CPU_PROCESSOR, &info); - TH_LOG("create VM 0x%llx", info.cpuid); - - self->vcpu_fd = ioctl(self->vm_fd, KVM_CREATE_VCPU, 0); - ASSERT_GE(self->vcpu_fd, 0); - - self->kvm_run_size = ioctl(self->kvm_fd, KVM_GET_VCPU_MMAP_SIZE, NULL); - ASSERT_GE(self->kvm_run_size, sizeof(struct kvm_run)) - TH_LOG(KVM_IOCTL_ERROR(KVM_GET_VCPU_MMAP_SIZE, self->kvm_run_size)); - self->run = (struct kvm_run *)mmap(NULL, self->kvm_run_size, - PROT_READ | PROT_WRITE, MAP_SHARED, self->vcpu_fd, 0); - ASSERT_NE(self->run, MAP_FAILED); - /** - * For virtual cpus that have been created with S390 user controlled - * virtual machines, the resulting vcpu fd can be memory mapped at page - * offset KVM_S390_SIE_PAGE_OFFSET in order to obtain a memory map of - * the virtual cpu's hardware control block. 
- */ - self->sie_block = (struct kvm_s390_sie_block *)mmap(NULL, PAGE_SIZE, - PROT_READ | PROT_WRITE, MAP_SHARED, - self->vcpu_fd, KVM_S390_SIE_PAGE_OFFSET << PAGE_SHIFT); - ASSERT_NE(self->sie_block, MAP_FAILED); - - TH_LOG("VM created %p %p", self->run, self->sie_block); - - self->base_gpa = 0; - self->code_gpa = self->base_gpa + (3 * SZ_1M); - - self->vm_mem = aligned_alloc(SZ_1M, VM_MEM_MAX_M * SZ_1M); - ASSERT_NE(NULL, self->vm_mem) TH_LOG("malloc failed %u", errno); - self->base_hva = (uintptr_t)self->vm_mem; - self->code_hva = self->base_hva - self->base_gpa + self->code_gpa; - struct kvm_s390_ucas_mapping map = { - .user_addr = self->base_hva, - .vcpu_addr = self->base_gpa, - .length = VM_MEM_SIZE, - }; - TH_LOG("ucas map %p %p 0x%llx", - (void *)map.user_addr, (void *)map.vcpu_addr, map.length); - rc = ioctl(self->vcpu_fd, KVM_S390_UCAS_MAP, &map); - ASSERT_EQ(0, rc) TH_LOG("ucas map result %d not expected, %s", - rc, strerror(errno)); - - TH_LOG("page in %p", (void *)self->base_gpa); - rc = ioctl(self->vcpu_fd, KVM_S390_VCPU_FAULT, self->base_gpa); - ASSERT_EQ(0, rc) TH_LOG("vcpu fault (%p) result %d not expected, %s", - (void *)self->base_hva, rc, strerror(errno)); - - self->sie_block->cpuflags &= ~CPUSTAT_STOPPED; -} - -FIXTURE_TEARDOWN(uc_kvm) -{ - munmap(self->sie_block, PAGE_SIZE); - munmap(self->run, self->kvm_run_size); - close(self->vcpu_fd); - close(self->vm_fd); - close(self->kvm_fd); - free(self->vm_mem); -} - -TEST_F(uc_kvm, uc_sie_assertions) -{ - /* assert interception of Code 08 (Program Interruption) is set */ - EXPECT_EQ(0, self->sie_block->ecb & ECB_SPECI); -} - -TEST_F(uc_kvm, uc_attr_mem_limit) -{ - u64 limit; - struct kvm_device_attr attr = { - .group = KVM_S390_VM_MEM_CTRL, - .attr = KVM_S390_VM_MEM_LIMIT_SIZE, - .addr = (unsigned long)&limit, - }; - int rc; - - rc = ioctl(self->vm_fd, KVM_GET_DEVICE_ATTR, &attr); - EXPECT_EQ(0, rc); - EXPECT_EQ(~0UL, limit); - - /* assert set not supported */ - rc = ioctl(self->vm_fd, KVM_SET_DEVICE_ATTR, &attr); - EXPECT_EQ(-1, rc); - EXPECT_EQ(EINVAL, errno); -} - -TEST_F(uc_kvm, uc_no_dirty_log) -{ - struct kvm_dirty_log dlog; - int rc; - - rc = ioctl(self->vm_fd, KVM_GET_DIRTY_LOG, &dlog); - EXPECT_EQ(-1, rc); - EXPECT_EQ(EINVAL, errno); -} - -/** - * Assert HPAGE CAP cannot be enabled on UCONTROL VM - */ -TEST(uc_cap_hpage) -{ - int rc, kvm_fd, vm_fd, vcpu_fd; - struct kvm_enable_cap cap = { - .cap = KVM_CAP_S390_HPAGE_1M, - }; - - require_ucontrol_admin(); - - kvm_fd = open_kvm_dev_path_or_exit(); - vm_fd = ioctl(kvm_fd, KVM_CREATE_VM, KVM_VM_S390_UCONTROL); - ASSERT_GE(vm_fd, 0); - - /* assert hpages are not supported on ucontrol vm */ - rc = ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_S390_HPAGE_1M); - EXPECT_EQ(0, rc); - - /* Test that KVM_CAP_S390_HPAGE_1M can't be enabled for a ucontrol vm */ - rc = ioctl(vm_fd, KVM_ENABLE_CAP, cap); - EXPECT_EQ(-1, rc); - EXPECT_EQ(EINVAL, errno); - - /* assert HPAGE CAP is rejected after vCPU creation */ - vcpu_fd = ioctl(vm_fd, KVM_CREATE_VCPU, 0); - ASSERT_GE(vcpu_fd, 0); - rc = ioctl(vm_fd, KVM_ENABLE_CAP, cap); - EXPECT_EQ(-1, rc); - EXPECT_EQ(EBUSY, errno); - - close(vcpu_fd); - close(vm_fd); - close(kvm_fd); -} - -/* calculate host virtual addr from guest physical addr */ -static void *gpa2hva(FIXTURE_DATA(uc_kvm) *self, u64 gpa) -{ - return (void *)(self->base_hva - self->base_gpa + gpa); -} - -/* map / make additional memory available */ -static int uc_map_ext(FIXTURE_DATA(uc_kvm) *self, u64 vcpu_addr, u64 length) -{ - struct kvm_s390_ucas_mapping map = { - .user_addr = 
(u64)gpa2hva(self, vcpu_addr), - .vcpu_addr = vcpu_addr, - .length = length, - }; - pr_info("ucas map %p %p 0x%llx", - (void *)map.user_addr, (void *)map.vcpu_addr, map.length); - return ioctl(self->vcpu_fd, KVM_S390_UCAS_MAP, &map); -} - -/* unmap previously mapped memory */ -static int uc_unmap_ext(FIXTURE_DATA(uc_kvm) *self, u64 vcpu_addr, u64 length) -{ - struct kvm_s390_ucas_mapping map = { - .user_addr = (u64)gpa2hva(self, vcpu_addr), - .vcpu_addr = vcpu_addr, - .length = length, - }; - pr_info("ucas unmap %p %p 0x%llx", - (void *)map.user_addr, (void *)map.vcpu_addr, map.length); - return ioctl(self->vcpu_fd, KVM_S390_UCAS_UNMAP, &map); -} - -/* handle ucontrol exit by mapping the accessed segment */ -static void uc_handle_exit_ucontrol(FIXTURE_DATA(uc_kvm) *self) -{ - struct kvm_run *run = self->run; - u64 seg_addr; - int rc; - - TEST_ASSERT_EQ(KVM_EXIT_S390_UCONTROL, run->exit_reason); - switch (run->s390_ucontrol.pgm_code) { - case PGM_SEGMENT_TRANSLATION: - seg_addr = run->s390_ucontrol.trans_exc_code & ~(SZ_1M - 1); - pr_info("ucontrol pic segment translation 0x%llx, mapping segment 0x%lx\n", - run->s390_ucontrol.trans_exc_code, seg_addr); - /* map / make additional memory available */ - rc = uc_map_ext(self, seg_addr, SZ_1M); - TEST_ASSERT_EQ(0, rc); - break; - default: - TEST_FAIL("UNEXPECTED PGM CODE %d", run->s390_ucontrol.pgm_code); - } -} - -/* - * Handle the SIEIC exit - * * fail on codes not expected in the test cases - * Returns if interception is handled / execution can be continued - */ -static void uc_skey_enable(FIXTURE_DATA(uc_kvm) *self) -{ - struct kvm_s390_sie_block *sie_block = self->sie_block; - - /* disable KSS */ - sie_block->cpuflags &= ~CPUSTAT_KSS; - /* disable skey inst interception */ - sie_block->ictl &= ~(ICTL_ISKE | ICTL_SSKE | ICTL_RRBE); -} - -/* - * Handle the instruction intercept - * Returns if interception is handled / execution can be continued - */ -static bool uc_handle_insn_ic(FIXTURE_DATA(uc_kvm) *self) -{ - struct kvm_s390_sie_block *sie_block = self->sie_block; - int ilen = insn_length(sie_block->ipa >> 8); - struct kvm_run *run = self->run; - - switch (run->s390_sieic.ipa) { - case 0xB229: /* ISKE */ - case 0xB22b: /* SSKE */ - case 0xB22a: /* RRBE */ - uc_skey_enable(self); - - /* rewind to reexecute intercepted instruction */ - run->psw_addr = run->psw_addr - ilen; - pr_info("rewind guest addr to 0x%.16llx\n", run->psw_addr); - return true; - default: - return false; - } -} - -/* - * Handle the SIEIC exit - * * fail on codes not expected in the test cases - * Returns if interception is handled / execution can be continued - */ -static bool uc_handle_sieic(FIXTURE_DATA(uc_kvm) *self) -{ - struct kvm_s390_sie_block *sie_block = self->sie_block; - struct kvm_run *run = self->run; - - /* check SIE interception code */ - pr_info("sieic: 0x%.2x 0x%.4x 0x%.8x\n", - run->s390_sieic.icptcode, - run->s390_sieic.ipa, - run->s390_sieic.ipb); - switch (run->s390_sieic.icptcode) { - case ICPT_INST: - /* end execution in caller on intercepted instruction */ - pr_info("sie instruction interception\n"); - return uc_handle_insn_ic(self); - case ICPT_KSS: - uc_skey_enable(self); - return true; - case ICPT_OPEREXC: - /* operation exception */ - TEST_FAIL("sie exception on %.4x%.8x", sie_block->ipa, sie_block->ipb); - default: - TEST_FAIL("UNEXPECTED SIEIC CODE %d", run->s390_sieic.icptcode); - } - return true; -} - -/* verify VM state on exit */ -static bool uc_handle_exit(FIXTURE_DATA(uc_kvm) *self) -{ - struct kvm_run *run = self->run; - - switch 
(run->exit_reason) { - case KVM_EXIT_S390_UCONTROL: - /** check program interruption code - * handle page fault --> ucas map - */ - uc_handle_exit_ucontrol(self); - break; - case KVM_EXIT_S390_SIEIC: - return uc_handle_sieic(self); - default: - pr_info("exit_reason %2d not handled\n", run->exit_reason); - } - return true; -} - -/* run the VM until interrupted */ -static int uc_run_once(FIXTURE_DATA(uc_kvm) *self) -{ - int rc; - - rc = ioctl(self->vcpu_fd, KVM_RUN, NULL); - print_run(self->run, self->sie_block); - print_regs(self->run); - pr_debug("run %d / %d %s\n", rc, errno, strerror(errno)); - return rc; -} - -static void uc_assert_diag44(FIXTURE_DATA(uc_kvm) *self) -{ - struct kvm_s390_sie_block *sie_block = self->sie_block; - - /* assert vm was interrupted by diag 0x0044 */ - TEST_ASSERT_EQ(KVM_EXIT_S390_SIEIC, self->run->exit_reason); - TEST_ASSERT_EQ(ICPT_INST, sie_block->icptcode); - TEST_ASSERT_EQ(0x8300, sie_block->ipa); - TEST_ASSERT_EQ(0x440000, sie_block->ipb); -} - -TEST_F(uc_kvm, uc_no_user_region) -{ - struct kvm_userspace_memory_region region = { - .slot = 1, - .guest_phys_addr = self->code_gpa, - .memory_size = VM_MEM_EXT_SIZE, - .userspace_addr = (uintptr_t)self->code_hva, - }; - struct kvm_userspace_memory_region2 region2 = { - .slot = 1, - .guest_phys_addr = self->code_gpa, - .memory_size = VM_MEM_EXT_SIZE, - .userspace_addr = (uintptr_t)self->code_hva, - }; - - ASSERT_EQ(-1, ioctl(self->vm_fd, KVM_SET_USER_MEMORY_REGION, ®ion)); - ASSERT_EQ(EINVAL, errno); - - ASSERT_EQ(-1, ioctl(self->vm_fd, KVM_SET_USER_MEMORY_REGION2, ®ion2)); - ASSERT_EQ(EINVAL, errno); -} - -TEST_F(uc_kvm, uc_map_unmap) -{ - struct kvm_sync_regs *sync_regs = &self->run->s.regs; - struct kvm_run *run = self->run; - const u64 disp = 1; - int rc; - - /* copy test_mem_asm to code_hva / code_gpa */ - TH_LOG("copy code %p to vm mapped memory %p / %p", - &test_mem_asm, (void *)self->code_hva, (void *)self->code_gpa); - memcpy((void *)self->code_hva, &test_mem_asm, PAGE_SIZE); - - /* DAT disabled + 64 bit mode */ - run->psw_mask = 0x0000000180000000ULL; - run->psw_addr = self->code_gpa; - - /* set register content for test_mem_asm to access not mapped memory*/ - sync_regs->gprs[1] = 0x55; - sync_regs->gprs[5] = self->base_gpa; - sync_regs->gprs[6] = VM_MEM_SIZE + disp; - run->kvm_dirty_regs |= KVM_SYNC_GPRS; - - /* run and expect to fail with ucontrol pic segment translation */ - ASSERT_EQ(0, uc_run_once(self)); - ASSERT_EQ(1, sync_regs->gprs[0]); - ASSERT_EQ(KVM_EXIT_S390_UCONTROL, run->exit_reason); - - ASSERT_EQ(PGM_SEGMENT_TRANSLATION, run->s390_ucontrol.pgm_code); - ASSERT_EQ(self->base_gpa + VM_MEM_SIZE, run->s390_ucontrol.trans_exc_code); - - /* fail to map memory with not segment aligned address */ - rc = uc_map_ext(self, self->base_gpa + VM_MEM_SIZE + disp, VM_MEM_EXT_SIZE); - ASSERT_GT(0, rc) - TH_LOG("ucas map for non segment address should fail but didn't; " - "result %d not expected, %s", rc, strerror(errno)); - - /* map / make additional memory available */ - rc = uc_map_ext(self, self->base_gpa + VM_MEM_SIZE, VM_MEM_EXT_SIZE); - ASSERT_EQ(0, rc) - TH_LOG("ucas map result %d not expected, %s", rc, strerror(errno)); - ASSERT_EQ(0, uc_run_once(self)); - ASSERT_EQ(false, uc_handle_exit(self)); - uc_assert_diag44(self); - - /* assert registers and memory are in expected state */ - ASSERT_EQ(2, sync_regs->gprs[0]); - ASSERT_EQ(0x55, sync_regs->gprs[1]); - ASSERT_EQ(0x55, *(u32 *)gpa2hva(self, self->base_gpa + VM_MEM_SIZE + disp)); - - /* unmap and run loop again */ - rc = uc_unmap_ext(self, 
self->base_gpa + VM_MEM_SIZE, VM_MEM_EXT_SIZE); - ASSERT_EQ(0, rc) - TH_LOG("ucas unmap result %d not expected, %s", rc, strerror(errno)); - ASSERT_EQ(0, uc_run_once(self)); - ASSERT_EQ(3, sync_regs->gprs[0]); - ASSERT_EQ(KVM_EXIT_S390_UCONTROL, run->exit_reason); - ASSERT_EQ(PGM_SEGMENT_TRANSLATION, run->s390_ucontrol.pgm_code); - /* handle ucontrol exit and remap memory after previous map and unmap */ - ASSERT_EQ(true, uc_handle_exit(self)); -} - -TEST_F(uc_kvm, uc_gprs) -{ - struct kvm_sync_regs *sync_regs = &self->run->s.regs; - struct kvm_run *run = self->run; - struct kvm_regs regs = {}; - - /* Set registers to values that are different from the ones that we expect below */ - for (int i = 0; i < 8; i++) - sync_regs->gprs[i] = 8; - run->kvm_dirty_regs |= KVM_SYNC_GPRS; - - /* copy test_gprs_asm to code_hva / code_gpa */ - TH_LOG("copy code %p to vm mapped memory %p / %p", - &test_gprs_asm, (void *)self->code_hva, (void *)self->code_gpa); - memcpy((void *)self->code_hva, &test_gprs_asm, PAGE_SIZE); - - /* DAT disabled + 64 bit mode */ - run->psw_mask = 0x0000000180000000ULL; - run->psw_addr = self->code_gpa; - - /* run and expect interception of diag 44 */ - ASSERT_EQ(0, uc_run_once(self)); - ASSERT_EQ(false, uc_handle_exit(self)); - uc_assert_diag44(self); - - /* Retrieve and check guest register values */ - ASSERT_EQ(0, ioctl(self->vcpu_fd, KVM_GET_REGS, ®s)); - for (int i = 0; i < 8; i++) { - ASSERT_EQ(i, regs.gprs[i]); - ASSERT_EQ(i, sync_regs->gprs[i]); - } - - /* run and expect interception of diag 44 again */ - ASSERT_EQ(0, uc_run_once(self)); - ASSERT_EQ(false, uc_handle_exit(self)); - uc_assert_diag44(self); - - /* check continued increment of register 0 value */ - ASSERT_EQ(0, ioctl(self->vcpu_fd, KVM_GET_REGS, ®s)); - ASSERT_EQ(1, regs.gprs[0]); - ASSERT_EQ(1, sync_regs->gprs[0]); -} - -TEST_F(uc_kvm, uc_skey) -{ - struct kvm_s390_sie_block *sie_block = self->sie_block; - struct kvm_sync_regs *sync_regs = &self->run->s.regs; - u64 test_vaddr = VM_MEM_SIZE - (SZ_1M / 2); - struct kvm_run *run = self->run; - const u8 skeyvalue = 0x34; - - /* copy test_skey_asm to code_hva / code_gpa */ - TH_LOG("copy code %p to vm mapped memory %p / %p", - &test_skey_asm, (void *)self->code_hva, (void *)self->code_gpa); - memcpy((void *)self->code_hva, &test_skey_asm, PAGE_SIZE); - - /* set register content for test_skey_asm to access not mapped memory */ - sync_regs->gprs[1] = skeyvalue; - sync_regs->gprs[5] = self->base_gpa; - sync_regs->gprs[6] = test_vaddr; - run->kvm_dirty_regs |= KVM_SYNC_GPRS; - - /* DAT disabled + 64 bit mode */ - run->psw_mask = 0x0000000180000000ULL; - run->psw_addr = self->code_gpa; - - ASSERT_EQ(0, uc_run_once(self)); - ASSERT_EQ(true, uc_handle_exit(self)); - ASSERT_EQ(1, sync_regs->gprs[0]); - - /* ISKE */ - ASSERT_EQ(0, uc_run_once(self)); - - /* - * Bail out and skip the test after uc_skey_enable was executed but iske - * is still intercepted. Instructions are not handled by the kernel. - * Thus there is no need to test this here. - */ - TEST_ASSERT_EQ(0, sie_block->cpuflags & CPUSTAT_KSS); - TEST_ASSERT_EQ(0, sie_block->ictl & (ICTL_ISKE | ICTL_SSKE | ICTL_RRBE)); - TEST_ASSERT_EQ(KVM_EXIT_S390_SIEIC, self->run->exit_reason); - TEST_ASSERT_EQ(ICPT_INST, sie_block->icptcode); - TEST_REQUIRE(sie_block->ipa != 0xb229); - - /* ISKE contd. 
*/ - ASSERT_EQ(false, uc_handle_exit(self)); - ASSERT_EQ(2, sync_regs->gprs[0]); - /* assert initial skey (ACC = 0, R & C = 1) */ - ASSERT_EQ(0x06, sync_regs->gprs[1]); - uc_assert_diag44(self); - - /* SSKE + ISKE */ - sync_regs->gprs[1] = skeyvalue; - run->kvm_dirty_regs |= KVM_SYNC_GPRS; - ASSERT_EQ(0, uc_run_once(self)); - ASSERT_EQ(false, uc_handle_exit(self)); - ASSERT_EQ(3, sync_regs->gprs[0]); - ASSERT_EQ(skeyvalue, sync_regs->gprs[1]); - uc_assert_diag44(self); - - /* RRBE + ISKE */ - sync_regs->gprs[1] = skeyvalue; - run->kvm_dirty_regs |= KVM_SYNC_GPRS; - ASSERT_EQ(0, uc_run_once(self)); - ASSERT_EQ(false, uc_handle_exit(self)); - ASSERT_EQ(4, sync_regs->gprs[0]); - /* assert R reset but rest of skey unchanged */ - ASSERT_EQ(skeyvalue & 0xfa, sync_regs->gprs[1]); - ASSERT_EQ(0, sync_regs->gprs[1] & 0x04); - uc_assert_diag44(self); -} - -TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/kvm/set_memory_region_test.c b/tools/testing/selftests/kvm/set_memory_region_test.c index a8267628e9ed..86ee3385e860 100644 --- a/tools/testing/selftests/kvm/set_memory_region_test.c +++ b/tools/testing/selftests/kvm/set_memory_region_test.c @@ -17,9 +17,9 @@ #include /* - * s390x needs at least 1MB alignment, and the x86_64 MOVE/DELETE tests need a - * 2MB sized and aligned region so that the initial region corresponds to - * exactly one large page. + * s390 needs at least 1MB alignment, and the x86 MOVE/DELETE tests need a 2MB + * sized and aligned region so that the initial region corresponds to exactly + * one large page. */ #define MEM_REGION_SIZE 0x200000 diff --git a/tools/testing/selftests/kvm/x86/amx_test.c b/tools/testing/selftests/kvm/x86/amx_test.c new file mode 100644 index 000000000000..f4ce5a185a7d --- /dev/null +++ b/tools/testing/selftests/kvm/x86/amx_test.c @@ -0,0 +1,315 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * amx tests + * + * Copyright (C) 2021, Intel, Inc. + * + * Tests for amx #NM exception and save/restore. 
+ */ +#include +#include +#include +#include +#include +#include + +#include "test_util.h" + +#include "kvm_util.h" +#include "processor.h" +#include "vmx.h" + +#ifndef __x86_64__ +# error This test is 64-bit only +#endif + +#define NUM_TILES 8 +#define TILE_SIZE 1024 +#define XSAVE_SIZE ((NUM_TILES * TILE_SIZE) + PAGE_SIZE) + +/* Tile configuration associated: */ +#define PALETTE_TABLE_INDEX 1 +#define MAX_TILES 16 +#define RESERVED_BYTES 14 + +#define XSAVE_HDR_OFFSET 512 + +struct tile_config { + u8 palette_id; + u8 start_row; + u8 reserved[RESERVED_BYTES]; + u16 colsb[MAX_TILES]; + u8 rows[MAX_TILES]; +}; + +struct tile_data { + u8 data[NUM_TILES * TILE_SIZE]; +}; + +struct xtile_info { + u16 bytes_per_tile; + u16 bytes_per_row; + u16 max_names; + u16 max_rows; + u32 xsave_offset; + u32 xsave_size; +}; + +static struct xtile_info xtile; + +static inline void __ldtilecfg(void *cfg) +{ + asm volatile(".byte 0xc4,0xe2,0x78,0x49,0x00" + : : "a"(cfg)); +} + +static inline void __tileloadd(void *tile) +{ + asm volatile(".byte 0xc4,0xe2,0x7b,0x4b,0x04,0x10" + : : "a"(tile), "d"(0)); +} + +static inline void __tilerelease(void) +{ + asm volatile(".byte 0xc4, 0xe2, 0x78, 0x49, 0xc0" ::); +} + +static inline void __xsavec(struct xstate *xstate, uint64_t rfbm) +{ + uint32_t rfbm_lo = rfbm; + uint32_t rfbm_hi = rfbm >> 32; + + asm volatile("xsavec (%%rdi)" + : : "D" (xstate), "a" (rfbm_lo), "d" (rfbm_hi) + : "memory"); +} + +static void check_xtile_info(void) +{ + GUEST_ASSERT((xgetbv(0) & XFEATURE_MASK_XTILE) == XFEATURE_MASK_XTILE); + + GUEST_ASSERT(this_cpu_has_p(X86_PROPERTY_XSTATE_MAX_SIZE_XCR0)); + GUEST_ASSERT(this_cpu_property(X86_PROPERTY_XSTATE_MAX_SIZE_XCR0) <= XSAVE_SIZE); + + xtile.xsave_offset = this_cpu_property(X86_PROPERTY_XSTATE_TILE_OFFSET); + GUEST_ASSERT(xtile.xsave_offset == 2816); + xtile.xsave_size = this_cpu_property(X86_PROPERTY_XSTATE_TILE_SIZE); + GUEST_ASSERT(xtile.xsave_size == 8192); + GUEST_ASSERT(sizeof(struct tile_data) >= xtile.xsave_size); + + GUEST_ASSERT(this_cpu_has_p(X86_PROPERTY_AMX_MAX_PALETTE_TABLES)); + GUEST_ASSERT(this_cpu_property(X86_PROPERTY_AMX_MAX_PALETTE_TABLES) >= + PALETTE_TABLE_INDEX); + + GUEST_ASSERT(this_cpu_has_p(X86_PROPERTY_AMX_NR_TILE_REGS)); + xtile.max_names = this_cpu_property(X86_PROPERTY_AMX_NR_TILE_REGS); + GUEST_ASSERT(xtile.max_names == 8); + xtile.bytes_per_tile = this_cpu_property(X86_PROPERTY_AMX_BYTES_PER_TILE); + GUEST_ASSERT(xtile.bytes_per_tile == 1024); + xtile.bytes_per_row = this_cpu_property(X86_PROPERTY_AMX_BYTES_PER_ROW); + GUEST_ASSERT(xtile.bytes_per_row == 64); + xtile.max_rows = this_cpu_property(X86_PROPERTY_AMX_MAX_ROWS); + GUEST_ASSERT(xtile.max_rows == 16); +} + +static void set_tilecfg(struct tile_config *cfg) +{ + int i; + + /* Only palette id 1 */ + cfg->palette_id = 1; + for (i = 0; i < xtile.max_names; i++) { + cfg->colsb[i] = xtile.bytes_per_row; + cfg->rows[i] = xtile.max_rows; + } +} + +static void __attribute__((__flatten__)) guest_code(struct tile_config *amx_cfg, + struct tile_data *tiledata, + struct xstate *xstate) +{ + GUEST_ASSERT(this_cpu_has(X86_FEATURE_XSAVE) && + this_cpu_has(X86_FEATURE_OSXSAVE)); + check_xtile_info(); + GUEST_SYNC(1); + + /* xfd=0, enable amx */ + wrmsr(MSR_IA32_XFD, 0); + GUEST_SYNC(2); + GUEST_ASSERT(rdmsr(MSR_IA32_XFD) == 0); + set_tilecfg(amx_cfg); + __ldtilecfg(amx_cfg); + GUEST_SYNC(3); + /* Check save/restore when trap to userspace */ + __tileloadd(tiledata); + GUEST_SYNC(4); + __tilerelease(); + GUEST_SYNC(5); + /* + * After XSAVEC, XTILEDATA is cleared in the 
xstate_bv but is set in + * the xcomp_bv. + */ + xstate->header.xstate_bv = XFEATURE_MASK_XTILE_DATA; + __xsavec(xstate, XFEATURE_MASK_XTILE_DATA); + GUEST_ASSERT(!(xstate->header.xstate_bv & XFEATURE_MASK_XTILE_DATA)); + GUEST_ASSERT(xstate->header.xcomp_bv & XFEATURE_MASK_XTILE_DATA); + + /* xfd=0x40000, disable amx tiledata */ + wrmsr(MSR_IA32_XFD, XFEATURE_MASK_XTILE_DATA); + + /* + * XTILEDATA is cleared in xstate_bv but set in xcomp_bv, this property + * remains the same even when amx tiledata is disabled by IA32_XFD. + */ + xstate->header.xstate_bv = XFEATURE_MASK_XTILE_DATA; + __xsavec(xstate, XFEATURE_MASK_XTILE_DATA); + GUEST_ASSERT(!(xstate->header.xstate_bv & XFEATURE_MASK_XTILE_DATA)); + GUEST_ASSERT((xstate->header.xcomp_bv & XFEATURE_MASK_XTILE_DATA)); + + GUEST_SYNC(6); + GUEST_ASSERT(rdmsr(MSR_IA32_XFD) == XFEATURE_MASK_XTILE_DATA); + set_tilecfg(amx_cfg); + __ldtilecfg(amx_cfg); + /* Trigger #NM exception */ + __tileloadd(tiledata); + GUEST_SYNC(10); + + GUEST_DONE(); +} + +void guest_nm_handler(struct ex_regs *regs) +{ + /* Check if #NM is triggered by XFEATURE_MASK_XTILE_DATA */ + GUEST_SYNC(7); + GUEST_ASSERT(!(get_cr0() & X86_CR0_TS)); + GUEST_ASSERT(rdmsr(MSR_IA32_XFD_ERR) == XFEATURE_MASK_XTILE_DATA); + GUEST_ASSERT(rdmsr(MSR_IA32_XFD) == XFEATURE_MASK_XTILE_DATA); + GUEST_SYNC(8); + GUEST_ASSERT(rdmsr(MSR_IA32_XFD_ERR) == XFEATURE_MASK_XTILE_DATA); + GUEST_ASSERT(rdmsr(MSR_IA32_XFD) == XFEATURE_MASK_XTILE_DATA); + /* Clear xfd_err */ + wrmsr(MSR_IA32_XFD_ERR, 0); + /* xfd=0, enable amx */ + wrmsr(MSR_IA32_XFD, 0); + GUEST_SYNC(9); +} + +int main(int argc, char *argv[]) +{ + struct kvm_regs regs1, regs2; + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + struct kvm_x86_state *state; + int xsave_restore_size; + vm_vaddr_t amx_cfg, tiledata, xstate; + struct ucall uc; + u32 amx_offset; + int ret; + + /* + * Note, all off-by-default features must be enabled before anything + * caches KVM_GET_SUPPORTED_CPUID, e.g. before using kvm_cpu_has(). 
+ */ + vm_xsave_require_permission(XFEATURE_MASK_XTILE_DATA); + + TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XFD)); + TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XSAVE)); + TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_AMX_TILE)); + TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XTILECFG)); + TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XTILEDATA)); + TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XTILEDATA_XFD)); + + /* Create VM */ + vm = vm_create_with_one_vcpu(&vcpu, guest_code); + + TEST_ASSERT(kvm_cpu_has_p(X86_PROPERTY_XSTATE_MAX_SIZE), + "KVM should enumerate max XSAVE size when XSAVE is supported"); + xsave_restore_size = kvm_cpu_property(X86_PROPERTY_XSTATE_MAX_SIZE); + + vcpu_regs_get(vcpu, ®s1); + + /* Register #NM handler */ + vm_install_exception_handler(vm, NM_VECTOR, guest_nm_handler); + + /* amx cfg for guest_code */ + amx_cfg = vm_vaddr_alloc_page(vm); + memset(addr_gva2hva(vm, amx_cfg), 0x0, getpagesize()); + + /* amx tiledata for guest_code */ + tiledata = vm_vaddr_alloc_pages(vm, 2); + memset(addr_gva2hva(vm, tiledata), rand() | 1, 2 * getpagesize()); + + /* XSAVE state for guest_code */ + xstate = vm_vaddr_alloc_pages(vm, DIV_ROUND_UP(XSAVE_SIZE, PAGE_SIZE)); + memset(addr_gva2hva(vm, xstate), 0, PAGE_SIZE * DIV_ROUND_UP(XSAVE_SIZE, PAGE_SIZE)); + vcpu_args_set(vcpu, 3, amx_cfg, tiledata, xstate); + + for (;;) { + vcpu_run(vcpu); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); + + switch (get_ucall(vcpu, &uc)) { + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + /* NOT REACHED */ + case UCALL_SYNC: + switch (uc.args[1]) { + case 1: + case 2: + case 3: + case 5: + case 6: + case 7: + case 8: + fprintf(stderr, "GUEST_SYNC(%ld)\n", uc.args[1]); + break; + case 4: + case 10: + fprintf(stderr, + "GUEST_SYNC(%ld), check save/restore status\n", uc.args[1]); + + /* Compacted mode, get amx offset by xsave area + * size subtract 8K amx size. + */ + amx_offset = xsave_restore_size - NUM_TILES*TILE_SIZE; + state = vcpu_save_state(vcpu); + void *amx_start = (void *)state->xsave + amx_offset; + void *tiles_data = (void *)addr_gva2hva(vm, tiledata); + /* Only check TMM0 register, 1 tile */ + ret = memcmp(amx_start, tiles_data, TILE_SIZE); + TEST_ASSERT(ret == 0, "memcmp failed, ret=%d", ret); + kvm_x86_state_cleanup(state); + break; + case 9: + fprintf(stderr, + "GUEST_SYNC(%ld), #NM exception and enable amx\n", uc.args[1]); + break; + } + break; + case UCALL_DONE: + fprintf(stderr, "UCALL_DONE\n"); + goto done; + default: + TEST_FAIL("Unknown ucall %lu", uc.cmd); + } + + state = vcpu_save_state(vcpu); + memset(®s1, 0, sizeof(regs1)); + vcpu_regs_get(vcpu, ®s1); + + kvm_vm_release(vm); + + /* Restore state in a new VM. */ + vcpu = vm_recreate_with_one_vcpu(vm); + vcpu_load_state(vcpu, state); + kvm_x86_state_cleanup(state); + + memset(®s2, 0, sizeof(regs2)); + vcpu_regs_get(vcpu, ®s2); + TEST_ASSERT(!memcmp(®s1, ®s2, sizeof(regs2)), + "Unexpected register values after vcpu_load_state; rdi: %lx rsi: %lx", + (ulong) regs2.rdi, (ulong) regs2.rsi); + } +done: + kvm_vm_free(vm); +} diff --git a/tools/testing/selftests/kvm/x86/apic_bus_clock_test.c b/tools/testing/selftests/kvm/x86/apic_bus_clock_test.c new file mode 100644 index 000000000000..f8916bb34405 --- /dev/null +++ b/tools/testing/selftests/kvm/x86/apic_bus_clock_test.c @@ -0,0 +1,194 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2024 Intel Corporation + * + * Verify KVM correctly emulates the APIC bus frequency when the VMM configures + * the frequency via KVM_CAP_X86_APIC_BUS_CYCLES_NS. 
Start the APIC timer by + * programming TMICT (timer initial count) to the largest value possible (so + * that the timer will not expire during the test). Then, after an arbitrary + * amount of time has elapsed, verify TMCCT (timer current count) is within 1% + * of the expected value based on the time elapsed, the APIC bus frequency, and + * the programmed TDCR (timer divide configuration register). + */ + +#include "apic.h" +#include "test_util.h" + +/* + * Possible TDCR values with matching divide count. Used to modify APIC + * timer frequency. + */ +static const struct { + const uint32_t tdcr; + const uint32_t divide_count; +} tdcrs[] = { + {0x0, 2}, + {0x1, 4}, + {0x2, 8}, + {0x3, 16}, + {0x8, 32}, + {0x9, 64}, + {0xa, 128}, + {0xb, 1}, +}; + +static bool is_x2apic; + +static void apic_enable(void) +{ + if (is_x2apic) + x2apic_enable(); + else + xapic_enable(); +} + +static uint32_t apic_read_reg(unsigned int reg) +{ + return is_x2apic ? x2apic_read_reg(reg) : xapic_read_reg(reg); +} + +static void apic_write_reg(unsigned int reg, uint32_t val) +{ + if (is_x2apic) + x2apic_write_reg(reg, val); + else + xapic_write_reg(reg, val); +} + +static void apic_guest_code(uint64_t apic_hz, uint64_t delay_ms) +{ + uint64_t tsc_hz = guest_tsc_khz * 1000; + const uint32_t tmict = ~0u; + uint64_t tsc0, tsc1, freq; + uint32_t tmcct; + int i; + + apic_enable(); + + /* + * Setup one-shot timer. The vector does not matter because the + * interrupt should not fire. + */ + apic_write_reg(APIC_LVTT, APIC_LVT_TIMER_ONESHOT | APIC_LVT_MASKED); + + for (i = 0; i < ARRAY_SIZE(tdcrs); i++) { + apic_write_reg(APIC_TDCR, tdcrs[i].tdcr); + apic_write_reg(APIC_TMICT, tmict); + + tsc0 = rdtsc(); + udelay(delay_ms * 1000); + tmcct = apic_read_reg(APIC_TMCCT); + tsc1 = rdtsc(); + + /* + * Stop the timer _after_ reading the current, final count, as + * writing the initial counter also modifies the current count. + */ + apic_write_reg(APIC_TMICT, 0); + + freq = (tmict - tmcct) * tdcrs[i].divide_count * tsc_hz / (tsc1 - tsc0); + /* Check if measured frequency is within 5% of configured frequency. 
*/ + __GUEST_ASSERT(freq < apic_hz * 105 / 100 && freq > apic_hz * 95 / 100, + "Frequency = %lu (wanted %lu - %lu), bus = %lu, div = %u, tsc = %lu", + freq, apic_hz * 95 / 100, apic_hz * 105 / 100, + apic_hz, tdcrs[i].divide_count, tsc_hz); + } + + GUEST_DONE(); +} + +static void test_apic_bus_clock(struct kvm_vcpu *vcpu) +{ + bool done = false; + struct ucall uc; + + while (!done) { + vcpu_run(vcpu); + + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); + + switch (get_ucall(vcpu, &uc)) { + case UCALL_DONE: + done = true; + break; + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + break; + default: + TEST_FAIL("Unknown ucall %lu", uc.cmd); + break; + } + } +} + +static void run_apic_bus_clock_test(uint64_t apic_hz, uint64_t delay_ms, + bool x2apic) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + int ret; + + is_x2apic = x2apic; + + vm = vm_create(1); + + sync_global_to_guest(vm, is_x2apic); + + vm_enable_cap(vm, KVM_CAP_X86_APIC_BUS_CYCLES_NS, + NSEC_PER_SEC / apic_hz); + + vcpu = vm_vcpu_add(vm, 0, apic_guest_code); + vcpu_args_set(vcpu, 2, apic_hz, delay_ms); + + ret = __vm_enable_cap(vm, KVM_CAP_X86_APIC_BUS_CYCLES_NS, + NSEC_PER_SEC / apic_hz); + TEST_ASSERT(ret < 0 && errno == EINVAL, + "Setting of APIC bus frequency after vCPU is created should fail."); + + if (!is_x2apic) + virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA); + + test_apic_bus_clock(vcpu); + kvm_vm_free(vm); +} + +static void help(char *name) +{ + puts(""); + printf("usage: %s [-h] [-d delay] [-f APIC bus freq]\n", name); + puts(""); + printf("-d: Delay (in msec) guest uses to measure APIC bus frequency.\n"); + printf("-f: The APIC bus frequency (in MHz) to be configured for the guest.\n"); + puts(""); +} + +int main(int argc, char *argv[]) +{ + /* + * Arbitrarilty default to 25MHz for the APIC bus frequency, which is + * different enough from the default 1GHz to be interesting. + */ + uint64_t apic_hz = 25 * 1000 * 1000; + uint64_t delay_ms = 100; + int opt; + + TEST_REQUIRE(kvm_has_cap(KVM_CAP_X86_APIC_BUS_CYCLES_NS)); + + while ((opt = getopt(argc, argv, "d:f:h")) != -1) { + switch (opt) { + case 'f': + apic_hz = atoi_positive("APIC bus frequency", optarg) * 1000 * 1000; + break; + case 'd': + delay_ms = atoi_positive("Delay in milliseconds", optarg); + break; + case 'h': + default: + help(argv[0]); + exit(KSFT_SKIP); + } + } + + run_apic_bus_clock_test(apic_hz, delay_ms, false); + run_apic_bus_clock_test(apic_hz, delay_ms, true); +} diff --git a/tools/testing/selftests/kvm/x86/cpuid_test.c b/tools/testing/selftests/kvm/x86/cpuid_test.c new file mode 100644 index 000000000000..7b3fda6842bc --- /dev/null +++ b/tools/testing/selftests/kvm/x86/cpuid_test.c @@ -0,0 +1,225 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2021, Red Hat Inc. 
+ * + * Generic tests for KVM CPUID set/get ioctls + */ +#include +#include +#include + +#include "test_util.h" +#include "kvm_util.h" +#include "processor.h" + +struct cpuid_mask { + union { + struct { + u32 eax; + u32 ebx; + u32 ecx; + u32 edx; + }; + u32 regs[4]; + }; +}; + +static void test_guest_cpuids(struct kvm_cpuid2 *guest_cpuid) +{ + int i; + u32 eax, ebx, ecx, edx; + + for (i = 0; i < guest_cpuid->nent; i++) { + __cpuid(guest_cpuid->entries[i].function, + guest_cpuid->entries[i].index, + &eax, &ebx, &ecx, &edx); + + GUEST_ASSERT_EQ(eax, guest_cpuid->entries[i].eax); + GUEST_ASSERT_EQ(ebx, guest_cpuid->entries[i].ebx); + GUEST_ASSERT_EQ(ecx, guest_cpuid->entries[i].ecx); + GUEST_ASSERT_EQ(edx, guest_cpuid->entries[i].edx); + } + +} + +static void guest_main(struct kvm_cpuid2 *guest_cpuid) +{ + GUEST_SYNC(1); + + test_guest_cpuids(guest_cpuid); + + GUEST_SYNC(2); + + GUEST_ASSERT_EQ(this_cpu_property(X86_PROPERTY_MAX_KVM_LEAF), 0x40000001); + + GUEST_DONE(); +} + +static struct cpuid_mask get_const_cpuid_mask(const struct kvm_cpuid_entry2 *entry) +{ + struct cpuid_mask mask; + + memset(&mask, 0xff, sizeof(mask)); + + switch (entry->function) { + case 0x1: + mask.regs[X86_FEATURE_OSXSAVE.reg] &= ~BIT(X86_FEATURE_OSXSAVE.bit); + break; + case 0x7: + mask.regs[X86_FEATURE_OSPKE.reg] &= ~BIT(X86_FEATURE_OSPKE.bit); + break; + case 0xd: + /* + * CPUID.0xD.{0,1}.EBX enumerate XSAVE size based on the current + * XCR0 and IA32_XSS MSR values. + */ + if (entry->index < 2) + mask.ebx = 0; + break; + } + return mask; +} + +static void compare_cpuids(const struct kvm_cpuid2 *cpuid1, + const struct kvm_cpuid2 *cpuid2) +{ + const struct kvm_cpuid_entry2 *e1, *e2; + int i; + + TEST_ASSERT(cpuid1->nent == cpuid2->nent, + "CPUID nent mismatch: %d vs. %d", cpuid1->nent, cpuid2->nent); + + for (i = 0; i < cpuid1->nent; i++) { + struct cpuid_mask mask; + + e1 = &cpuid1->entries[i]; + e2 = &cpuid2->entries[i]; + + TEST_ASSERT(e1->function == e2->function && + e1->index == e2->index && e1->flags == e2->flags, + "CPUID entries[%d] mismtach: 0x%x.%d.%x vs. 0x%x.%d.%x", + i, e1->function, e1->index, e1->flags, + e2->function, e2->index, e2->flags); + + /* Mask off dynamic bits, e.g. OSXSAVE, when comparing entries. 
*/ + mask = get_const_cpuid_mask(e1); + + TEST_ASSERT((e1->eax & mask.eax) == (e2->eax & mask.eax) && + (e1->ebx & mask.ebx) == (e2->ebx & mask.ebx) && + (e1->ecx & mask.ecx) == (e2->ecx & mask.ecx) && + (e1->edx & mask.edx) == (e2->edx & mask.edx), + "CPUID 0x%x.%x differ: 0x%x:0x%x:0x%x:0x%x vs 0x%x:0x%x:0x%x:0x%x", + e1->function, e1->index, + e1->eax & mask.eax, e1->ebx & mask.ebx, + e1->ecx & mask.ecx, e1->edx & mask.edx, + e2->eax & mask.eax, e2->ebx & mask.ebx, + e2->ecx & mask.ecx, e2->edx & mask.edx); + } +} + +static void run_vcpu(struct kvm_vcpu *vcpu, int stage) +{ + struct ucall uc; + + vcpu_run(vcpu); + + switch (get_ucall(vcpu, &uc)) { + case UCALL_SYNC: + TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") && + uc.args[1] == stage + 1, + "Stage %d: Unexpected register values vmexit, got %lx", + stage + 1, (ulong)uc.args[1]); + return; + case UCALL_DONE: + return; + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + default: + TEST_ASSERT(false, "Unexpected exit: %s", + exit_reason_str(vcpu->run->exit_reason)); + } +} + +struct kvm_cpuid2 *vcpu_alloc_cpuid(struct kvm_vm *vm, vm_vaddr_t *p_gva, struct kvm_cpuid2 *cpuid) +{ + int size = sizeof(*cpuid) + cpuid->nent * sizeof(cpuid->entries[0]); + vm_vaddr_t gva = vm_vaddr_alloc(vm, size, KVM_UTIL_MIN_VADDR); + struct kvm_cpuid2 *guest_cpuids = addr_gva2hva(vm, gva); + + memcpy(guest_cpuids, cpuid, size); + + *p_gva = gva; + return guest_cpuids; +} + +static void set_cpuid_after_run(struct kvm_vcpu *vcpu) +{ + struct kvm_cpuid_entry2 *ent; + int rc; + u32 eax, ebx, x; + + /* Setting unmodified CPUID is allowed */ + rc = __vcpu_set_cpuid(vcpu); + TEST_ASSERT(!rc, "Setting unmodified CPUID after KVM_RUN failed: %d", rc); + + /* Changing CPU features is forbidden */ + ent = vcpu_get_cpuid_entry(vcpu, 0x7); + ebx = ent->ebx; + ent->ebx--; + rc = __vcpu_set_cpuid(vcpu); + TEST_ASSERT(rc, "Changing CPU features should fail"); + ent->ebx = ebx; + + /* Changing MAXPHYADDR is forbidden */ + ent = vcpu_get_cpuid_entry(vcpu, 0x80000008); + eax = ent->eax; + x = eax & 0xff; + ent->eax = (eax & ~0xffu) | (x - 1); + rc = __vcpu_set_cpuid(vcpu); + TEST_ASSERT(rc, "Changing MAXPHYADDR should fail"); + ent->eax = eax; +} + +static void test_get_cpuid2(struct kvm_vcpu *vcpu) +{ + struct kvm_cpuid2 *cpuid = allocate_kvm_cpuid2(vcpu->cpuid->nent + 1); + int i, r; + + vcpu_ioctl(vcpu, KVM_GET_CPUID2, cpuid); + TEST_ASSERT(cpuid->nent == vcpu->cpuid->nent, + "KVM didn't update nent on success, wanted %u, got %u", + vcpu->cpuid->nent, cpuid->nent); + + for (i = 0; i < vcpu->cpuid->nent; i++) { + cpuid->nent = i; + r = __vcpu_ioctl(vcpu, KVM_GET_CPUID2, cpuid); + TEST_ASSERT(r && errno == E2BIG, KVM_IOCTL_ERROR(KVM_GET_CPUID2, r)); + TEST_ASSERT(cpuid->nent == i, "KVM modified nent on failure"); + } + free(cpuid); +} + +int main(void) +{ + struct kvm_vcpu *vcpu; + vm_vaddr_t cpuid_gva; + struct kvm_vm *vm; + int stage; + + vm = vm_create_with_one_vcpu(&vcpu, guest_main); + + compare_cpuids(kvm_get_supported_cpuid(), vcpu->cpuid); + + vcpu_alloc_cpuid(vm, &cpuid_gva, vcpu->cpuid); + + vcpu_args_set(vcpu, 1, cpuid_gva); + + for (stage = 0; stage < 3; stage++) + run_vcpu(vcpu, stage); + + set_cpuid_after_run(vcpu); + + test_get_cpuid2(vcpu); + + kvm_vm_free(vm); +} diff --git a/tools/testing/selftests/kvm/x86/cr4_cpuid_sync_test.c b/tools/testing/selftests/kvm/x86/cr4_cpuid_sync_test.c new file mode 100644 index 000000000000..28cc66454601 --- /dev/null +++ b/tools/testing/selftests/kvm/x86/cr4_cpuid_sync_test.c @@ -0,0 +1,100 @@ +// 
SPDX-License-Identifier: GPL-2.0 +/* + * CR4 and CPUID sync test + * + * Copyright 2018, Red Hat, Inc. and/or its affiliates. + * + * Author: + * Wei Huang + */ + +#include +#include +#include +#include +#include + +#include "test_util.h" + +#include "kvm_util.h" +#include "processor.h" + +#define MAGIC_HYPERCALL_PORT 0x80 + +static void guest_code(void) +{ + u32 regs[4] = { + [KVM_CPUID_EAX] = X86_FEATURE_OSXSAVE.function, + [KVM_CPUID_ECX] = X86_FEATURE_OSXSAVE.index, + }; + + /* CR4.OSXSAVE should be enabled by default (for selftests vCPUs). */ + GUEST_ASSERT(get_cr4() & X86_CR4_OSXSAVE); + + /* verify CR4.OSXSAVE == CPUID.OSXSAVE */ + GUEST_ASSERT(this_cpu_has(X86_FEATURE_OSXSAVE)); + + /* + * Notify hypervisor to clear CR4.0SXSAVE, do CPUID and save output, + * and then restore CR4. Do this all in assembly to ensure no AVX + * instructions are executed while OSXSAVE=0. + */ + asm volatile ( + "out %%al, $" __stringify(MAGIC_HYPERCALL_PORT) "\n\t" + "cpuid\n\t" + "mov %%rdi, %%cr4\n\t" + : "+a" (regs[KVM_CPUID_EAX]), + "=b" (regs[KVM_CPUID_EBX]), + "+c" (regs[KVM_CPUID_ECX]), + "=d" (regs[KVM_CPUID_EDX]) + : "D" (get_cr4()) + ); + + /* Verify KVM cleared OSXSAVE in CPUID when it was cleared in CR4. */ + GUEST_ASSERT(!(regs[X86_FEATURE_OSXSAVE.reg] & BIT(X86_FEATURE_OSXSAVE.bit))); + + /* Verify restoring CR4 also restored OSXSAVE in CPUID. */ + GUEST_ASSERT(this_cpu_has(X86_FEATURE_OSXSAVE)); + + GUEST_DONE(); +} + +int main(int argc, char *argv[]) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + struct kvm_sregs sregs; + struct ucall uc; + + TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XSAVE)); + + vm = vm_create_with_one_vcpu(&vcpu, guest_code); + + while (1) { + vcpu_run(vcpu); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); + + if (vcpu->run->io.port == MAGIC_HYPERCALL_PORT && + vcpu->run->io.direction == KVM_EXIT_IO_OUT) { + /* emulate hypervisor clearing CR4.OSXSAVE */ + vcpu_sregs_get(vcpu, &sregs); + sregs.cr4 &= ~X86_CR4_OSXSAVE; + vcpu_sregs_set(vcpu, &sregs); + continue; + } + + switch (get_ucall(vcpu, &uc)) { + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + break; + case UCALL_DONE: + goto done; + default: + TEST_FAIL("Unknown ucall %lu", uc.cmd); + } + } + +done: + kvm_vm_free(vm); + return 0; +} diff --git a/tools/testing/selftests/kvm/x86/debug_regs.c b/tools/testing/selftests/kvm/x86/debug_regs.c new file mode 100644 index 000000000000..2d814c1d1dc4 --- /dev/null +++ b/tools/testing/selftests/kvm/x86/debug_regs.c @@ -0,0 +1,217 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * KVM guest debug register tests + * + * Copyright (C) 2020, Red Hat, Inc. + */ +#include +#include +#include "kvm_util.h" +#include "processor.h" +#include "apic.h" + +#define DR6_BD (1 << 13) +#define DR7_GD (1 << 13) + +#define IRQ_VECTOR 0xAA + +/* For testing data access debug BP */ +uint32_t guest_value; + +extern unsigned char sw_bp, hw_bp, write_data, ss_start, bd_start; + +static void guest_code(void) +{ + /* Create a pending interrupt on current vCPU */ + x2apic_enable(); + x2apic_write_reg(APIC_ICR, APIC_DEST_SELF | APIC_INT_ASSERT | + APIC_DM_FIXED | IRQ_VECTOR); + + /* + * Software BP tests. + * + * NOTE: sw_bp need to be before the cmd here, because int3 is an + * exception rather than a normal trap for KVM_SET_GUEST_DEBUG (we + * capture it using the vcpu exception bitmap). 
+ */ + asm volatile("sw_bp: int3"); + + /* Hardware instruction BP test */ + asm volatile("hw_bp: nop"); + + /* Hardware data BP test */ + asm volatile("mov $1234,%%rax;\n\t" + "mov %%rax,%0;\n\t write_data:" + : "=m" (guest_value) : : "rax"); + + /* + * Single step test, covers 2 basic instructions and 2 emulated + * + * Enable interrupts during the single stepping to see that pending + * interrupt we raised is not handled due to KVM_GUESTDBG_BLOCKIRQ. + * + * Write MSR_IA32_TSC_DEADLINE to verify that KVM's fastpath handler + * exits to userspace due to single-step being enabled. + */ + asm volatile("ss_start: " + "sti\n\t" + "xor %%eax,%%eax\n\t" + "cpuid\n\t" + "movl $" __stringify(MSR_IA32_TSC_DEADLINE) ", %%ecx\n\t" + "wrmsr\n\t" + "cli\n\t" + : : : "eax", "ebx", "ecx", "edx"); + + /* DR6.BD test */ + asm volatile("bd_start: mov %%dr0, %%rax" : : : "rax"); + GUEST_DONE(); +} + +#define CAST_TO_RIP(v) ((unsigned long long)&(v)) + +static void vcpu_skip_insn(struct kvm_vcpu *vcpu, int insn_len) +{ + struct kvm_regs regs; + + vcpu_regs_get(vcpu, ®s); + regs.rip += insn_len; + vcpu_regs_set(vcpu, ®s); +} + +int main(void) +{ + struct kvm_guest_debug debug; + unsigned long long target_dr6, target_rip; + struct kvm_vcpu *vcpu; + struct kvm_run *run; + struct kvm_vm *vm; + struct ucall uc; + uint64_t cmd; + int i; + /* Instruction lengths starting at ss_start */ + int ss_size[6] = { + 1, /* sti*/ + 2, /* xor */ + 2, /* cpuid */ + 5, /* mov */ + 2, /* rdmsr */ + 1, /* cli */ + }; + + TEST_REQUIRE(kvm_has_cap(KVM_CAP_SET_GUEST_DEBUG)); + + vm = vm_create_with_one_vcpu(&vcpu, guest_code); + run = vcpu->run; + + /* Test software BPs - int3 */ + memset(&debug, 0, sizeof(debug)); + debug.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP; + vcpu_guest_debug_set(vcpu, &debug); + vcpu_run(vcpu); + TEST_ASSERT(run->exit_reason == KVM_EXIT_DEBUG && + run->debug.arch.exception == BP_VECTOR && + run->debug.arch.pc == CAST_TO_RIP(sw_bp), + "INT3: exit %d exception %d rip 0x%llx (should be 0x%llx)", + run->exit_reason, run->debug.arch.exception, + run->debug.arch.pc, CAST_TO_RIP(sw_bp)); + vcpu_skip_insn(vcpu, 1); + + /* Test instruction HW BP over DR[0-3] */ + for (i = 0; i < 4; i++) { + memset(&debug, 0, sizeof(debug)); + debug.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP; + debug.arch.debugreg[i] = CAST_TO_RIP(hw_bp); + debug.arch.debugreg[7] = 0x400 | (1UL << (2*i+1)); + vcpu_guest_debug_set(vcpu, &debug); + vcpu_run(vcpu); + target_dr6 = 0xffff0ff0 | (1UL << i); + TEST_ASSERT(run->exit_reason == KVM_EXIT_DEBUG && + run->debug.arch.exception == DB_VECTOR && + run->debug.arch.pc == CAST_TO_RIP(hw_bp) && + run->debug.arch.dr6 == target_dr6, + "INS_HW_BP (DR%d): exit %d exception %d rip 0x%llx " + "(should be 0x%llx) dr6 0x%llx (should be 0x%llx)", + i, run->exit_reason, run->debug.arch.exception, + run->debug.arch.pc, CAST_TO_RIP(hw_bp), + run->debug.arch.dr6, target_dr6); + } + /* Skip "nop" */ + vcpu_skip_insn(vcpu, 1); + + /* Test data access HW BP over DR[0-3] */ + for (i = 0; i < 4; i++) { + memset(&debug, 0, sizeof(debug)); + debug.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP; + debug.arch.debugreg[i] = CAST_TO_RIP(guest_value); + debug.arch.debugreg[7] = 0x00000400 | (1UL << (2*i+1)) | + (0x000d0000UL << (4*i)); + vcpu_guest_debug_set(vcpu, &debug); + vcpu_run(vcpu); + target_dr6 = 0xffff0ff0 | (1UL << i); + TEST_ASSERT(run->exit_reason == KVM_EXIT_DEBUG && + run->debug.arch.exception == DB_VECTOR && + run->debug.arch.pc == CAST_TO_RIP(write_data) && + run->debug.arch.dr6 
== target_dr6, + "DATA_HW_BP (DR%d): exit %d exception %d rip 0x%llx " + "(should be 0x%llx) dr6 0x%llx (should be 0x%llx)", + i, run->exit_reason, run->debug.arch.exception, + run->debug.arch.pc, CAST_TO_RIP(write_data), + run->debug.arch.dr6, target_dr6); + /* Rollback the 4-bytes "mov" */ + vcpu_skip_insn(vcpu, -7); + } + /* Skip the 4-bytes "mov" */ + vcpu_skip_insn(vcpu, 7); + + /* Test single step */ + target_rip = CAST_TO_RIP(ss_start); + target_dr6 = 0xffff4ff0ULL; + for (i = 0; i < ARRAY_SIZE(ss_size); i++) { + target_rip += ss_size[i]; + memset(&debug, 0, sizeof(debug)); + debug.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP | + KVM_GUESTDBG_BLOCKIRQ; + debug.arch.debugreg[7] = 0x00000400; + vcpu_guest_debug_set(vcpu, &debug); + vcpu_run(vcpu); + TEST_ASSERT(run->exit_reason == KVM_EXIT_DEBUG && + run->debug.arch.exception == DB_VECTOR && + run->debug.arch.pc == target_rip && + run->debug.arch.dr6 == target_dr6, + "SINGLE_STEP[%d]: exit %d exception %d rip 0x%llx " + "(should be 0x%llx) dr6 0x%llx (should be 0x%llx)", + i, run->exit_reason, run->debug.arch.exception, + run->debug.arch.pc, target_rip, run->debug.arch.dr6, + target_dr6); + } + + /* Finally test global disable */ + memset(&debug, 0, sizeof(debug)); + debug.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP; + debug.arch.debugreg[7] = 0x400 | DR7_GD; + vcpu_guest_debug_set(vcpu, &debug); + vcpu_run(vcpu); + target_dr6 = 0xffff0ff0 | DR6_BD; + TEST_ASSERT(run->exit_reason == KVM_EXIT_DEBUG && + run->debug.arch.exception == DB_VECTOR && + run->debug.arch.pc == CAST_TO_RIP(bd_start) && + run->debug.arch.dr6 == target_dr6, + "DR7.GD: exit %d exception %d rip 0x%llx " + "(should be 0x%llx) dr6 0x%llx (should be 0x%llx)", + run->exit_reason, run->debug.arch.exception, + run->debug.arch.pc, target_rip, run->debug.arch.dr6, + target_dr6); + + /* Disable all debug controls, run to the end */ + memset(&debug, 0, sizeof(debug)); + vcpu_guest_debug_set(vcpu, &debug); + + vcpu_run(vcpu); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); + cmd = get_ucall(vcpu, &uc); + TEST_ASSERT(cmd == UCALL_DONE, "UCALL_DONE"); + + kvm_vm_free(vm); + + return 0; +} diff --git a/tools/testing/selftests/kvm/x86/dirty_log_page_splitting_test.c b/tools/testing/selftests/kvm/x86/dirty_log_page_splitting_test.c new file mode 100644 index 000000000000..2929c067c207 --- /dev/null +++ b/tools/testing/selftests/kvm/x86/dirty_log_page_splitting_test.c @@ -0,0 +1,263 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * KVM dirty logging page splitting test + * + * Based on dirty_log_perf.c + * + * Copyright (C) 2018, Red Hat, Inc. + * Copyright (C) 2023, Google, Inc. 
+ */ + +#include +#include +#include +#include + +#include "kvm_util.h" +#include "test_util.h" +#include "memstress.h" +#include "guest_modes.h" +#include "ucall_common.h" + +#define VCPUS 2 +#define SLOTS 2 +#define ITERATIONS 2 + +static uint64_t guest_percpu_mem_size = DEFAULT_PER_VCPU_MEM_SIZE; + +static enum vm_mem_backing_src_type backing_src = VM_MEM_SRC_ANONYMOUS_HUGETLB; + +static u64 dirty_log_manual_caps; +static bool host_quit; +static int iteration; +static int vcpu_last_completed_iteration[KVM_MAX_VCPUS]; + +struct kvm_page_stats { + uint64_t pages_4k; + uint64_t pages_2m; + uint64_t pages_1g; + uint64_t hugepages; +}; + +static void get_page_stats(struct kvm_vm *vm, struct kvm_page_stats *stats, const char *stage) +{ + stats->pages_4k = vm_get_stat(vm, "pages_4k"); + stats->pages_2m = vm_get_stat(vm, "pages_2m"); + stats->pages_1g = vm_get_stat(vm, "pages_1g"); + stats->hugepages = stats->pages_2m + stats->pages_1g; + + pr_debug("\nPage stats after %s: 4K: %ld 2M: %ld 1G: %ld huge: %ld\n", + stage, stats->pages_4k, stats->pages_2m, stats->pages_1g, + stats->hugepages); +} + +static void run_vcpu_iteration(struct kvm_vm *vm) +{ + int i; + + iteration++; + for (i = 0; i < VCPUS; i++) { + while (READ_ONCE(vcpu_last_completed_iteration[i]) != + iteration) + ; + } +} + +static void vcpu_worker(struct memstress_vcpu_args *vcpu_args) +{ + struct kvm_vcpu *vcpu = vcpu_args->vcpu; + int vcpu_idx = vcpu_args->vcpu_idx; + + while (!READ_ONCE(host_quit)) { + int current_iteration = READ_ONCE(iteration); + + vcpu_run(vcpu); + + TEST_ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_SYNC); + + vcpu_last_completed_iteration[vcpu_idx] = current_iteration; + + /* Wait for the start of the next iteration to be signaled. */ + while (current_iteration == READ_ONCE(iteration) && + READ_ONCE(iteration) >= 0 && + !READ_ONCE(host_quit)) + ; + } +} + +static void run_test(enum vm_guest_mode mode, void *unused) +{ + struct kvm_vm *vm; + unsigned long **bitmaps; + uint64_t guest_num_pages; + uint64_t host_num_pages; + uint64_t pages_per_slot; + int i; + struct kvm_page_stats stats_populated; + struct kvm_page_stats stats_dirty_logging_enabled; + struct kvm_page_stats stats_dirty_pass[ITERATIONS]; + struct kvm_page_stats stats_clear_pass[ITERATIONS]; + struct kvm_page_stats stats_dirty_logging_disabled; + struct kvm_page_stats stats_repopulated; + + vm = memstress_create_vm(mode, VCPUS, guest_percpu_mem_size, + SLOTS, backing_src, false); + + guest_num_pages = (VCPUS * guest_percpu_mem_size) >> vm->page_shift; + guest_num_pages = vm_adjust_num_guest_pages(mode, guest_num_pages); + host_num_pages = vm_num_host_pages(mode, guest_num_pages); + pages_per_slot = host_num_pages / SLOTS; + TEST_ASSERT_EQ(host_num_pages, pages_per_slot * SLOTS); + TEST_ASSERT(!(host_num_pages % 512), + "Number of pages, '%lu' not a multiple of 2MiB", host_num_pages); + + bitmaps = memstress_alloc_bitmaps(SLOTS, pages_per_slot); + + if (dirty_log_manual_caps) + vm_enable_cap(vm, KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2, + dirty_log_manual_caps); + + /* Start the iterations */ + iteration = -1; + host_quit = false; + + for (i = 0; i < VCPUS; i++) + vcpu_last_completed_iteration[i] = -1; + + memstress_start_vcpu_threads(VCPUS, vcpu_worker); + + run_vcpu_iteration(vm); + get_page_stats(vm, &stats_populated, "populating memory"); + + /* Enable dirty logging */ + memstress_enable_dirty_logging(vm, SLOTS); + + get_page_stats(vm, &stats_dirty_logging_enabled, "enabling dirty logging"); + + while (iteration < ITERATIONS) { + run_vcpu_iteration(vm); + 
get_page_stats(vm, &stats_dirty_pass[iteration - 1], + "dirtying memory"); + + memstress_get_dirty_log(vm, bitmaps, SLOTS); + + if (dirty_log_manual_caps) { + memstress_clear_dirty_log(vm, bitmaps, SLOTS, pages_per_slot); + + get_page_stats(vm, &stats_clear_pass[iteration - 1], "clearing dirty log"); + } + } + + /* Disable dirty logging */ + memstress_disable_dirty_logging(vm, SLOTS); + + get_page_stats(vm, &stats_dirty_logging_disabled, "disabling dirty logging"); + + /* Run vCPUs again to fault pages back in. */ + run_vcpu_iteration(vm); + get_page_stats(vm, &stats_repopulated, "repopulating memory"); + + /* + * Tell the vCPU threads to quit. No need to manually check that vCPUs + * have stopped running after disabling dirty logging, the join will + * wait for them to exit. + */ + host_quit = true; + memstress_join_vcpu_threads(VCPUS); + + memstress_free_bitmaps(bitmaps, SLOTS); + memstress_destroy_vm(vm); + + TEST_ASSERT_EQ((stats_populated.pages_2m * 512 + + stats_populated.pages_1g * 512 * 512), host_num_pages); + + /* + * Check that all huge pages were split. Since large pages can only + * exist in the data slot, and the vCPUs should have dirtied all pages + * in the data slot, there should be no huge pages left after splitting. + * Splitting happens at dirty log enable time without + * KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 and after the first clear pass + * with that capability. + */ + if (dirty_log_manual_caps) { + TEST_ASSERT_EQ(stats_clear_pass[0].hugepages, 0); + TEST_ASSERT(stats_clear_pass[0].pages_4k >= host_num_pages, + "Expected at least '%lu' 4KiB pages, found only '%lu'", + host_num_pages, stats_clear_pass[0].pages_4k); + TEST_ASSERT_EQ(stats_dirty_logging_enabled.hugepages, stats_populated.hugepages); + } else { + TEST_ASSERT_EQ(stats_dirty_logging_enabled.hugepages, 0); + TEST_ASSERT(stats_dirty_logging_enabled.pages_4k >= host_num_pages, + "Expected at least '%lu' 4KiB pages, found only '%lu'", + host_num_pages, stats_dirty_logging_enabled.pages_4k); + } + + /* + * Once dirty logging is disabled and the vCPUs have touched all their + * memory again, the hugepage counts should be the same as they were + * right after initial population of memory. + */ + TEST_ASSERT_EQ(stats_populated.pages_2m, stats_repopulated.pages_2m); + TEST_ASSERT_EQ(stats_populated.pages_1g, stats_repopulated.pages_1g); +} + +static void help(char *name) +{ + puts(""); + printf("usage: %s [-h] [-b vcpu bytes] [-s mem type]\n", + name); + puts(""); + printf(" -b: specify the size of the memory region which should be\n" + " dirtied by each vCPU. e.g. 10M or 3G.\n" + " (default: 1G)\n"); + backing_src_help("-s"); + puts(""); +} + +int main(int argc, char *argv[]) +{ + int opt; + + TEST_REQUIRE(get_kvm_param_bool("eager_page_split")); + TEST_REQUIRE(get_kvm_param_bool("tdp_mmu")); + + while ((opt = getopt(argc, argv, "b:hs:")) != -1) { + switch (opt) { + case 'b': + guest_percpu_mem_size = parse_size(optarg); + break; + case 'h': + help(argv[0]); + exit(0); + case 's': + backing_src = parse_backing_src_type(optarg); + break; + default: + help(argv[0]); + exit(1); + } + } + + if (!is_backing_src_hugetlb(backing_src)) { + pr_info("This test will only work reliably with HugeTLB memory. 
" + "It can work with THP, but that is best effort.\n"); + } + + guest_modes_append_default(); + + dirty_log_manual_caps = 0; + for_each_guest_mode(run_test, NULL); + + dirty_log_manual_caps = + kvm_check_cap(KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2); + + if (dirty_log_manual_caps) { + dirty_log_manual_caps &= (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | + KVM_DIRTY_LOG_INITIALLY_SET); + for_each_guest_mode(run_test, NULL); + } else { + pr_info("Skipping testing with MANUAL_PROTECT as it is not supported"); + } + + return 0; +} diff --git a/tools/testing/selftests/kvm/x86/exit_on_emulation_failure_test.c b/tools/testing/selftests/kvm/x86/exit_on_emulation_failure_test.c new file mode 100644 index 000000000000..81055476d394 --- /dev/null +++ b/tools/testing/selftests/kvm/x86/exit_on_emulation_failure_test.c @@ -0,0 +1,39 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2022, Google LLC. + * + * Test for KVM_CAP_EXIT_ON_EMULATION_FAILURE. + */ +#include "flds_emulation.h" +#include "test_util.h" +#include "ucall_common.h" + +#define MMIO_GPA 0x700000000 +#define MMIO_GVA MMIO_GPA + +static void guest_code(void) +{ + /* Execute flds with an MMIO address to force KVM to emulate it. */ + flds(MMIO_GVA); + GUEST_DONE(); +} + +int main(int argc, char *argv[]) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + + TEST_REQUIRE(kvm_has_cap(KVM_CAP_EXIT_ON_EMULATION_FAILURE)); + + vm = vm_create_with_one_vcpu(&vcpu, guest_code); + vm_enable_cap(vm, KVM_CAP_EXIT_ON_EMULATION_FAILURE, 1); + virt_map(vm, MMIO_GVA, MMIO_GPA, 1); + + vcpu_run(vcpu); + handle_flds_emulation_failure_exit(vcpu); + vcpu_run(vcpu); + TEST_ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_DONE); + + kvm_vm_free(vm); + return 0; +} diff --git a/tools/testing/selftests/kvm/x86/feature_msrs_test.c b/tools/testing/selftests/kvm/x86/feature_msrs_test.c new file mode 100644 index 000000000000..a72f13ae2edb --- /dev/null +++ b/tools/testing/selftests/kvm/x86/feature_msrs_test.c @@ -0,0 +1,113 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2020, Red Hat, Inc. + */ +#include +#include +#include +#include +#include + +#include "test_util.h" +#include "kvm_util.h" +#include "processor.h" + +static bool is_kvm_controlled_msr(uint32_t msr) +{ + return msr == MSR_IA32_VMX_CR0_FIXED1 || msr == MSR_IA32_VMX_CR4_FIXED1; +} + +/* + * For VMX MSRs with a "true" variant, KVM requires userspace to set the "true" + * MSR, and doesn't allow setting the hidden version. + */ +static bool is_hidden_vmx_msr(uint32_t msr) +{ + switch (msr) { + case MSR_IA32_VMX_PINBASED_CTLS: + case MSR_IA32_VMX_PROCBASED_CTLS: + case MSR_IA32_VMX_EXIT_CTLS: + case MSR_IA32_VMX_ENTRY_CTLS: + return true; + default: + return false; + } +} + +static bool is_quirked_msr(uint32_t msr) +{ + return msr != MSR_AMD64_DE_CFG; +} + +static void test_feature_msr(uint32_t msr) +{ + const uint64_t supported_mask = kvm_get_feature_msr(msr); + uint64_t reset_value = is_quirked_msr(msr) ? supported_mask : 0; + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + + /* + * Don't bother testing KVM-controlled MSRs beyond verifying that the + * MSR can be read from userspace. Any value is effectively legal, as + * KVM is bound by x86 architecture, not by ABI. + */ + if (is_kvm_controlled_msr(msr)) + return; + + /* + * More goofy behavior. KVM reports the host CPU's actual revision ID, + * but initializes the vCPU's revision ID to an arbitrary value. + */ + if (msr == MSR_IA32_UCODE_REV) + reset_value = host_cpu_is_intel ? 
0x100000000ULL : 0x01000065; + + /* + * For quirked MSRs, KVM's ABI is to initialize the vCPU's value to the + * full set of features supported by KVM. For non-quirked MSRs, and + * when the quirk is disabled, KVM must zero-initialize the MSR and let + * userspace do the configuration. + */ + vm = vm_create_with_one_vcpu(&vcpu, NULL); + TEST_ASSERT(vcpu_get_msr(vcpu, msr) == reset_value, + "Wanted 0x%lx for %squirked MSR 0x%x, got 0x%lx", + reset_value, is_quirked_msr(msr) ? "" : "non-", msr, + vcpu_get_msr(vcpu, msr)); + if (!is_hidden_vmx_msr(msr)) + vcpu_set_msr(vcpu, msr, supported_mask); + kvm_vm_free(vm); + + if (is_hidden_vmx_msr(msr)) + return; + + if (!kvm_has_cap(KVM_CAP_DISABLE_QUIRKS2) || + !(kvm_check_cap(KVM_CAP_DISABLE_QUIRKS2) & KVM_X86_QUIRK_STUFF_FEATURE_MSRS)) + return; + + vm = vm_create(1); + vm_enable_cap(vm, KVM_CAP_DISABLE_QUIRKS2, KVM_X86_QUIRK_STUFF_FEATURE_MSRS); + + vcpu = vm_vcpu_add(vm, 0, NULL); + TEST_ASSERT(!vcpu_get_msr(vcpu, msr), + "Quirk disabled, wanted '0' for MSR 0x%x, got 0x%lx", + msr, vcpu_get_msr(vcpu, msr)); + kvm_vm_free(vm); +} + +int main(int argc, char *argv[]) +{ + const struct kvm_msr_list *feature_list; + int i; + + /* + * Skip the entire test if MSR_FEATURES isn't supported, other tests + * will cover the "regular" list of MSRs, the coverage here is purely + * opportunistic and not interesting on its own. + */ + TEST_REQUIRE(kvm_has_cap(KVM_CAP_GET_MSR_FEATURES)); + + (void)kvm_get_msr_index_list(); + + feature_list = kvm_get_feature_msr_index_list(); + for (i = 0; i < feature_list->nmsrs; i++) + test_feature_msr(feature_list->indices[i]); +} diff --git a/tools/testing/selftests/kvm/x86/fix_hypercall_test.c b/tools/testing/selftests/kvm/x86/fix_hypercall_test.c new file mode 100644 index 000000000000..762628f7d4ba --- /dev/null +++ b/tools/testing/selftests/kvm/x86/fix_hypercall_test.c @@ -0,0 +1,142 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2020, Google LLC. + * + * Tests for KVM paravirtual feature disablement + */ +#include +#include +#include +#include + +#include "kvm_test_harness.h" +#include "apic.h" +#include "test_util.h" +#include "kvm_util.h" +#include "processor.h" + +/* VMCALL and VMMCALL are both 3-byte opcodes. 
*/ +#define HYPERCALL_INSN_SIZE 3 + +static bool quirk_disabled; + +static void guest_ud_handler(struct ex_regs *regs) +{ + regs->rax = -EFAULT; + regs->rip += HYPERCALL_INSN_SIZE; +} + +static const uint8_t vmx_vmcall[HYPERCALL_INSN_SIZE] = { 0x0f, 0x01, 0xc1 }; +static const uint8_t svm_vmmcall[HYPERCALL_INSN_SIZE] = { 0x0f, 0x01, 0xd9 }; + +extern uint8_t hypercall_insn[HYPERCALL_INSN_SIZE]; +static uint64_t do_sched_yield(uint8_t apic_id) +{ + uint64_t ret; + + asm volatile("hypercall_insn:\n\t" + ".byte 0xcc,0xcc,0xcc\n\t" + : "=a"(ret) + : "a"((uint64_t)KVM_HC_SCHED_YIELD), "b"((uint64_t)apic_id) + : "memory"); + + return ret; +} + +static void guest_main(void) +{ + const uint8_t *native_hypercall_insn; + const uint8_t *other_hypercall_insn; + uint64_t ret; + + if (host_cpu_is_intel) { + native_hypercall_insn = vmx_vmcall; + other_hypercall_insn = svm_vmmcall; + } else if (host_cpu_is_amd) { + native_hypercall_insn = svm_vmmcall; + other_hypercall_insn = vmx_vmcall; + } else { + GUEST_ASSERT(0); + /* unreachable */ + return; + } + + memcpy(hypercall_insn, other_hypercall_insn, HYPERCALL_INSN_SIZE); + + ret = do_sched_yield(GET_APIC_ID_FIELD(xapic_read_reg(APIC_ID))); + + /* + * If the quirk is disabled, verify that guest_ud_handler() "returned" + * -EFAULT and that KVM did NOT patch the hypercall. If the quirk is + * enabled, verify that the hypercall succeeded and that KVM patched in + * the "right" hypercall. + */ + if (quirk_disabled) { + GUEST_ASSERT(ret == (uint64_t)-EFAULT); + GUEST_ASSERT(!memcmp(other_hypercall_insn, hypercall_insn, + HYPERCALL_INSN_SIZE)); + } else { + GUEST_ASSERT(!ret); + GUEST_ASSERT(!memcmp(native_hypercall_insn, hypercall_insn, + HYPERCALL_INSN_SIZE)); + } + + GUEST_DONE(); +} + +KVM_ONE_VCPU_TEST_SUITE(fix_hypercall); + +static void enter_guest(struct kvm_vcpu *vcpu) +{ + struct kvm_run *run = vcpu->run; + struct ucall uc; + + vcpu_run(vcpu); + switch (get_ucall(vcpu, &uc)) { + case UCALL_SYNC: + pr_info("%s: %016lx\n", (const char *)uc.args[2], uc.args[3]); + break; + case UCALL_DONE: + return; + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + default: + TEST_FAIL("Unhandled ucall: %ld\nexit_reason: %u (%s)", + uc.cmd, run->exit_reason, exit_reason_str(run->exit_reason)); + } +} + +static void test_fix_hypercall(struct kvm_vcpu *vcpu, bool disable_quirk) +{ + struct kvm_vm *vm = vcpu->vm; + + vm_install_exception_handler(vcpu->vm, UD_VECTOR, guest_ud_handler); + + if (disable_quirk) + vm_enable_cap(vm, KVM_CAP_DISABLE_QUIRKS2, + KVM_X86_QUIRK_FIX_HYPERCALL_INSN); + + quirk_disabled = disable_quirk; + sync_global_to_guest(vm, quirk_disabled); + + virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA); + + enter_guest(vcpu); +} + +KVM_ONE_VCPU_TEST(fix_hypercall, enable_quirk, guest_main) +{ + test_fix_hypercall(vcpu, false); +} + +KVM_ONE_VCPU_TEST(fix_hypercall, disable_quirk, guest_main) +{ + test_fix_hypercall(vcpu, true); +} + +int main(int argc, char *argv[]) +{ + TEST_REQUIRE(kvm_check_cap(KVM_CAP_DISABLE_QUIRKS2) & KVM_X86_QUIRK_FIX_HYPERCALL_INSN); + + return test_harness_run(argc, argv); +} diff --git a/tools/testing/selftests/kvm/x86/flds_emulation.h b/tools/testing/selftests/kvm/x86/flds_emulation.h new file mode 100644 index 000000000000..37b1a9f52864 --- /dev/null +++ b/tools/testing/selftests/kvm/x86/flds_emulation.h @@ -0,0 +1,52 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef SELFTEST_KVM_FLDS_EMULATION_H +#define SELFTEST_KVM_FLDS_EMULATION_H + +#include "kvm_util.h" + +#define FLDS_MEM_EAX ".byte 0xd9, 0x00" + +/* + * flds is an 
instruction that the KVM instruction emulator is known not to + * support. This can be used in guest code along with a mechanism to force + * KVM to emulate the instruction (e.g. by providing an MMIO address) to + * exercise emulation failures. + */ +static inline void flds(uint64_t address) +{ + __asm__ __volatile__(FLDS_MEM_EAX :: "a"(address)); +} + +static inline void handle_flds_emulation_failure_exit(struct kvm_vcpu *vcpu) +{ + struct kvm_run *run = vcpu->run; + struct kvm_regs regs; + uint8_t *insn_bytes; + uint64_t flags; + + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_INTERNAL_ERROR); + + TEST_ASSERT(run->emulation_failure.suberror == KVM_INTERNAL_ERROR_EMULATION, + "Unexpected suberror: %u", + run->emulation_failure.suberror); + + flags = run->emulation_failure.flags; + TEST_ASSERT(run->emulation_failure.ndata >= 3 && + flags & KVM_INTERNAL_ERROR_EMULATION_FLAG_INSTRUCTION_BYTES, + "run->emulation_failure is missing instruction bytes"); + + TEST_ASSERT(run->emulation_failure.insn_size >= 2, + "Expected a 2-byte opcode for 'flds', got %d bytes", + run->emulation_failure.insn_size); + + insn_bytes = run->emulation_failure.insn_bytes; + TEST_ASSERT(insn_bytes[0] == 0xd9 && insn_bytes[1] == 0, + "Expected 'flds [eax]', opcode '0xd9 0x00', got opcode 0x%02x 0x%02x", + insn_bytes[0], insn_bytes[1]); + + vcpu_regs_get(vcpu, ®s); + regs.rip += 2; + vcpu_regs_set(vcpu, ®s); +} + +#endif /* !SELFTEST_KVM_FLDS_EMULATION_H */ diff --git a/tools/testing/selftests/kvm/x86/hwcr_msr_test.c b/tools/testing/selftests/kvm/x86/hwcr_msr_test.c new file mode 100644 index 000000000000..10b1b0ba374e --- /dev/null +++ b/tools/testing/selftests/kvm/x86/hwcr_msr_test.c @@ -0,0 +1,45 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2023, Google LLC. + */ +#include + +#include "test_util.h" +#include "kvm_util.h" +#include "vmx.h" + +void test_hwcr_bit(struct kvm_vcpu *vcpu, unsigned int bit) +{ + const uint64_t ignored = BIT_ULL(3) | BIT_ULL(6) | BIT_ULL(8); + const uint64_t valid = BIT_ULL(18) | BIT_ULL(24); + const uint64_t legal = ignored | valid; + uint64_t val = BIT_ULL(bit); + uint64_t actual; + int r; + + r = _vcpu_set_msr(vcpu, MSR_K7_HWCR, val); + TEST_ASSERT(val & ~legal ? !r : r == 1, + "Expected KVM_SET_MSRS(MSR_K7_HWCR) = 0x%lx to %s", + val, val & ~legal ? "fail" : "succeed"); + + actual = vcpu_get_msr(vcpu, MSR_K7_HWCR); + TEST_ASSERT(actual == (val & valid), + "Bit %u: unexpected HWCR 0x%lx; expected 0x%lx", + bit, actual, (val & valid)); + + vcpu_set_msr(vcpu, MSR_K7_HWCR, 0); +} + +int main(int argc, char *argv[]) +{ + struct kvm_vm *vm; + struct kvm_vcpu *vcpu; + unsigned int bit; + + vm = vm_create_with_one_vcpu(&vcpu, NULL); + + for (bit = 0; bit < BITS_PER_LONG; bit++) + test_hwcr_bit(vcpu, bit); + + kvm_vm_free(vm); +} diff --git a/tools/testing/selftests/kvm/x86/hyperv_clock.c b/tools/testing/selftests/kvm/x86/hyperv_clock.c new file mode 100644 index 000000000000..e058bc676cd6 --- /dev/null +++ b/tools/testing/selftests/kvm/x86/hyperv_clock.c @@ -0,0 +1,263 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2021, Red Hat, Inc. 
+ *
+ * Tests for Hyper-V clocksources
+ */
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "hyperv.h"
+
+struct ms_hyperv_tsc_page {
+ volatile u32 tsc_sequence;
+ u32 reserved1;
+ volatile u64 tsc_scale;
+ volatile s64 tsc_offset;
+} __packed;
+
+/* Simplified mul_u64_u64_shr() */
+static inline u64 mul_u64_u64_shr64(u64 a, u64 b)
+{
+ union {
+ u64 ll;
+ struct {
+ u32 low, high;
+ } l;
+ } rm, rn, rh, a0, b0;
+ u64 c;
+
+ a0.ll = a;
+ b0.ll = b;
+
+ rm.ll = (u64)a0.l.low * b0.l.high;
+ rn.ll = (u64)a0.l.high * b0.l.low;
+ rh.ll = (u64)a0.l.high * b0.l.high;
+
+ rh.l.low = c = rm.l.high + rn.l.high + rh.l.low;
+ rh.l.high = (c >> 32) + rh.l.high;
+
+ return rh.ll;
+}
+
+static inline void nop_loop(void)
+{
+ int i;
+
+ for (i = 0; i < 100000000; i++)
+ asm volatile("nop");
+}
+
+static inline void check_tsc_msr_rdtsc(void)
+{
+ u64 tsc_freq, r1, r2, t1, t2;
+ s64 delta_ns;
+
+ tsc_freq = rdmsr(HV_X64_MSR_TSC_FREQUENCY);
+ GUEST_ASSERT(tsc_freq > 0);
+
+ /* For increased accuracy, take mean rdtsc() before and after rdmsr() */
+ r1 = rdtsc();
+ t1 = rdmsr(HV_X64_MSR_TIME_REF_COUNT);
+ r1 = (r1 + rdtsc()) / 2;
+ nop_loop();
+ r2 = rdtsc();
+ t2 = rdmsr(HV_X64_MSR_TIME_REF_COUNT);
+ r2 = (r2 + rdtsc()) / 2;
+
+ GUEST_ASSERT(r2 > r1 && t2 > t1);
+
+ /* HV_X64_MSR_TIME_REF_COUNT is in 100ns */
+ delta_ns = ((t2 - t1) * 100) - ((r2 - r1) * 1000000000 / tsc_freq);
+ if (delta_ns < 0)
+ delta_ns = -delta_ns;
+
+ /* 1% tolerance */
+ GUEST_ASSERT(delta_ns * 100 < (t2 - t1) * 100);
+}
+
+static inline u64 get_tscpage_ts(struct ms_hyperv_tsc_page *tsc_page)
+{
+ return mul_u64_u64_shr64(rdtsc(), tsc_page->tsc_scale) + tsc_page->tsc_offset;
+}
+
+static inline void check_tsc_msr_tsc_page(struct ms_hyperv_tsc_page *tsc_page)
+{
+ u64 r1, r2, t1, t2;
+
+ /* Compare TSC page clocksource with HV_X64_MSR_TIME_REF_COUNT */
+ t1 = get_tscpage_ts(tsc_page);
+ r1 = rdmsr(HV_X64_MSR_TIME_REF_COUNT);
+
+ /* 10 ms tolerance */
+ GUEST_ASSERT(r1 >= t1 && r1 - t1 < 100000);
+ nop_loop();
+
+ t2 = get_tscpage_ts(tsc_page);
+ r2 = rdmsr(HV_X64_MSR_TIME_REF_COUNT);
+ GUEST_ASSERT(r2 >= t1 && r2 - t2 < 100000);
+}
+
+static void guest_main(struct ms_hyperv_tsc_page *tsc_page, vm_paddr_t tsc_page_gpa)
+{
+ u64 tsc_scale, tsc_offset;
+
+ /* Set Guest OS id to enable Hyper-V emulation */
+ GUEST_SYNC(1);
+ wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID);
+ GUEST_SYNC(2);
+
+ check_tsc_msr_rdtsc();
+
+ GUEST_SYNC(3);
+
+ /* Set up TSC page in disabled state, check that it's clean */
+ wrmsr(HV_X64_MSR_REFERENCE_TSC, tsc_page_gpa);
+ GUEST_ASSERT(tsc_page->tsc_sequence == 0);
+ GUEST_ASSERT(tsc_page->tsc_scale == 0);
+ GUEST_ASSERT(tsc_page->tsc_offset == 0);
+
+ GUEST_SYNC(4);
+
+ /* Set up TSC page in enabled state */
+ wrmsr(HV_X64_MSR_REFERENCE_TSC, tsc_page_gpa | 0x1);
+ GUEST_ASSERT(tsc_page->tsc_sequence != 0);
+
+ GUEST_SYNC(5);
+
+ check_tsc_msr_tsc_page(tsc_page);
+
+ GUEST_SYNC(6);
+
+ tsc_offset = tsc_page->tsc_offset;
+ /* Call KVM_SET_CLOCK from userspace, check that TSC page was updated */
+
+ GUEST_SYNC(7);
+ /* Sanity check TSC page timestamp, it should be close to 0 */
+ GUEST_ASSERT(get_tscpage_ts(tsc_page) < 100000);
+
+ GUEST_ASSERT(tsc_page->tsc_offset != tsc_offset);
+
+ nop_loop();
+
+ /*
+ * Enable Re-enlightenment and check that TSC page stays constant across
+ * KVM_SET_CLOCK. 
+ */ + wrmsr(HV_X64_MSR_REENLIGHTENMENT_CONTROL, 0x1 << 16 | 0xff); + wrmsr(HV_X64_MSR_TSC_EMULATION_CONTROL, 0x1); + tsc_offset = tsc_page->tsc_offset; + tsc_scale = tsc_page->tsc_scale; + GUEST_SYNC(8); + GUEST_ASSERT(tsc_page->tsc_offset == tsc_offset); + GUEST_ASSERT(tsc_page->tsc_scale == tsc_scale); + + GUEST_SYNC(9); + + check_tsc_msr_tsc_page(tsc_page); + + /* + * Disable re-enlightenment and TSC page, check that KVM doesn't update + * it anymore. + */ + wrmsr(HV_X64_MSR_REENLIGHTENMENT_CONTROL, 0); + wrmsr(HV_X64_MSR_TSC_EMULATION_CONTROL, 0); + wrmsr(HV_X64_MSR_REFERENCE_TSC, 0); + memset(tsc_page, 0, sizeof(*tsc_page)); + + GUEST_SYNC(10); + GUEST_ASSERT(tsc_page->tsc_sequence == 0); + GUEST_ASSERT(tsc_page->tsc_offset == 0); + GUEST_ASSERT(tsc_page->tsc_scale == 0); + + GUEST_DONE(); +} + +static void host_check_tsc_msr_rdtsc(struct kvm_vcpu *vcpu) +{ + u64 tsc_freq, r1, r2, t1, t2; + s64 delta_ns; + + tsc_freq = vcpu_get_msr(vcpu, HV_X64_MSR_TSC_FREQUENCY); + TEST_ASSERT(tsc_freq > 0, "TSC frequency must be nonzero"); + + /* For increased accuracy, take mean rdtsc() before and afrer ioctl */ + r1 = rdtsc(); + t1 = vcpu_get_msr(vcpu, HV_X64_MSR_TIME_REF_COUNT); + r1 = (r1 + rdtsc()) / 2; + nop_loop(); + r2 = rdtsc(); + t2 = vcpu_get_msr(vcpu, HV_X64_MSR_TIME_REF_COUNT); + r2 = (r2 + rdtsc()) / 2; + + TEST_ASSERT(t2 > t1, "Time reference MSR is not monotonic (%ld <= %ld)", t1, t2); + + /* HV_X64_MSR_TIME_REF_COUNT is in 100ns */ + delta_ns = ((t2 - t1) * 100) - ((r2 - r1) * 1000000000 / tsc_freq); + if (delta_ns < 0) + delta_ns = -delta_ns; + + /* 1% tolerance */ + TEST_ASSERT(delta_ns * 100 < (t2 - t1) * 100, + "Elapsed time does not match (MSR=%ld, TSC=%ld)", + (t2 - t1) * 100, (r2 - r1) * 1000000000 / tsc_freq); +} + +int main(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + struct ucall uc; + vm_vaddr_t tsc_page_gva; + int stage; + + TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_TIME)); + TEST_REQUIRE(sys_clocksource_is_based_on_tsc()); + + vm = vm_create_with_one_vcpu(&vcpu, guest_main); + + vcpu_set_hv_cpuid(vcpu); + + tsc_page_gva = vm_vaddr_alloc_page(vm); + memset(addr_gva2hva(vm, tsc_page_gva), 0x0, getpagesize()); + TEST_ASSERT((addr_gva2gpa(vm, tsc_page_gva) & (getpagesize() - 1)) == 0, + "TSC page has to be page aligned"); + vcpu_args_set(vcpu, 2, tsc_page_gva, addr_gva2gpa(vm, tsc_page_gva)); + + host_check_tsc_msr_rdtsc(vcpu); + + for (stage = 1;; stage++) { + vcpu_run(vcpu); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); + + switch (get_ucall(vcpu, &uc)) { + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + /* NOT REACHED */ + case UCALL_SYNC: + break; + case UCALL_DONE: + /* Keep in sync with guest_main() */ + TEST_ASSERT(stage == 11, "Testing ended prematurely, stage %d", + stage); + goto out; + default: + TEST_FAIL("Unknown ucall %lu", uc.cmd); + } + + TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") && + uc.args[1] == stage, + "Stage %d: Unexpected register values vmexit, got %lx", + stage, (ulong)uc.args[1]); + + /* Reset kvmclock triggering TSC page update */ + if (stage == 7 || stage == 8 || stage == 10) { + struct kvm_clock_data clock = {0}; + + vm_ioctl(vm, KVM_SET_CLOCK, &clock); + } + } + +out: + kvm_vm_free(vm); +} diff --git a/tools/testing/selftests/kvm/x86/hyperv_cpuid.c b/tools/testing/selftests/kvm/x86/hyperv_cpuid.c new file mode 100644 index 000000000000..4f5881d4ef66 --- /dev/null +++ b/tools/testing/selftests/kvm/x86/hyperv_cpuid.c @@ -0,0 +1,172 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Test for x86 KVM_CAP_HYPERV_CPUID + * + * 
Copyright (C) 2018, Red Hat, Inc.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ */
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "vmx.h"
+
+static void guest_code(void)
+{
+}
+
+static bool smt_possible(void)
+{
+ char buf[16];
+ FILE *f;
+ bool res = true;
+
+ f = fopen("/sys/devices/system/cpu/smt/control", "r");
+ if (f) {
+ if (fread(buf, sizeof(*buf), sizeof(buf), f) > 0) {
+ if (!strncmp(buf, "forceoff", 8) ||
+ !strncmp(buf, "notsupported", 12))
+ res = false;
+ }
+ fclose(f);
+ }
+
+ return res;
+}
+
+static void test_hv_cpuid(const struct kvm_cpuid2 *hv_cpuid_entries,
+ bool evmcs_expected)
+{
+ int i;
+ int nent_expected = 10;
+ u32 test_val;
+
+ TEST_ASSERT(hv_cpuid_entries->nent == nent_expected,
+ "KVM_GET_SUPPORTED_HV_CPUID should return %d entries"
+ " (returned %d)",
+ nent_expected, hv_cpuid_entries->nent);
+
+ for (i = 0; i < hv_cpuid_entries->nent; i++) {
+ const struct kvm_cpuid_entry2 *entry = &hv_cpuid_entries->entries[i];
+
+ TEST_ASSERT((entry->function >= 0x40000000) &&
+ (entry->function <= 0x40000082),
+ "function %x is out of supported range",
+ entry->function);
+
+ TEST_ASSERT(entry->index == 0,
+ ".index field should be zero");
+
+ TEST_ASSERT(entry->flags == 0,
+ ".flags field should be zero");
+
+ TEST_ASSERT(!entry->padding[0] && !entry->padding[1] &&
+ !entry->padding[2], "padding should be zero");
+
+ switch (entry->function) {
+ case 0x40000000:
+ test_val = 0x40000082;
+
+ TEST_ASSERT(entry->eax == test_val,
+ "Wrong max leaf report in 0x40000000.EAX: %x"
+ " (evmcs=%d)",
+ entry->eax, evmcs_expected
+ );
+ break;
+ case 0x40000004:
+ test_val = entry->eax & (1UL << 18);
+
+ TEST_ASSERT(!!test_val == !smt_possible(),
+ "NoNonArchitecturalCoreSharing bit"
+ " doesn't reflect SMT setting");
+ break;
+ case 0x4000000A:
+ TEST_ASSERT(entry->eax & (1UL << 19),
+ "Enlightened MSR-Bitmap should always be supported"
+ " 0x40000000.EAX: %x", entry->eax);
+ if (evmcs_expected)
+ TEST_ASSERT((entry->eax & 0xffff) == 0x101,
+ "Supported Enlightened VMCS version range is supposed to be 1:1"
+ " 0x40000000.EAX: %x", entry->eax);
+
+ break;
+ default:
+ break;
+
+ }
+ /*
+ * If needed for debug:
+ * fprintf(stdout,
+ * "CPUID%lx EAX=0x%lx EBX=0x%lx ECX=0x%lx EDX=0x%lx\n",
+ * entry->function, entry->eax, entry->ebx, entry->ecx,
+ * entry->edx);
+ */
+ }
+}
+
+void test_hv_cpuid_e2big(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
+{
+ static struct kvm_cpuid2 cpuid = {.nent = 0};
+ int ret;
+
+ if (vcpu)
+ ret = __vcpu_ioctl(vcpu, KVM_GET_SUPPORTED_HV_CPUID, &cpuid);
+ else
+ ret = __kvm_ioctl(vm->kvm_fd, KVM_GET_SUPPORTED_HV_CPUID, &cpuid);
+
+ TEST_ASSERT(ret == -1 && errno == E2BIG,
+ "%s KVM_GET_SUPPORTED_HV_CPUID didn't fail with -E2BIG when"
+ " it should have: %d %d", !vcpu ? 
"KVM" : "vCPU", ret, errno); +} + +int main(int argc, char *argv[]) +{ + struct kvm_vm *vm; + const struct kvm_cpuid2 *hv_cpuid_entries; + struct kvm_vcpu *vcpu; + + TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_CPUID)); + + vm = vm_create_with_one_vcpu(&vcpu, guest_code); + + /* Test vCPU ioctl version */ + test_hv_cpuid_e2big(vm, vcpu); + + hv_cpuid_entries = vcpu_get_supported_hv_cpuid(vcpu); + test_hv_cpuid(hv_cpuid_entries, false); + free((void *)hv_cpuid_entries); + + if (!kvm_cpu_has(X86_FEATURE_VMX) || + !kvm_has_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS)) { + print_skip("Enlightened VMCS is unsupported"); + goto do_sys; + } + vcpu_enable_evmcs(vcpu); + hv_cpuid_entries = vcpu_get_supported_hv_cpuid(vcpu); + test_hv_cpuid(hv_cpuid_entries, true); + free((void *)hv_cpuid_entries); + +do_sys: + /* Test system ioctl version */ + if (!kvm_has_cap(KVM_CAP_SYS_HYPERV_CPUID)) { + print_skip("KVM_CAP_SYS_HYPERV_CPUID not supported"); + goto out; + } + + test_hv_cpuid_e2big(vm, NULL); + + hv_cpuid_entries = kvm_get_supported_hv_cpuid(); + test_hv_cpuid(hv_cpuid_entries, kvm_cpu_has(X86_FEATURE_VMX)); + +out: + kvm_vm_free(vm); + + return 0; +} diff --git a/tools/testing/selftests/kvm/x86/hyperv_evmcs.c b/tools/testing/selftests/kvm/x86/hyperv_evmcs.c new file mode 100644 index 000000000000..74cf19661309 --- /dev/null +++ b/tools/testing/selftests/kvm/x86/hyperv_evmcs.c @@ -0,0 +1,307 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2018, Red Hat, Inc. + * + * Tests for Enlightened VMCS, including nested guest state. + */ +#include +#include +#include +#include +#include +#include + +#include "test_util.h" + +#include "kvm_util.h" + +#include "hyperv.h" +#include "vmx.h" + +static int ud_count; + +static void guest_ud_handler(struct ex_regs *regs) +{ + ud_count++; + regs->rip += 3; /* VMLAUNCH */ +} + +static void guest_nmi_handler(struct ex_regs *regs) +{ +} + +static inline void rdmsr_from_l2(uint32_t msr) +{ + /* Currently, L1 doesn't preserve GPRs during vmexits. */ + __asm__ __volatile__ ("rdmsr" : : "c"(msr) : + "rax", "rbx", "rdx", "rsi", "rdi", "r8", "r9", + "r10", "r11", "r12", "r13", "r14", "r15"); +} + +/* Exit to L1 from L2 with RDMSR instruction */ +void l2_guest_code(void) +{ + u64 unused; + + GUEST_SYNC(7); + + GUEST_SYNC(8); + + /* Forced exit to L1 upon restore */ + GUEST_SYNC(9); + + vmcall(); + + /* MSR-Bitmap tests */ + rdmsr_from_l2(MSR_FS_BASE); /* intercepted */ + rdmsr_from_l2(MSR_FS_BASE); /* intercepted */ + rdmsr_from_l2(MSR_GS_BASE); /* not intercepted */ + vmcall(); + rdmsr_from_l2(MSR_GS_BASE); /* intercepted */ + + /* L2 TLB flush tests */ + hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE | HV_HYPERCALL_FAST_BIT, 0x0, + HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES | HV_FLUSH_ALL_PROCESSORS); + rdmsr_from_l2(MSR_FS_BASE); + /* + * Note: hypercall status (RAX) is not preserved correctly by L1 after + * synthetic vmexit, use unchecked version. + */ + __hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE | HV_HYPERCALL_FAST_BIT, 0x0, + HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES | HV_FLUSH_ALL_PROCESSORS, + &unused); + + /* Done, exit to L1 and never come back. 
*/ + vmcall(); +} + +void guest_code(struct vmx_pages *vmx_pages, struct hyperv_test_pages *hv_pages, + vm_vaddr_t hv_hcall_page_gpa) +{ +#define L2_GUEST_STACK_SIZE 64 + unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; + + wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID); + wrmsr(HV_X64_MSR_HYPERCALL, hv_hcall_page_gpa); + + x2apic_enable(); + + GUEST_SYNC(1); + GUEST_SYNC(2); + + enable_vp_assist(hv_pages->vp_assist_gpa, hv_pages->vp_assist); + evmcs_enable(); + + GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages)); + GUEST_SYNC(3); + GUEST_ASSERT(load_evmcs(hv_pages)); + GUEST_ASSERT(vmptrstz() == hv_pages->enlightened_vmcs_gpa); + + GUEST_SYNC(4); + GUEST_ASSERT(vmptrstz() == hv_pages->enlightened_vmcs_gpa); + + prepare_vmcs(vmx_pages, l2_guest_code, + &l2_guest_stack[L2_GUEST_STACK_SIZE]); + + GUEST_SYNC(5); + GUEST_ASSERT(vmptrstz() == hv_pages->enlightened_vmcs_gpa); + current_evmcs->revision_id = -1u; + GUEST_ASSERT(vmlaunch()); + current_evmcs->revision_id = EVMCS_VERSION; + GUEST_SYNC(6); + + vmwrite(PIN_BASED_VM_EXEC_CONTROL, vmreadz(PIN_BASED_VM_EXEC_CONTROL) | + PIN_BASED_NMI_EXITING); + + /* L2 TLB flush setup */ + current_evmcs->partition_assist_page = hv_pages->partition_assist_gpa; + current_evmcs->hv_enlightenments_control.nested_flush_hypercall = 1; + current_evmcs->hv_vm_id = 1; + current_evmcs->hv_vp_id = 1; + current_vp_assist->nested_control.features.directhypercall = 1; + *(u32 *)(hv_pages->partition_assist) = 0; + + GUEST_ASSERT(!vmlaunch()); + GUEST_ASSERT_EQ(vmreadz(VM_EXIT_REASON), EXIT_REASON_EXCEPTION_NMI); + GUEST_ASSERT_EQ((vmreadz(VM_EXIT_INTR_INFO) & 0xff), NMI_VECTOR); + GUEST_ASSERT(vmptrstz() == hv_pages->enlightened_vmcs_gpa); + + /* + * NMI forces L2->L1 exit, resuming L2 and hope that EVMCS is + * up-to-date (RIP points where it should and not at the beginning + * of l2_guest_code(). GUEST_SYNC(9) checkes that. + */ + GUEST_ASSERT(!vmresume()); + + GUEST_SYNC(10); + + GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL); + current_evmcs->guest_rip += 3; /* vmcall */ + + /* Intercept RDMSR 0xc0000100 */ + vmwrite(CPU_BASED_VM_EXEC_CONTROL, vmreadz(CPU_BASED_VM_EXEC_CONTROL) | + CPU_BASED_USE_MSR_BITMAPS); + __set_bit(MSR_FS_BASE & 0x1fff, vmx_pages->msr + 0x400); + GUEST_ASSERT(!vmresume()); + GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_MSR_READ); + current_evmcs->guest_rip += 2; /* rdmsr */ + + /* Enable enlightened MSR bitmap */ + current_evmcs->hv_enlightenments_control.msr_bitmap = 1; + GUEST_ASSERT(!vmresume()); + GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_MSR_READ); + current_evmcs->guest_rip += 2; /* rdmsr */ + + /* Intercept RDMSR 0xc0000101 without telling KVM about it */ + __set_bit(MSR_GS_BASE & 0x1fff, vmx_pages->msr + 0x400); + /* Make sure HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP is set */ + current_evmcs->hv_clean_fields |= HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP; + GUEST_ASSERT(!vmresume()); + /* Make sure we don't see EXIT_REASON_MSR_READ here so eMSR bitmap works */ + GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL); + current_evmcs->guest_rip += 3; /* vmcall */ + + /* Now tell KVM we've changed MSR-Bitmap */ + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP; + GUEST_ASSERT(!vmresume()); + GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_MSR_READ); + current_evmcs->guest_rip += 2; /* rdmsr */ + + /* + * L2 TLB flush test. First VMCALL should be handled directly by L0, + * no VMCALL exit expected. 
+ */ + GUEST_ASSERT(!vmresume()); + GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_MSR_READ); + current_evmcs->guest_rip += 2; /* rdmsr */ + /* Enable synthetic vmexit */ + *(u32 *)(hv_pages->partition_assist) = 1; + GUEST_ASSERT(!vmresume()); + GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == HV_VMX_SYNTHETIC_EXIT_REASON_TRAP_AFTER_FLUSH); + + GUEST_ASSERT(!vmresume()); + GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL); + GUEST_SYNC(11); + + /* Try enlightened vmptrld with an incorrect GPA */ + evmcs_vmptrld(0xdeadbeef, hv_pages->enlightened_vmcs); + GUEST_ASSERT(vmlaunch()); + GUEST_ASSERT(ud_count == 1); + GUEST_DONE(); +} + +void inject_nmi(struct kvm_vcpu *vcpu) +{ + struct kvm_vcpu_events events; + + vcpu_events_get(vcpu, &events); + + events.nmi.pending = 1; + events.flags |= KVM_VCPUEVENT_VALID_NMI_PENDING; + + vcpu_events_set(vcpu, &events); +} + +static struct kvm_vcpu *save_restore_vm(struct kvm_vm *vm, + struct kvm_vcpu *vcpu) +{ + struct kvm_regs regs1, regs2; + struct kvm_x86_state *state; + + state = vcpu_save_state(vcpu); + memset(®s1, 0, sizeof(regs1)); + vcpu_regs_get(vcpu, ®s1); + + kvm_vm_release(vm); + + /* Restore state in a new VM. */ + vcpu = vm_recreate_with_one_vcpu(vm); + vcpu_set_hv_cpuid(vcpu); + vcpu_enable_evmcs(vcpu); + vcpu_load_state(vcpu, state); + kvm_x86_state_cleanup(state); + + memset(®s2, 0, sizeof(regs2)); + vcpu_regs_get(vcpu, ®s2); + TEST_ASSERT(!memcmp(®s1, ®s2, sizeof(regs2)), + "Unexpected register values after vcpu_load_state; rdi: %lx rsi: %lx", + (ulong) regs2.rdi, (ulong) regs2.rsi); + return vcpu; +} + +int main(int argc, char *argv[]) +{ + vm_vaddr_t vmx_pages_gva = 0, hv_pages_gva = 0; + vm_vaddr_t hcall_page; + + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + struct ucall uc; + int stage; + + TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX)); + TEST_REQUIRE(kvm_has_cap(KVM_CAP_NESTED_STATE)); + TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS)); + TEST_REQUIRE(kvm_hv_cpu_has(HV_X64_NESTED_DIRECT_FLUSH)); + + vm = vm_create_with_one_vcpu(&vcpu, guest_code); + + hcall_page = vm_vaddr_alloc_pages(vm, 1); + memset(addr_gva2hva(vm, hcall_page), 0x0, getpagesize()); + + vcpu_set_hv_cpuid(vcpu); + vcpu_enable_evmcs(vcpu); + + vcpu_alloc_vmx(vm, &vmx_pages_gva); + vcpu_alloc_hyperv_test_pages(vm, &hv_pages_gva); + vcpu_args_set(vcpu, 3, vmx_pages_gva, hv_pages_gva, addr_gva2gpa(vm, hcall_page)); + vcpu_set_msr(vcpu, HV_X64_MSR_VP_INDEX, vcpu->id); + + vm_install_exception_handler(vm, UD_VECTOR, guest_ud_handler); + vm_install_exception_handler(vm, NMI_VECTOR, guest_nmi_handler); + + pr_info("Running L1 which uses EVMCS to run L2\n"); + + for (stage = 1;; stage++) { + vcpu_run(vcpu); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); + + switch (get_ucall(vcpu, &uc)) { + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + /* NOT REACHED */ + case UCALL_SYNC: + break; + case UCALL_DONE: + goto done; + default: + TEST_FAIL("Unknown ucall %lu", uc.cmd); + } + + /* UCALL_SYNC is handled here. */ + TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") && + uc.args[1] == stage, "Stage %d: Unexpected register values vmexit, got %lx", + stage, (ulong)uc.args[1]); + + vcpu = save_restore_vm(vm, vcpu); + + /* Force immediate L2->L1 exit before resuming */ + if (stage == 8) { + pr_info("Injecting NMI into L1 before L2 had a chance to run after restore\n"); + inject_nmi(vcpu); + } + + /* + * Do KVM_GET_NESTED_STATE/KVM_SET_NESTED_STATE for a freshly + * restored VM (before the first KVM_RUN) to check that + * KVM_STATE_NESTED_EVMCS is not lost. 
+ */ + if (stage == 9) { + pr_info("Trying extra KVM_GET_NESTED_STATE/KVM_SET_NESTED_STATE cycle\n"); + vcpu = save_restore_vm(vm, vcpu); + } + } + +done: + kvm_vm_free(vm); +} diff --git a/tools/testing/selftests/kvm/x86/hyperv_extended_hypercalls.c b/tools/testing/selftests/kvm/x86/hyperv_extended_hypercalls.c new file mode 100644 index 000000000000..949e08e98f31 --- /dev/null +++ b/tools/testing/selftests/kvm/x86/hyperv_extended_hypercalls.c @@ -0,0 +1,98 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Test Hyper-V extended hypercall, HV_EXT_CALL_QUERY_CAPABILITIES (0x8001), + * exit to userspace and receive result in guest. + * + * Negative tests are present in hyperv_features.c + * + * Copyright 2022 Google LLC + * Author: Vipin Sharma + */ +#include "kvm_util.h" +#include "processor.h" +#include "hyperv.h" + +/* Any value is fine */ +#define EXT_CAPABILITIES 0xbull + +static void guest_code(vm_paddr_t in_pg_gpa, vm_paddr_t out_pg_gpa, + vm_vaddr_t out_pg_gva) +{ + uint64_t *output_gva; + + wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID); + wrmsr(HV_X64_MSR_HYPERCALL, in_pg_gpa); + + output_gva = (uint64_t *)out_pg_gva; + + hyperv_hypercall(HV_EXT_CALL_QUERY_CAPABILITIES, in_pg_gpa, out_pg_gpa); + + /* TLFS states output will be a uint64_t value */ + GUEST_ASSERT_EQ(*output_gva, EXT_CAPABILITIES); + + GUEST_DONE(); +} + +int main(void) +{ + vm_vaddr_t hcall_out_page; + vm_vaddr_t hcall_in_page; + struct kvm_vcpu *vcpu; + struct kvm_run *run; + struct kvm_vm *vm; + uint64_t *outval; + struct ucall uc; + + TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_CPUID)); + + /* Verify if extended hypercalls are supported */ + if (!kvm_cpuid_has(kvm_get_supported_hv_cpuid(), + HV_ENABLE_EXTENDED_HYPERCALLS)) { + print_skip("Extended calls not supported by the kernel"); + exit(KSFT_SKIP); + } + + vm = vm_create_with_one_vcpu(&vcpu, guest_code); + run = vcpu->run; + vcpu_set_hv_cpuid(vcpu); + + /* Hypercall input */ + hcall_in_page = vm_vaddr_alloc_pages(vm, 1); + memset(addr_gva2hva(vm, hcall_in_page), 0x0, vm->page_size); + + /* Hypercall output */ + hcall_out_page = vm_vaddr_alloc_pages(vm, 1); + memset(addr_gva2hva(vm, hcall_out_page), 0x0, vm->page_size); + + vcpu_args_set(vcpu, 3, addr_gva2gpa(vm, hcall_in_page), + addr_gva2gpa(vm, hcall_out_page), hcall_out_page); + + vcpu_run(vcpu); + + TEST_ASSERT(run->exit_reason == KVM_EXIT_HYPERV, + "Unexpected exit reason: %u (%s)", + run->exit_reason, exit_reason_str(run->exit_reason)); + + outval = addr_gpa2hva(vm, run->hyperv.u.hcall.params[1]); + *outval = EXT_CAPABILITIES; + run->hyperv.u.hcall.result = HV_STATUS_SUCCESS; + + vcpu_run(vcpu); + + TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, + "Unexpected exit reason: %u (%s)", + run->exit_reason, exit_reason_str(run->exit_reason)); + + switch (get_ucall(vcpu, &uc)) { + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + break; + case UCALL_DONE: + break; + default: + TEST_FAIL("Unhandled ucall: %ld", uc.cmd); + } + + kvm_vm_free(vm); + return 0; +} diff --git a/tools/testing/selftests/kvm/x86/hyperv_features.c b/tools/testing/selftests/kvm/x86/hyperv_features.c new file mode 100644 index 000000000000..068e9c69710d --- /dev/null +++ b/tools/testing/selftests/kvm/x86/hyperv_features.c @@ -0,0 +1,695 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2021, Red Hat, Inc. 
+ * + * Tests for Hyper-V features enablement + */ +#include +#include +#include + +#include "test_util.h" +#include "kvm_util.h" +#include "processor.h" +#include "hyperv.h" + +/* + * HYPERV_CPUID_ENLIGHTMENT_INFO.EBX is not a 'feature' CPUID leaf + * but to activate the feature it is sufficient to set it to a non-zero + * value. Use BIT(0) for that. + */ +#define HV_PV_SPINLOCKS_TEST \ + KVM_X86_CPU_FEATURE(HYPERV_CPUID_ENLIGHTMENT_INFO, 0, EBX, 0) + +struct msr_data { + uint32_t idx; + bool fault_expected; + bool write; + u64 write_val; +}; + +struct hcall_data { + uint64_t control; + uint64_t expect; + bool ud_expected; +}; + +static bool is_write_only_msr(uint32_t msr) +{ + return msr == HV_X64_MSR_EOI; +} + +static void guest_msr(struct msr_data *msr) +{ + uint8_t vector = 0; + uint64_t msr_val = 0; + + GUEST_ASSERT(msr->idx); + + if (msr->write) + vector = wrmsr_safe(msr->idx, msr->write_val); + + if (!vector && (!msr->write || !is_write_only_msr(msr->idx))) + vector = rdmsr_safe(msr->idx, &msr_val); + + if (msr->fault_expected) + __GUEST_ASSERT(vector == GP_VECTOR, + "Expected #GP on %sMSR(0x%x), got vector '0x%x'", + msr->write ? "WR" : "RD", msr->idx, vector); + else + __GUEST_ASSERT(!vector, + "Expected success on %sMSR(0x%x), got vector '0x%x'", + msr->write ? "WR" : "RD", msr->idx, vector); + + if (vector || is_write_only_msr(msr->idx)) + goto done; + + if (msr->write) + __GUEST_ASSERT(!vector, + "WRMSR(0x%x) to '0x%lx', RDMSR read '0x%lx'", + msr->idx, msr->write_val, msr_val); + + /* Invariant TSC bit appears when TSC invariant control MSR is written to */ + if (msr->idx == HV_X64_MSR_TSC_INVARIANT_CONTROL) { + if (!this_cpu_has(HV_ACCESS_TSC_INVARIANT)) + GUEST_ASSERT(this_cpu_has(X86_FEATURE_INVTSC)); + else + GUEST_ASSERT(this_cpu_has(X86_FEATURE_INVTSC) == + !!(msr_val & HV_INVARIANT_TSC_EXPOSED)); + } + +done: + GUEST_DONE(); +} + +static void guest_hcall(vm_vaddr_t pgs_gpa, struct hcall_data *hcall) +{ + u64 res, input, output; + uint8_t vector; + + GUEST_ASSERT_NE(hcall->control, 0); + + wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID); + wrmsr(HV_X64_MSR_HYPERCALL, pgs_gpa); + + if (!(hcall->control & HV_HYPERCALL_FAST_BIT)) { + input = pgs_gpa; + output = pgs_gpa + 4096; + } else { + input = output = 0; + } + + vector = __hyperv_hypercall(hcall->control, input, output, &res); + if (hcall->ud_expected) { + __GUEST_ASSERT(vector == UD_VECTOR, + "Expected #UD for control '%lu', got vector '0x%x'", + hcall->control, vector); + } else { + __GUEST_ASSERT(!vector, + "Expected no exception for control '%lu', got vector '0x%x'", + hcall->control, vector); + GUEST_ASSERT_EQ(res, hcall->expect); + } + + GUEST_DONE(); +} + +static void vcpu_reset_hv_cpuid(struct kvm_vcpu *vcpu) +{ + /* + * Enable all supported Hyper-V features, then clear the leafs holding + * the features that will be tested one by one. 
+ */ + vcpu_set_hv_cpuid(vcpu); + + vcpu_clear_cpuid_entry(vcpu, HYPERV_CPUID_FEATURES); + vcpu_clear_cpuid_entry(vcpu, HYPERV_CPUID_ENLIGHTMENT_INFO); + vcpu_clear_cpuid_entry(vcpu, HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES); +} + +static void guest_test_msrs_access(void) +{ + struct kvm_cpuid2 *prev_cpuid = NULL; + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + struct ucall uc; + int stage = 0; + vm_vaddr_t msr_gva; + struct msr_data *msr; + bool has_invtsc = kvm_cpu_has(X86_FEATURE_INVTSC); + + while (true) { + vm = vm_create_with_one_vcpu(&vcpu, guest_msr); + + msr_gva = vm_vaddr_alloc_page(vm); + memset(addr_gva2hva(vm, msr_gva), 0x0, getpagesize()); + msr = addr_gva2hva(vm, msr_gva); + + vcpu_args_set(vcpu, 1, msr_gva); + vcpu_enable_cap(vcpu, KVM_CAP_HYPERV_ENFORCE_CPUID, 1); + + if (!prev_cpuid) { + vcpu_reset_hv_cpuid(vcpu); + + prev_cpuid = allocate_kvm_cpuid2(vcpu->cpuid->nent); + } else { + vcpu_init_cpuid(vcpu, prev_cpuid); + } + + /* TODO: Make this entire test easier to maintain. */ + if (stage >= 21) + vcpu_enable_cap(vcpu, KVM_CAP_HYPERV_SYNIC2, 0); + + switch (stage) { + case 0: + /* + * Only available when Hyper-V identification is set + */ + msr->idx = HV_X64_MSR_GUEST_OS_ID; + msr->write = false; + msr->fault_expected = true; + break; + case 1: + msr->idx = HV_X64_MSR_HYPERCALL; + msr->write = false; + msr->fault_expected = true; + break; + case 2: + vcpu_set_cpuid_feature(vcpu, HV_MSR_HYPERCALL_AVAILABLE); + /* + * HV_X64_MSR_GUEST_OS_ID has to be written first to make + * HV_X64_MSR_HYPERCALL available. + */ + msr->idx = HV_X64_MSR_GUEST_OS_ID; + msr->write = true; + msr->write_val = HYPERV_LINUX_OS_ID; + msr->fault_expected = false; + break; + case 3: + msr->idx = HV_X64_MSR_GUEST_OS_ID; + msr->write = false; + msr->fault_expected = false; + break; + case 4: + msr->idx = HV_X64_MSR_HYPERCALL; + msr->write = false; + msr->fault_expected = false; + break; + + case 5: + msr->idx = HV_X64_MSR_VP_RUNTIME; + msr->write = false; + msr->fault_expected = true; + break; + case 6: + vcpu_set_cpuid_feature(vcpu, HV_MSR_VP_RUNTIME_AVAILABLE); + msr->idx = HV_X64_MSR_VP_RUNTIME; + msr->write = false; + msr->fault_expected = false; + break; + case 7: + /* Read only */ + msr->idx = HV_X64_MSR_VP_RUNTIME; + msr->write = true; + msr->write_val = 1; + msr->fault_expected = true; + break; + + case 8: + msr->idx = HV_X64_MSR_TIME_REF_COUNT; + msr->write = false; + msr->fault_expected = true; + break; + case 9: + vcpu_set_cpuid_feature(vcpu, HV_MSR_TIME_REF_COUNT_AVAILABLE); + msr->idx = HV_X64_MSR_TIME_REF_COUNT; + msr->write = false; + msr->fault_expected = false; + break; + case 10: + /* Read only */ + msr->idx = HV_X64_MSR_TIME_REF_COUNT; + msr->write = true; + msr->write_val = 1; + msr->fault_expected = true; + break; + + case 11: + msr->idx = HV_X64_MSR_VP_INDEX; + msr->write = false; + msr->fault_expected = true; + break; + case 12: + vcpu_set_cpuid_feature(vcpu, HV_MSR_VP_INDEX_AVAILABLE); + msr->idx = HV_X64_MSR_VP_INDEX; + msr->write = false; + msr->fault_expected = false; + break; + case 13: + /* Read only */ + msr->idx = HV_X64_MSR_VP_INDEX; + msr->write = true; + msr->write_val = 1; + msr->fault_expected = true; + break; + + case 14: + msr->idx = HV_X64_MSR_RESET; + msr->write = false; + msr->fault_expected = true; + break; + case 15: + vcpu_set_cpuid_feature(vcpu, HV_MSR_RESET_AVAILABLE); + msr->idx = HV_X64_MSR_RESET; + msr->write = false; + msr->fault_expected = false; + break; + case 16: + msr->idx = HV_X64_MSR_RESET; + msr->write = true; + /* + * TODO: the test only 
writes '0' to HV_X64_MSR_RESET + * at the moment, writing some other value there will + * trigger real vCPU reset and the code is not prepared + * to handle it yet. + */ + msr->write_val = 0; + msr->fault_expected = false; + break; + + case 17: + msr->idx = HV_X64_MSR_REFERENCE_TSC; + msr->write = false; + msr->fault_expected = true; + break; + case 18: + vcpu_set_cpuid_feature(vcpu, HV_MSR_REFERENCE_TSC_AVAILABLE); + msr->idx = HV_X64_MSR_REFERENCE_TSC; + msr->write = false; + msr->fault_expected = false; + break; + case 19: + msr->idx = HV_X64_MSR_REFERENCE_TSC; + msr->write = true; + msr->write_val = 0; + msr->fault_expected = false; + break; + + case 20: + msr->idx = HV_X64_MSR_EOM; + msr->write = false; + msr->fault_expected = true; + break; + case 21: + /* + * Remains unavailable even with KVM_CAP_HYPERV_SYNIC2 + * capability enabled and guest visible CPUID bit unset. + */ + msr->idx = HV_X64_MSR_EOM; + msr->write = false; + msr->fault_expected = true; + break; + case 22: + vcpu_set_cpuid_feature(vcpu, HV_MSR_SYNIC_AVAILABLE); + msr->idx = HV_X64_MSR_EOM; + msr->write = false; + msr->fault_expected = false; + break; + case 23: + msr->idx = HV_X64_MSR_EOM; + msr->write = true; + msr->write_val = 0; + msr->fault_expected = false; + break; + + case 24: + msr->idx = HV_X64_MSR_STIMER0_CONFIG; + msr->write = false; + msr->fault_expected = true; + break; + case 25: + vcpu_set_cpuid_feature(vcpu, HV_MSR_SYNTIMER_AVAILABLE); + msr->idx = HV_X64_MSR_STIMER0_CONFIG; + msr->write = false; + msr->fault_expected = false; + break; + case 26: + msr->idx = HV_X64_MSR_STIMER0_CONFIG; + msr->write = true; + msr->write_val = 0; + msr->fault_expected = false; + break; + case 27: + /* Direct mode test */ + msr->idx = HV_X64_MSR_STIMER0_CONFIG; + msr->write = true; + msr->write_val = 1 << 12; + msr->fault_expected = true; + break; + case 28: + vcpu_set_cpuid_feature(vcpu, HV_STIMER_DIRECT_MODE_AVAILABLE); + msr->idx = HV_X64_MSR_STIMER0_CONFIG; + msr->write = true; + msr->write_val = 1 << 12; + msr->fault_expected = false; + break; + + case 29: + msr->idx = HV_X64_MSR_EOI; + msr->write = false; + msr->fault_expected = true; + break; + case 30: + vcpu_set_cpuid_feature(vcpu, HV_MSR_APIC_ACCESS_AVAILABLE); + msr->idx = HV_X64_MSR_EOI; + msr->write = true; + msr->write_val = 1; + msr->fault_expected = false; + break; + + case 31: + msr->idx = HV_X64_MSR_TSC_FREQUENCY; + msr->write = false; + msr->fault_expected = true; + break; + case 32: + vcpu_set_cpuid_feature(vcpu, HV_ACCESS_FREQUENCY_MSRS); + msr->idx = HV_X64_MSR_TSC_FREQUENCY; + msr->write = false; + msr->fault_expected = false; + break; + case 33: + /* Read only */ + msr->idx = HV_X64_MSR_TSC_FREQUENCY; + msr->write = true; + msr->write_val = 1; + msr->fault_expected = true; + break; + + case 34: + msr->idx = HV_X64_MSR_REENLIGHTENMENT_CONTROL; + msr->write = false; + msr->fault_expected = true; + break; + case 35: + vcpu_set_cpuid_feature(vcpu, HV_ACCESS_REENLIGHTENMENT); + msr->idx = HV_X64_MSR_REENLIGHTENMENT_CONTROL; + msr->write = false; + msr->fault_expected = false; + break; + case 36: + msr->idx = HV_X64_MSR_REENLIGHTENMENT_CONTROL; + msr->write = true; + msr->write_val = 1; + msr->fault_expected = false; + break; + case 37: + /* Can only write '0' */ + msr->idx = HV_X64_MSR_TSC_EMULATION_STATUS; + msr->write = true; + msr->write_val = 1; + msr->fault_expected = true; + break; + + case 38: + msr->idx = HV_X64_MSR_CRASH_P0; + msr->write = false; + msr->fault_expected = true; + break; + case 39: + vcpu_set_cpuid_feature(vcpu, 
HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE);
+ msr->idx = HV_X64_MSR_CRASH_P0;
+ msr->write = false;
+ msr->fault_expected = false;
+ break;
+ case 40:
+ msr->idx = HV_X64_MSR_CRASH_P0;
+ msr->write = true;
+ msr->write_val = 1;
+ msr->fault_expected = false;
+ break;
+
+ case 41:
+ msr->idx = HV_X64_MSR_SYNDBG_STATUS;
+ msr->write = false;
+ msr->fault_expected = true;
+ break;
+ case 42:
+ vcpu_set_cpuid_feature(vcpu, HV_FEATURE_DEBUG_MSRS_AVAILABLE);
+ vcpu_set_cpuid_feature(vcpu, HV_X64_SYNDBG_CAP_ALLOW_KERNEL_DEBUGGING);
+ msr->idx = HV_X64_MSR_SYNDBG_STATUS;
+ msr->write = false;
+ msr->fault_expected = false;
+ break;
+ case 43:
+ msr->idx = HV_X64_MSR_SYNDBG_STATUS;
+ msr->write = true;
+ msr->write_val = 0;
+ msr->fault_expected = false;
+ break;
+
+ case 44:
+ /* MSR is not available when CPUID feature bit is unset */
+ if (!has_invtsc)
+ goto next_stage;
+ msr->idx = HV_X64_MSR_TSC_INVARIANT_CONTROL;
+ msr->write = false;
+ msr->fault_expected = true;
+ break;
+ case 45:
+ /* MSR is available when CPUID feature bit is set */
+ if (!has_invtsc)
+ goto next_stage;
+ vcpu_set_cpuid_feature(vcpu, HV_ACCESS_TSC_INVARIANT);
+ msr->idx = HV_X64_MSR_TSC_INVARIANT_CONTROL;
+ msr->write = false;
+ msr->fault_expected = false;
+ break;
+ case 46:
+ /* Writing bits other than 0 is forbidden */
+ if (!has_invtsc)
+ goto next_stage;
+ msr->idx = HV_X64_MSR_TSC_INVARIANT_CONTROL;
+ msr->write = true;
+ msr->write_val = 0xdeadbeef;
+ msr->fault_expected = true;
+ break;
+ case 47:
+ /* Setting bit 0 enables the feature */
+ if (!has_invtsc)
+ goto next_stage;
+ msr->idx = HV_X64_MSR_TSC_INVARIANT_CONTROL;
+ msr->write = true;
+ msr->write_val = 1;
+ msr->fault_expected = false;
+ break;
+
+ default:
+ kvm_vm_free(vm);
+ return;
+ }
+
+ vcpu_set_cpuid(vcpu);
+
+ memcpy(prev_cpuid, vcpu->cpuid, kvm_cpuid2_size(vcpu->cpuid->nent));
+
+ pr_debug("Stage %d: testing msr: 0x%x for %s\n", stage,
+ msr->idx, msr->write ? 
"write" : "read"); + + vcpu_run(vcpu); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); + + switch (get_ucall(vcpu, &uc)) { + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + return; + case UCALL_DONE: + break; + default: + TEST_FAIL("Unhandled ucall: %ld", uc.cmd); + return; + } + +next_stage: + stage++; + kvm_vm_free(vm); + } +} + +static void guest_test_hcalls_access(void) +{ + struct kvm_cpuid2 *prev_cpuid = NULL; + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + struct ucall uc; + int stage = 0; + vm_vaddr_t hcall_page, hcall_params; + struct hcall_data *hcall; + + while (true) { + vm = vm_create_with_one_vcpu(&vcpu, guest_hcall); + + /* Hypercall input/output */ + hcall_page = vm_vaddr_alloc_pages(vm, 2); + memset(addr_gva2hva(vm, hcall_page), 0x0, 2 * getpagesize()); + + hcall_params = vm_vaddr_alloc_page(vm); + memset(addr_gva2hva(vm, hcall_params), 0x0, getpagesize()); + hcall = addr_gva2hva(vm, hcall_params); + + vcpu_args_set(vcpu, 2, addr_gva2gpa(vm, hcall_page), hcall_params); + vcpu_enable_cap(vcpu, KVM_CAP_HYPERV_ENFORCE_CPUID, 1); + + if (!prev_cpuid) { + vcpu_reset_hv_cpuid(vcpu); + + prev_cpuid = allocate_kvm_cpuid2(vcpu->cpuid->nent); + } else { + vcpu_init_cpuid(vcpu, prev_cpuid); + } + + switch (stage) { + case 0: + vcpu_set_cpuid_feature(vcpu, HV_MSR_HYPERCALL_AVAILABLE); + hcall->control = 0xbeef; + hcall->expect = HV_STATUS_INVALID_HYPERCALL_CODE; + break; + + case 1: + hcall->control = HVCALL_POST_MESSAGE; + hcall->expect = HV_STATUS_ACCESS_DENIED; + break; + case 2: + vcpu_set_cpuid_feature(vcpu, HV_POST_MESSAGES); + hcall->control = HVCALL_POST_MESSAGE; + hcall->expect = HV_STATUS_INVALID_HYPERCALL_INPUT; + break; + + case 3: + hcall->control = HVCALL_SIGNAL_EVENT; + hcall->expect = HV_STATUS_ACCESS_DENIED; + break; + case 4: + vcpu_set_cpuid_feature(vcpu, HV_SIGNAL_EVENTS); + hcall->control = HVCALL_SIGNAL_EVENT; + hcall->expect = HV_STATUS_INVALID_HYPERCALL_INPUT; + break; + + case 5: + hcall->control = HVCALL_RESET_DEBUG_SESSION; + hcall->expect = HV_STATUS_INVALID_HYPERCALL_CODE; + break; + case 6: + vcpu_set_cpuid_feature(vcpu, HV_X64_SYNDBG_CAP_ALLOW_KERNEL_DEBUGGING); + hcall->control = HVCALL_RESET_DEBUG_SESSION; + hcall->expect = HV_STATUS_ACCESS_DENIED; + break; + case 7: + vcpu_set_cpuid_feature(vcpu, HV_DEBUGGING); + hcall->control = HVCALL_RESET_DEBUG_SESSION; + hcall->expect = HV_STATUS_OPERATION_DENIED; + break; + + case 8: + hcall->control = HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE; + hcall->expect = HV_STATUS_ACCESS_DENIED; + break; + case 9: + vcpu_set_cpuid_feature(vcpu, HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED); + hcall->control = HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE; + hcall->expect = HV_STATUS_SUCCESS; + break; + case 10: + hcall->control = HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX; + hcall->expect = HV_STATUS_ACCESS_DENIED; + break; + case 11: + vcpu_set_cpuid_feature(vcpu, HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED); + hcall->control = HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX; + hcall->expect = HV_STATUS_SUCCESS; + break; + + case 12: + hcall->control = HVCALL_SEND_IPI; + hcall->expect = HV_STATUS_ACCESS_DENIED; + break; + case 13: + vcpu_set_cpuid_feature(vcpu, HV_X64_CLUSTER_IPI_RECOMMENDED); + hcall->control = HVCALL_SEND_IPI; + hcall->expect = HV_STATUS_INVALID_HYPERCALL_INPUT; + break; + case 14: + /* Nothing in 'sparse banks' -> success */ + hcall->control = HVCALL_SEND_IPI_EX; + hcall->expect = HV_STATUS_SUCCESS; + break; + + case 15: + hcall->control = HVCALL_NOTIFY_LONG_SPIN_WAIT; + hcall->expect = HV_STATUS_ACCESS_DENIED; + break; + case 16: + 
vcpu_set_cpuid_feature(vcpu, HV_PV_SPINLOCKS_TEST); + hcall->control = HVCALL_NOTIFY_LONG_SPIN_WAIT; + hcall->expect = HV_STATUS_SUCCESS; + break; + case 17: + /* XMM fast hypercall */ + hcall->control = HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE | HV_HYPERCALL_FAST_BIT; + hcall->ud_expected = true; + break; + case 18: + vcpu_set_cpuid_feature(vcpu, HV_X64_HYPERCALL_XMM_INPUT_AVAILABLE); + hcall->control = HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE | HV_HYPERCALL_FAST_BIT; + hcall->ud_expected = false; + hcall->expect = HV_STATUS_SUCCESS; + break; + case 19: + hcall->control = HV_EXT_CALL_QUERY_CAPABILITIES; + hcall->expect = HV_STATUS_ACCESS_DENIED; + break; + case 20: + vcpu_set_cpuid_feature(vcpu, HV_ENABLE_EXTENDED_HYPERCALLS); + hcall->control = HV_EXT_CALL_QUERY_CAPABILITIES | HV_HYPERCALL_FAST_BIT; + hcall->expect = HV_STATUS_INVALID_PARAMETER; + break; + case 21: + kvm_vm_free(vm); + return; + } + + vcpu_set_cpuid(vcpu); + + memcpy(prev_cpuid, vcpu->cpuid, kvm_cpuid2_size(vcpu->cpuid->nent)); + + pr_debug("Stage %d: testing hcall: 0x%lx\n", stage, hcall->control); + + vcpu_run(vcpu); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); + + switch (get_ucall(vcpu, &uc)) { + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + return; + case UCALL_DONE: + break; + default: + TEST_FAIL("Unhandled ucall: %ld", uc.cmd); + return; + } + + stage++; + kvm_vm_free(vm); + } +} + +int main(void) +{ + TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_ENFORCE_CPUID)); + + pr_info("Testing access to Hyper-V specific MSRs\n"); + guest_test_msrs_access(); + + pr_info("Testing access to Hyper-V hypercalls\n"); + guest_test_hcalls_access(); +} diff --git a/tools/testing/selftests/kvm/x86/hyperv_ipi.c b/tools/testing/selftests/kvm/x86/hyperv_ipi.c new file mode 100644 index 000000000000..22c0c124582f --- /dev/null +++ b/tools/testing/selftests/kvm/x86/hyperv_ipi.c @@ -0,0 +1,308 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Hyper-V HvCallSendSyntheticClusterIpi{,Ex} tests + * + * Copyright (C) 2022, Red Hat, Inc. 
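+ * + * One 'sender' vCPU issues the IPIs while two 'receiver' vCPUs (VP indices 2 and 65, i.e. in different sparse banks) count what they receive; both HvCallSendSyntheticClusterIpi and the 'Ex' variant are exercised in 'slow', 'fast' and 'XMM fast' forms. For VP 65 alone, the sparse encoding used below is valid_bank_mask = BIT(1) and bank_contents[0] = BIT(65 - 64).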
+ * + */ +#include +#include + +#include "kvm_util.h" +#include "hyperv.h" +#include "test_util.h" +#include "vmx.h" + +#define RECEIVER_VCPU_ID_1 2 +#define RECEIVER_VCPU_ID_2 65 + +#define IPI_VECTOR 0xfe + +static volatile uint64_t ipis_rcvd[RECEIVER_VCPU_ID_2 + 1]; + +struct hv_vpset { + u64 format; + u64 valid_bank_mask; + u64 bank_contents[2]; +}; + +enum HV_GENERIC_SET_FORMAT { + HV_GENERIC_SET_SPARSE_4K, + HV_GENERIC_SET_ALL, +}; + +/* HvCallSendSyntheticClusterIpi hypercall */ +struct hv_send_ipi { + u32 vector; + u32 reserved; + u64 cpu_mask; +}; + +/* HvCallSendSyntheticClusterIpiEx hypercall */ +struct hv_send_ipi_ex { + u32 vector; + u32 reserved; + struct hv_vpset vp_set; +}; + +static inline void hv_init(vm_vaddr_t pgs_gpa) +{ + wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID); + wrmsr(HV_X64_MSR_HYPERCALL, pgs_gpa); +} + +static void receiver_code(void *hcall_page, vm_vaddr_t pgs_gpa) +{ + u32 vcpu_id; + + x2apic_enable(); + hv_init(pgs_gpa); + + vcpu_id = rdmsr(HV_X64_MSR_VP_INDEX); + + /* Signal sender vCPU we're ready */ + ipis_rcvd[vcpu_id] = (u64)-1; + + for (;;) + asm volatile("sti; hlt; cli"); +} + +static void guest_ipi_handler(struct ex_regs *regs) +{ + u32 vcpu_id = rdmsr(HV_X64_MSR_VP_INDEX); + + ipis_rcvd[vcpu_id]++; + wrmsr(HV_X64_MSR_EOI, 1); +} + +static inline void nop_loop(void) +{ + int i; + + for (i = 0; i < 100000000; i++) + asm volatile("nop"); +} + +static void sender_guest_code(void *hcall_page, vm_vaddr_t pgs_gpa) +{ + struct hv_send_ipi *ipi = (struct hv_send_ipi *)hcall_page; + struct hv_send_ipi_ex *ipi_ex = (struct hv_send_ipi_ex *)hcall_page; + int stage = 1, ipis_expected[2] = {0}; + + hv_init(pgs_gpa); + GUEST_SYNC(stage++); + + /* Wait for receiver vCPUs to come up */ + while (!ipis_rcvd[RECEIVER_VCPU_ID_1] || !ipis_rcvd[RECEIVER_VCPU_ID_2]) + nop_loop(); + ipis_rcvd[RECEIVER_VCPU_ID_1] = ipis_rcvd[RECEIVER_VCPU_ID_2] = 0; + + /* 'Slow' HvCallSendSyntheticClusterIpi to RECEIVER_VCPU_ID_1 */ + ipi->vector = IPI_VECTOR; + ipi->cpu_mask = 1 << RECEIVER_VCPU_ID_1; + hyperv_hypercall(HVCALL_SEND_IPI, pgs_gpa, pgs_gpa + 4096); + nop_loop(); + GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]); + GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ipis_expected[1]); + GUEST_SYNC(stage++); + /* 'Fast' HvCallSendSyntheticClusterIpi to RECEIVER_VCPU_ID_1 */ + hyperv_hypercall(HVCALL_SEND_IPI | HV_HYPERCALL_FAST_BIT, + IPI_VECTOR, 1 << RECEIVER_VCPU_ID_1); + nop_loop(); + GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]); + GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ipis_expected[1]); + GUEST_SYNC(stage++); + + /* 'Slow' HvCallSendSyntheticClusterIpiEx to RECEIVER_VCPU_ID_1 */ + memset(hcall_page, 0, 4096); + ipi_ex->vector = IPI_VECTOR; + ipi_ex->vp_set.format = HV_GENERIC_SET_SPARSE_4K; + ipi_ex->vp_set.valid_bank_mask = 1 << 0; + ipi_ex->vp_set.bank_contents[0] = BIT(RECEIVER_VCPU_ID_1); + hyperv_hypercall(HVCALL_SEND_IPI_EX | (1 << HV_HYPERCALL_VARHEAD_OFFSET), + pgs_gpa, pgs_gpa + 4096); + nop_loop(); + GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]); + GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ipis_expected[1]); + GUEST_SYNC(stage++); + /* 'XMM Fast' HvCallSendSyntheticClusterIpiEx to RECEIVER_VCPU_ID_1 */ + hyperv_write_xmm_input(&ipi_ex->vp_set.valid_bank_mask, 1); + hyperv_hypercall(HVCALL_SEND_IPI_EX | HV_HYPERCALL_FAST_BIT | + (1 << HV_HYPERCALL_VARHEAD_OFFSET), + IPI_VECTOR, HV_GENERIC_SET_SPARSE_4K); + nop_loop(); + GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]); + 
GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ipis_expected[1]); + GUEST_SYNC(stage++); + + /* 'Slow' HvCallSendSyntheticClusterIpiEx to RECEIVER_VCPU_ID_2 */ + memset(hcall_page, 0, 4096); + ipi_ex->vector = IPI_VECTOR; + ipi_ex->vp_set.format = HV_GENERIC_SET_SPARSE_4K; + ipi_ex->vp_set.valid_bank_mask = 1 << 1; + ipi_ex->vp_set.bank_contents[0] = BIT(RECEIVER_VCPU_ID_2 - 64); + hyperv_hypercall(HVCALL_SEND_IPI_EX | (1 << HV_HYPERCALL_VARHEAD_OFFSET), + pgs_gpa, pgs_gpa + 4096); + nop_loop(); + GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ipis_expected[0]); + GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ++ipis_expected[1]); + GUEST_SYNC(stage++); + /* 'XMM Fast' HvCallSendSyntheticClusterIpiEx to RECEIVER_VCPU_ID_2 */ + hyperv_write_xmm_input(&ipi_ex->vp_set.valid_bank_mask, 1); + hyperv_hypercall(HVCALL_SEND_IPI_EX | HV_HYPERCALL_FAST_BIT | + (1 << HV_HYPERCALL_VARHEAD_OFFSET), + IPI_VECTOR, HV_GENERIC_SET_SPARSE_4K); + nop_loop(); + GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ipis_expected[0]); + GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ++ipis_expected[1]); + GUEST_SYNC(stage++); + + /* 'Slow' HvCallSendSyntheticClusterIpiEx to both RECEIVER_VCPU_ID_{1,2} */ + memset(hcall_page, 0, 4096); + ipi_ex->vector = IPI_VECTOR; + ipi_ex->vp_set.format = HV_GENERIC_SET_SPARSE_4K; + ipi_ex->vp_set.valid_bank_mask = 1 << 1 | 1; + ipi_ex->vp_set.bank_contents[0] = BIT(RECEIVER_VCPU_ID_1); + ipi_ex->vp_set.bank_contents[1] = BIT(RECEIVER_VCPU_ID_2 - 64); + hyperv_hypercall(HVCALL_SEND_IPI_EX | (2 << HV_HYPERCALL_VARHEAD_OFFSET), + pgs_gpa, pgs_gpa + 4096); + nop_loop(); + GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]); + GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ++ipis_expected[1]); + GUEST_SYNC(stage++); + /* 'XMM Fast' HvCallSendSyntheticClusterIpiEx to both RECEIVER_VCPU_ID_{1, 2} */ + hyperv_write_xmm_input(&ipi_ex->vp_set.valid_bank_mask, 2); + hyperv_hypercall(HVCALL_SEND_IPI_EX | HV_HYPERCALL_FAST_BIT | + (2 << HV_HYPERCALL_VARHEAD_OFFSET), + IPI_VECTOR, HV_GENERIC_SET_SPARSE_4K); + nop_loop(); + GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]); + GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ++ipis_expected[1]); + GUEST_SYNC(stage++); + + /* 'Slow' HvCallSendSyntheticClusterIpiEx to HV_GENERIC_SET_ALL */ + memset(hcall_page, 0, 4096); + ipi_ex->vector = IPI_VECTOR; + ipi_ex->vp_set.format = HV_GENERIC_SET_ALL; + hyperv_hypercall(HVCALL_SEND_IPI_EX, pgs_gpa, pgs_gpa + 4096); + nop_loop(); + GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]); + GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ++ipis_expected[1]); + GUEST_SYNC(stage++); + /* + * 'XMM Fast' HvCallSendSyntheticClusterIpiEx to HV_GENERIC_SET_ALL. 
+ */ + ipi_ex->vp_set.valid_bank_mask = 0; + hyperv_write_xmm_input(&ipi_ex->vp_set.valid_bank_mask, 2); + hyperv_hypercall(HVCALL_SEND_IPI_EX | HV_HYPERCALL_FAST_BIT, + IPI_VECTOR, HV_GENERIC_SET_ALL); + nop_loop(); + GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]); + GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ++ipis_expected[1]); + GUEST_SYNC(stage++); + + GUEST_DONE(); +} + +static void *vcpu_thread(void *arg) +{ + struct kvm_vcpu *vcpu = (struct kvm_vcpu *)arg; + int old, r; + + r = pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, &old); + TEST_ASSERT(!r, "pthread_setcanceltype failed on vcpu_id=%u with errno=%d", + vcpu->id, r); + + vcpu_run(vcpu); + + TEST_FAIL("vCPU %u exited unexpectedly", vcpu->id); + + return NULL; +} + +static void cancel_join_vcpu_thread(pthread_t thread, struct kvm_vcpu *vcpu) +{ + void *retval; + int r; + + r = pthread_cancel(thread); + TEST_ASSERT(!r, "pthread_cancel on vcpu_id=%d failed with errno=%d", + vcpu->id, r); + + r = pthread_join(thread, &retval); + TEST_ASSERT(!r, "pthread_join on vcpu_id=%d failed with errno=%d", + vcpu->id, r); + TEST_ASSERT(retval == PTHREAD_CANCELED, + "expected retval=%p, got %p", PTHREAD_CANCELED, + retval); +} + +int main(int argc, char *argv[]) +{ + struct kvm_vm *vm; + struct kvm_vcpu *vcpu[3]; + vm_vaddr_t hcall_page; + pthread_t threads[2]; + int stage = 1, r; + struct ucall uc; + + TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_SEND_IPI)); + + vm = vm_create_with_one_vcpu(&vcpu[0], sender_guest_code); + + /* Hypercall input/output */ + hcall_page = vm_vaddr_alloc_pages(vm, 2); + memset(addr_gva2hva(vm, hcall_page), 0x0, 2 * getpagesize()); + + + vcpu[1] = vm_vcpu_add(vm, RECEIVER_VCPU_ID_1, receiver_code); + vcpu_args_set(vcpu[1], 2, hcall_page, addr_gva2gpa(vm, hcall_page)); + vcpu_set_msr(vcpu[1], HV_X64_MSR_VP_INDEX, RECEIVER_VCPU_ID_1); + vcpu_set_hv_cpuid(vcpu[1]); + + vcpu[2] = vm_vcpu_add(vm, RECEIVER_VCPU_ID_2, receiver_code); + vcpu_args_set(vcpu[2], 2, hcall_page, addr_gva2gpa(vm, hcall_page)); + vcpu_set_msr(vcpu[2], HV_X64_MSR_VP_INDEX, RECEIVER_VCPU_ID_2); + vcpu_set_hv_cpuid(vcpu[2]); + + vm_install_exception_handler(vm, IPI_VECTOR, guest_ipi_handler); + + vcpu_args_set(vcpu[0], 2, hcall_page, addr_gva2gpa(vm, hcall_page)); + vcpu_set_hv_cpuid(vcpu[0]); + + r = pthread_create(&threads[0], NULL, vcpu_thread, vcpu[1]); + TEST_ASSERT(!r, "pthread_create failed errno=%d", r); + + r = pthread_create(&threads[1], NULL, vcpu_thread, vcpu[2]); + TEST_ASSERT(!r, "pthread_create failed errno=%d", errno); + + while (true) { + vcpu_run(vcpu[0]); + + TEST_ASSERT_KVM_EXIT_REASON(vcpu[0], KVM_EXIT_IO); + + switch (get_ucall(vcpu[0], &uc)) { + case UCALL_SYNC: + TEST_ASSERT(uc.args[1] == stage, + "Unexpected stage: %ld (%d expected)", + uc.args[1], stage); + break; + case UCALL_DONE: + goto done; + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + /* NOT REACHED */ + default: + TEST_FAIL("Unknown ucall %lu", uc.cmd); + } + + stage++; + } + +done: + cancel_join_vcpu_thread(threads[0], vcpu[1]); + cancel_join_vcpu_thread(threads[1], vcpu[2]); + kvm_vm_free(vm); + + return r; +} diff --git a/tools/testing/selftests/kvm/x86/hyperv_svm_test.c b/tools/testing/selftests/kvm/x86/hyperv_svm_test.c new file mode 100644 index 000000000000..0ddb63229bcb --- /dev/null +++ b/tools/testing/selftests/kvm/x86/hyperv_svm_test.c @@ -0,0 +1,199 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2022, Red Hat, Inc. + * + * Tests for Hyper-V extensions to SVM. 
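+ * + * Covers the enlightened MSR-Bitmap (toggled through the HV_VMCB_NESTED_ENLIGHTENMENTS clean bit) and the L2 'direct' TLB flush enlightenment with its synthetic HV_SVM_EXITCODE_ENL exit; see guest_code() below for the exact sequence.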
+ */ +#include +#include +#include +#include +#include +#include + +#include "test_util.h" + +#include "kvm_util.h" +#include "processor.h" +#include "svm_util.h" +#include "hyperv.h" + +#define L2_GUEST_STACK_SIZE 256 + +/* Exit to L1 from L2 with RDMSR instruction */ +static inline void rdmsr_from_l2(uint32_t msr) +{ + /* Currently, L1 doesn't preserve GPRs during vmexits. */ + __asm__ __volatile__ ("rdmsr" : : "c"(msr) : + "rax", "rbx", "rdx", "rsi", "rdi", "r8", "r9", + "r10", "r11", "r12", "r13", "r14", "r15"); +} + +void l2_guest_code(void) +{ + u64 unused; + + GUEST_SYNC(3); + /* Exit to L1 */ + vmmcall(); + + /* MSR-Bitmap tests */ + rdmsr_from_l2(MSR_FS_BASE); /* intercepted */ + rdmsr_from_l2(MSR_FS_BASE); /* intercepted */ + rdmsr_from_l2(MSR_GS_BASE); /* not intercepted */ + vmmcall(); + rdmsr_from_l2(MSR_GS_BASE); /* intercepted */ + + GUEST_SYNC(5); + + /* L2 TLB flush tests */ + hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE | + HV_HYPERCALL_FAST_BIT, 0x0, + HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES | + HV_FLUSH_ALL_PROCESSORS); + rdmsr_from_l2(MSR_FS_BASE); + /* + * Note: hypercall status (RAX) is not preserved correctly by L1 after + * synthetic vmexit, use unchecked version. + */ + __hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE | + HV_HYPERCALL_FAST_BIT, 0x0, + HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES | + HV_FLUSH_ALL_PROCESSORS, &unused); + + /* Done, exit to L1 and never come back. */ + vmmcall(); +} + +static void __attribute__((__flatten__)) guest_code(struct svm_test_data *svm, + struct hyperv_test_pages *hv_pages, + vm_vaddr_t pgs_gpa) +{ + unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; + struct vmcb *vmcb = svm->vmcb; + struct hv_vmcb_enlightenments *hve = &vmcb->control.hv_enlightenments; + + GUEST_SYNC(1); + + wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID); + wrmsr(HV_X64_MSR_HYPERCALL, pgs_gpa); + enable_vp_assist(hv_pages->vp_assist_gpa, hv_pages->vp_assist); + + GUEST_ASSERT(svm->vmcb_gpa); + /* Prepare for L2 execution. 
*/ + generic_svm_setup(svm, l2_guest_code, + &l2_guest_stack[L2_GUEST_STACK_SIZE]); + + /* L2 TLB flush setup */ + hve->partition_assist_page = hv_pages->partition_assist_gpa; + hve->hv_enlightenments_control.nested_flush_hypercall = 1; + hve->hv_vm_id = 1; + hve->hv_vp_id = 1; + current_vp_assist->nested_control.features.directhypercall = 1; + *(u32 *)(hv_pages->partition_assist) = 0; + + GUEST_SYNC(2); + run_guest(vmcb, svm->vmcb_gpa); + GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL); + GUEST_SYNC(4); + vmcb->save.rip += 3; + + /* Intercept RDMSR 0xc0000100 */ + vmcb->control.intercept |= 1ULL << INTERCEPT_MSR_PROT; + __set_bit(2 * (MSR_FS_BASE & 0x1fff), svm->msr + 0x800); + run_guest(vmcb, svm->vmcb_gpa); + GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_MSR); + vmcb->save.rip += 2; /* rdmsr */ + + /* Enable enlightened MSR bitmap */ + hve->hv_enlightenments_control.msr_bitmap = 1; + run_guest(vmcb, svm->vmcb_gpa); + GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_MSR); + vmcb->save.rip += 2; /* rdmsr */ + + /* Intercept RDMSR 0xc0000101 without telling KVM about it */ + __set_bit(2 * (MSR_GS_BASE & 0x1fff), svm->msr + 0x800); + /* Make sure HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP is set */ + vmcb->control.clean |= HV_VMCB_NESTED_ENLIGHTENMENTS; + run_guest(vmcb, svm->vmcb_gpa); + /* Make sure we don't see SVM_EXIT_MSR here so eMSR bitmap works */ + GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL); + vmcb->save.rip += 3; /* vmcall */ + + /* Now tell KVM we've changed MSR-Bitmap */ + vmcb->control.clean &= ~HV_VMCB_NESTED_ENLIGHTENMENTS; + run_guest(vmcb, svm->vmcb_gpa); + GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_MSR); + vmcb->save.rip += 2; /* rdmsr */ + + + /* + * L2 TLB flush test. First VMCALL should be handled directly by L0, + * no VMCALL exit expected. + */ + run_guest(vmcb, svm->vmcb_gpa); + GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_MSR); + vmcb->save.rip += 2; /* rdmsr */ + /* Enable synthetic vmexit */ + *(u32 *)(hv_pages->partition_assist) = 1; + run_guest(vmcb, svm->vmcb_gpa); + GUEST_ASSERT(vmcb->control.exit_code == HV_SVM_EXITCODE_ENL); + GUEST_ASSERT(vmcb->control.exit_info_1 == HV_SVM_ENL_EXITCODE_TRAP_AFTER_FLUSH); + + run_guest(vmcb, svm->vmcb_gpa); + GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL); + GUEST_SYNC(6); + + GUEST_DONE(); +} + +int main(int argc, char *argv[]) +{ + vm_vaddr_t nested_gva = 0, hv_pages_gva = 0; + vm_vaddr_t hcall_page; + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + struct ucall uc; + int stage; + + TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM)); + TEST_REQUIRE(kvm_hv_cpu_has(HV_X64_NESTED_DIRECT_FLUSH)); + + /* Create VM */ + vm = vm_create_with_one_vcpu(&vcpu, guest_code); + vcpu_set_hv_cpuid(vcpu); + vcpu_alloc_svm(vm, &nested_gva); + vcpu_alloc_hyperv_test_pages(vm, &hv_pages_gva); + + hcall_page = vm_vaddr_alloc_pages(vm, 1); + memset(addr_gva2hva(vm, hcall_page), 0x0, getpagesize()); + + vcpu_args_set(vcpu, 3, nested_gva, hv_pages_gva, addr_gva2gpa(vm, hcall_page)); + vcpu_set_msr(vcpu, HV_X64_MSR_VP_INDEX, vcpu->id); + + for (stage = 1;; stage++) { + vcpu_run(vcpu); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); + + switch (get_ucall(vcpu, &uc)) { + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + /* NOT REACHED */ + case UCALL_SYNC: + break; + case UCALL_DONE: + goto done; + default: + TEST_FAIL("Unknown ucall %lu", uc.cmd); + } + + /* UCALL_SYNC is handled here. 
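(GUEST_SYNC() passes the literal string "hello" in args[0] and the stage number in args[1], hence both checks.) 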
*/ + TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") && + uc.args[1] == stage, "Stage %d: Unexpected register values vmexit, got %lx", + stage, (ulong)uc.args[1]); + + } + +done: + kvm_vm_free(vm); +} diff --git a/tools/testing/selftests/kvm/x86/hyperv_tlb_flush.c b/tools/testing/selftests/kvm/x86/hyperv_tlb_flush.c new file mode 100644 index 000000000000..077cd0ec3040 --- /dev/null +++ b/tools/testing/selftests/kvm/x86/hyperv_tlb_flush.c @@ -0,0 +1,680 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Hyper-V HvFlushVirtualAddress{List,Space}{,Ex} tests + * + * Copyright (C) 2022, Red Hat, Inc. + * + */ +#include +#include +#include + +#include "kvm_util.h" +#include "processor.h" +#include "hyperv.h" +#include "test_util.h" +#include "vmx.h" + +#define WORKER_VCPU_ID_1 2 +#define WORKER_VCPU_ID_2 65 + +#define NTRY 100 +#define NTEST_PAGES 2 + +struct hv_vpset { + u64 format; + u64 valid_bank_mask; + u64 bank_contents[]; +}; + +enum HV_GENERIC_SET_FORMAT { + HV_GENERIC_SET_SPARSE_4K, + HV_GENERIC_SET_ALL, +}; + +#define HV_FLUSH_ALL_PROCESSORS BIT(0) +#define HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES BIT(1) +#define HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY BIT(2) +#define HV_FLUSH_USE_EXTENDED_RANGE_FORMAT BIT(3) + +/* HvFlushVirtualAddressSpace, HvFlushVirtualAddressList hypercalls */ +struct hv_tlb_flush { + u64 address_space; + u64 flags; + u64 processor_mask; + u64 gva_list[]; +} __packed; + +/* HvFlushVirtualAddressSpaceEx, HvFlushVirtualAddressListEx hypercalls */ +struct hv_tlb_flush_ex { + u64 address_space; + u64 flags; + struct hv_vpset hv_vp_set; + u64 gva_list[]; +} __packed; + +/* + * Pass the following info to 'workers' and 'sender' + * - Hypercall page's GVA + * - Hypercall page's GPA + * - Test pages GVA + * - GVAs of the test pages' PTEs + */ +struct test_data { + vm_vaddr_t hcall_gva; + vm_paddr_t hcall_gpa; + vm_vaddr_t test_pages; + vm_vaddr_t test_pages_pte[NTEST_PAGES]; +}; + +/* 'Worker' vCPU code checking the contents of the test page */ +static void worker_guest_code(vm_vaddr_t test_data) +{ + struct test_data *data = (struct test_data *)test_data; + u32 vcpu_id = rdmsr(HV_X64_MSR_VP_INDEX); + void *exp_page = (void *)data->test_pages + PAGE_SIZE * NTEST_PAGES; + u64 *this_cpu = (u64 *)(exp_page + vcpu_id * sizeof(u64)); + u64 expected, val; + + x2apic_enable(); + wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID); + + for (;;) { + cpu_relax(); + + expected = READ_ONCE(*this_cpu); + + /* + * Make sure the value in the test page is read after reading + * the expectation for the first time. Pairs with wmb() in + * prepare_to_test(). + */ + rmb(); + + val = READ_ONCE(*(u64 *)data->test_pages); + + /* + * Make sure the value in the test page is read after before + * reading the expectation for the second time. Pairs with wmb() + * post_test(). + */ + rmb(); + + /* + * '0' indicates the sender is between iterations, wait until + * the sender is ready for this vCPU to start checking again. + */ + if (!expected) + continue; + + /* + * Re-read the per-vCPU byte to ensure the sender didn't move + * onto a new iteration. + */ + if (expected != READ_ONCE(*this_cpu)) + continue; + + GUEST_ASSERT(val == expected); + } +} + +/* + * Write per-CPU info indicating what each 'worker' CPU is supposed to see in + * test page. '0' means don't check. + */ +static void set_expected_val(void *addr, u64 val, int vcpu_id) +{ + void *exp_page = addr + PAGE_SIZE * NTEST_PAGES; + + *(u64 *)(exp_page + vcpu_id * sizeof(u64)) = val; +} + +/* + * Update PTEs swapping two test pages. 
+ * TODO: use swap()/xchg() when these are provided. + */ +static void swap_two_test_pages(vm_paddr_t pte_gva1, vm_paddr_t pte_gva2) +{ + uint64_t tmp = *(uint64_t *)pte_gva1; + + *(uint64_t *)pte_gva1 = *(uint64_t *)pte_gva2; + *(uint64_t *)pte_gva2 = tmp; +} + +/* + * TODO: replace the silly NOP loop with a proper udelay() implementation. + */ +static inline void do_delay(void) +{ + int i; + + for (i = 0; i < 1000000; i++) + asm volatile("nop"); +} + +/* + * Prepare to test: 'disable' workers by setting the expectation to '0', + * clear hypercall input page and then swap two test pages. + */ +static inline void prepare_to_test(struct test_data *data) +{ + /* Clear hypercall input page */ + memset((void *)data->hcall_gva, 0, PAGE_SIZE); + + /* 'Disable' workers */ + set_expected_val((void *)data->test_pages, 0x0, WORKER_VCPU_ID_1); + set_expected_val((void *)data->test_pages, 0x0, WORKER_VCPU_ID_2); + + /* Make sure workers are 'disabled' before we swap PTEs. */ + wmb(); + + /* Make sure workers have enough time to notice */ + do_delay(); + + /* Swap test page mappings */ + swap_two_test_pages(data->test_pages_pte[0], data->test_pages_pte[1]); +} + +/* + * Finalize the test: check the hypercall result, set the expected val for + * 'worker' CPUs and give them some time to test. + */ +static inline void post_test(struct test_data *data, u64 exp1, u64 exp2) +{ + /* Make sure we change the expectation after swapping PTEs */ + wmb(); + + /* Set the expectation for workers, '0' means don't test */ + set_expected_val((void *)data->test_pages, exp1, WORKER_VCPU_ID_1); + set_expected_val((void *)data->test_pages, exp2, WORKER_VCPU_ID_2); + + /* Make sure workers have enough time to test */ + do_delay(); +} + +#define TESTVAL1 0x0101010101010101 +#define TESTVAL2 0x0202020202020202 + +/* Main vCPU doing the test */ +static void sender_guest_code(vm_vaddr_t test_data) +{ + struct test_data *data = (struct test_data *)test_data; + struct hv_tlb_flush *flush = (struct hv_tlb_flush *)data->hcall_gva; + struct hv_tlb_flush_ex *flush_ex = (struct hv_tlb_flush_ex *)data->hcall_gva; + vm_paddr_t hcall_gpa = data->hcall_gpa; + int i, stage = 1; + + wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID); + wrmsr(HV_X64_MSR_HYPERCALL, data->hcall_gpa); + + /* "Slow" hypercalls */ + + GUEST_SYNC(stage++); + + /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE for WORKER_VCPU_ID_1 */ + for (i = 0; i < NTRY; i++) { + prepare_to_test(data); + flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES; + flush->processor_mask = BIT(WORKER_VCPU_ID_1); + hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE, hcall_gpa, + hcall_gpa + PAGE_SIZE); + post_test(data, i % 2 ? TESTVAL1 : TESTVAL2, 0x0); + } + + GUEST_SYNC(stage++); + + /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST for WORKER_VCPU_ID_1 */ + for (i = 0; i < NTRY; i++) { + prepare_to_test(data); + flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES; + flush->processor_mask = BIT(WORKER_VCPU_ID_1); + flush->gva_list[0] = (u64)data->test_pages; + hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST | + (1UL << HV_HYPERCALL_REP_COMP_OFFSET), + hcall_gpa, hcall_gpa + PAGE_SIZE); + post_test(data, i % 2 ? 
TESTVAL1 : TESTVAL2, 0x0); + } + + GUEST_SYNC(stage++); + + /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE for HV_FLUSH_ALL_PROCESSORS */ + for (i = 0; i < NTRY; i++) { + prepare_to_test(data); + flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES | + HV_FLUSH_ALL_PROCESSORS; + flush->processor_mask = 0; + hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE, hcall_gpa, + hcall_gpa + PAGE_SIZE); + post_test(data, i % 2 ? TESTVAL1 : TESTVAL2, i % 2 ? TESTVAL1 : TESTVAL2); + } + + GUEST_SYNC(stage++); + + /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST for HV_FLUSH_ALL_PROCESSORS */ + for (i = 0; i < NTRY; i++) { + prepare_to_test(data); + flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES | + HV_FLUSH_ALL_PROCESSORS; + flush->gva_list[0] = (u64)data->test_pages; + hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST | + (1UL << HV_HYPERCALL_REP_COMP_OFFSET), + hcall_gpa, hcall_gpa + PAGE_SIZE); + post_test(data, i % 2 ? TESTVAL1 : TESTVAL2, + i % 2 ? TESTVAL1 : TESTVAL2); + } + + GUEST_SYNC(stage++); + + /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX for WORKER_VCPU_ID_2 */ + for (i = 0; i < NTRY; i++) { + prepare_to_test(data); + flush_ex->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES; + flush_ex->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K; + flush_ex->hv_vp_set.valid_bank_mask = BIT_ULL(WORKER_VCPU_ID_2 / 64); + flush_ex->hv_vp_set.bank_contents[0] = BIT_ULL(WORKER_VCPU_ID_2 % 64); + hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX | + (1 << HV_HYPERCALL_VARHEAD_OFFSET), + hcall_gpa, hcall_gpa + PAGE_SIZE); + post_test(data, 0x0, i % 2 ? TESTVAL1 : TESTVAL2); + } + + GUEST_SYNC(stage++); + + /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX for WORKER_VCPU_ID_2 */ + for (i = 0; i < NTRY; i++) { + prepare_to_test(data); + flush_ex->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES; + flush_ex->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K; + flush_ex->hv_vp_set.valid_bank_mask = BIT_ULL(WORKER_VCPU_ID_2 / 64); + flush_ex->hv_vp_set.bank_contents[0] = BIT_ULL(WORKER_VCPU_ID_2 % 64); + /* bank_contents and gva_list occupy the same space, thus [1] */ + flush_ex->gva_list[1] = (u64)data->test_pages; + hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX | + (1 << HV_HYPERCALL_VARHEAD_OFFSET) | + (1UL << HV_HYPERCALL_REP_COMP_OFFSET), + hcall_gpa, hcall_gpa + PAGE_SIZE); + post_test(data, 0x0, i % 2 ? TESTVAL1 : TESTVAL2); + } + + GUEST_SYNC(stage++); + + /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX for both vCPUs */ + for (i = 0; i < NTRY; i++) { + prepare_to_test(data); + flush_ex->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES; + flush_ex->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K; + flush_ex->hv_vp_set.valid_bank_mask = BIT_ULL(WORKER_VCPU_ID_2 / 64) | + BIT_ULL(WORKER_VCPU_ID_1 / 64); + flush_ex->hv_vp_set.bank_contents[0] = BIT_ULL(WORKER_VCPU_ID_1 % 64); + flush_ex->hv_vp_set.bank_contents[1] = BIT_ULL(WORKER_VCPU_ID_2 % 64); + hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX | + (2 << HV_HYPERCALL_VARHEAD_OFFSET), + hcall_gpa, hcall_gpa + PAGE_SIZE); + post_test(data, i % 2 ? TESTVAL1 : TESTVAL2, + i % 2 ? 
TESTVAL1 : TESTVAL2); + } + + GUEST_SYNC(stage++); + + /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX for both vCPUs */ + for (i = 0; i < NTRY; i++) { + prepare_to_test(data); + flush_ex->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES; + flush_ex->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K; + flush_ex->hv_vp_set.valid_bank_mask = BIT_ULL(WORKER_VCPU_ID_1 / 64) | + BIT_ULL(WORKER_VCPU_ID_2 / 64); + flush_ex->hv_vp_set.bank_contents[0] = BIT_ULL(WORKER_VCPU_ID_1 % 64); + flush_ex->hv_vp_set.bank_contents[1] = BIT_ULL(WORKER_VCPU_ID_2 % 64); + /* bank_contents and gva_list occupy the same space, thus [2] */ + flush_ex->gva_list[2] = (u64)data->test_pages; + hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX | + (2 << HV_HYPERCALL_VARHEAD_OFFSET) | + (1UL << HV_HYPERCALL_REP_COMP_OFFSET), + hcall_gpa, hcall_gpa + PAGE_SIZE); + post_test(data, i % 2 ? TESTVAL1 : TESTVAL2, + i % 2 ? TESTVAL1 : TESTVAL2); + } + + GUEST_SYNC(stage++); + + /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX for HV_GENERIC_SET_ALL */ + for (i = 0; i < NTRY; i++) { + prepare_to_test(data); + flush_ex->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES; + flush_ex->hv_vp_set.format = HV_GENERIC_SET_ALL; + hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX, + hcall_gpa, hcall_gpa + PAGE_SIZE); + post_test(data, i % 2 ? TESTVAL1 : TESTVAL2, + i % 2 ? TESTVAL1 : TESTVAL2); + } + + GUEST_SYNC(stage++); + + /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX for HV_GENERIC_SET_ALL */ + for (i = 0; i < NTRY; i++) { + prepare_to_test(data); + flush_ex->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES; + flush_ex->hv_vp_set.format = HV_GENERIC_SET_ALL; + flush_ex->gva_list[0] = (u64)data->test_pages; + hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX | + (1UL << HV_HYPERCALL_REP_COMP_OFFSET), + hcall_gpa, hcall_gpa + PAGE_SIZE); + post_test(data, i % 2 ? TESTVAL1 : TESTVAL2, + i % 2 ? TESTVAL1 : TESTVAL2); + } + + /* "Fast" hypercalls */ + + GUEST_SYNC(stage++); + + /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE for WORKER_VCPU_ID_1 */ + for (i = 0; i < NTRY; i++) { + prepare_to_test(data); + flush->processor_mask = BIT(WORKER_VCPU_ID_1); + hyperv_write_xmm_input(&flush->processor_mask, 1); + hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE | + HV_HYPERCALL_FAST_BIT, 0x0, + HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES); + post_test(data, i % 2 ? TESTVAL1 : TESTVAL2, 0x0); + } + + GUEST_SYNC(stage++); + + /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST for WORKER_VCPU_ID_1 */ + for (i = 0; i < NTRY; i++) { + prepare_to_test(data); + flush->processor_mask = BIT(WORKER_VCPU_ID_1); + flush->gva_list[0] = (u64)data->test_pages; + hyperv_write_xmm_input(&flush->processor_mask, 1); + hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST | + HV_HYPERCALL_FAST_BIT | + (1UL << HV_HYPERCALL_REP_COMP_OFFSET), + 0x0, HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES); + post_test(data, i % 2 ? TESTVAL1 : TESTVAL2, 0x0); + } + + GUEST_SYNC(stage++); + + /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE for HV_FLUSH_ALL_PROCESSORS */ + for (i = 0; i < NTRY; i++) { + prepare_to_test(data); + hyperv_write_xmm_input(&flush->processor_mask, 1); + hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE | + HV_HYPERCALL_FAST_BIT, 0x0, + HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES | + HV_FLUSH_ALL_PROCESSORS); + post_test(data, i % 2 ? TESTVAL1 : TESTVAL2, + i % 2 ? 
TESTVAL1 : TESTVAL2); + } + + GUEST_SYNC(stage++); + + /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST for HV_FLUSH_ALL_PROCESSORS */ + for (i = 0; i < NTRY; i++) { + prepare_to_test(data); + flush->gva_list[0] = (u64)data->test_pages; + hyperv_write_xmm_input(&flush->processor_mask, 1); + hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST | + HV_HYPERCALL_FAST_BIT | + (1UL << HV_HYPERCALL_REP_COMP_OFFSET), 0x0, + HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES | + HV_FLUSH_ALL_PROCESSORS); + post_test(data, i % 2 ? TESTVAL1 : TESTVAL2, + i % 2 ? TESTVAL1 : TESTVAL2); + } + + GUEST_SYNC(stage++); + + /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX for WORKER_VCPU_ID_2 */ + for (i = 0; i < NTRY; i++) { + prepare_to_test(data); + flush_ex->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K; + flush_ex->hv_vp_set.valid_bank_mask = BIT_ULL(WORKER_VCPU_ID_2 / 64); + flush_ex->hv_vp_set.bank_contents[0] = BIT_ULL(WORKER_VCPU_ID_2 % 64); + hyperv_write_xmm_input(&flush_ex->hv_vp_set, 2); + hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX | + HV_HYPERCALL_FAST_BIT | + (1 << HV_HYPERCALL_VARHEAD_OFFSET), + 0x0, HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES); + post_test(data, 0x0, i % 2 ? TESTVAL1 : TESTVAL2); + } + + GUEST_SYNC(stage++); + + /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX for WORKER_VCPU_ID_2 */ + for (i = 0; i < NTRY; i++) { + prepare_to_test(data); + flush_ex->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K; + flush_ex->hv_vp_set.valid_bank_mask = BIT_ULL(WORKER_VCPU_ID_2 / 64); + flush_ex->hv_vp_set.bank_contents[0] = BIT_ULL(WORKER_VCPU_ID_2 % 64); + /* bank_contents and gva_list occupy the same space, thus [1] */ + flush_ex->gva_list[1] = (u64)data->test_pages; + hyperv_write_xmm_input(&flush_ex->hv_vp_set, 2); + hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX | + HV_HYPERCALL_FAST_BIT | + (1 << HV_HYPERCALL_VARHEAD_OFFSET) | + (1UL << HV_HYPERCALL_REP_COMP_OFFSET), + 0x0, HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES); + post_test(data, 0x0, i % 2 ? TESTVAL1 : TESTVAL2); + } + + GUEST_SYNC(stage++); + + /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX for both vCPUs */ + for (i = 0; i < NTRY; i++) { + prepare_to_test(data); + flush_ex->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K; + flush_ex->hv_vp_set.valid_bank_mask = BIT_ULL(WORKER_VCPU_ID_2 / 64) | + BIT_ULL(WORKER_VCPU_ID_1 / 64); + flush_ex->hv_vp_set.bank_contents[0] = BIT_ULL(WORKER_VCPU_ID_1 % 64); + flush_ex->hv_vp_set.bank_contents[1] = BIT_ULL(WORKER_VCPU_ID_2 % 64); + hyperv_write_xmm_input(&flush_ex->hv_vp_set, 2); + hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX | + HV_HYPERCALL_FAST_BIT | + (2 << HV_HYPERCALL_VARHEAD_OFFSET), + 0x0, HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES); + post_test(data, i % 2 ? TESTVAL1 : + TESTVAL2, i % 2 ? 
TESTVAL1 : TESTVAL2); + } + + GUEST_SYNC(stage++); + + /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX for both vCPUs */ + for (i = 0; i < NTRY; i++) { + prepare_to_test(data); + flush_ex->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K; + flush_ex->hv_vp_set.valid_bank_mask = BIT_ULL(WORKER_VCPU_ID_1 / 64) | + BIT_ULL(WORKER_VCPU_ID_2 / 64); + flush_ex->hv_vp_set.bank_contents[0] = BIT_ULL(WORKER_VCPU_ID_1 % 64); + flush_ex->hv_vp_set.bank_contents[1] = BIT_ULL(WORKER_VCPU_ID_2 % 64); + /* bank_contents and gva_list occupy the same space, thus [2] */ + flush_ex->gva_list[2] = (u64)data->test_pages; + hyperv_write_xmm_input(&flush_ex->hv_vp_set, 3); + hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX | + HV_HYPERCALL_FAST_BIT | + (2 << HV_HYPERCALL_VARHEAD_OFFSET) | + (1UL << HV_HYPERCALL_REP_COMP_OFFSET), + 0x0, HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES); + post_test(data, i % 2 ? TESTVAL1 : TESTVAL2, + i % 2 ? TESTVAL1 : TESTVAL2); + } + + GUEST_SYNC(stage++); + + /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX for HV_GENERIC_SET_ALL */ + for (i = 0; i < NTRY; i++) { + prepare_to_test(data); + flush_ex->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES; + flush_ex->hv_vp_set.format = HV_GENERIC_SET_ALL; + hyperv_write_xmm_input(&flush_ex->hv_vp_set, 2); + hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX | + HV_HYPERCALL_FAST_BIT, + 0x0, HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES); + post_test(data, i % 2 ? TESTVAL1 : TESTVAL2, + i % 2 ? TESTVAL1 : TESTVAL2); + } + + GUEST_SYNC(stage++); + + /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX for HV_GENERIC_SET_ALL */ + for (i = 0; i < NTRY; i++) { + prepare_to_test(data); + flush_ex->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES; + flush_ex->hv_vp_set.format = HV_GENERIC_SET_ALL; + flush_ex->gva_list[0] = (u64)data->test_pages; + hyperv_write_xmm_input(&flush_ex->hv_vp_set, 2); + hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX | + HV_HYPERCALL_FAST_BIT | + (1UL << HV_HYPERCALL_REP_COMP_OFFSET), + 0x0, HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES); + post_test(data, i % 2 ? TESTVAL1 : TESTVAL2, + i % 2 ? 
TESTVAL1 : TESTVAL2); + } + + GUEST_DONE(); +} + +static void *vcpu_thread(void *arg) +{ + struct kvm_vcpu *vcpu = (struct kvm_vcpu *)arg; + struct ucall uc; + int old; + int r; + + r = pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, &old); + TEST_ASSERT(!r, "pthread_setcanceltype failed on vcpu_id=%u with errno=%d", + vcpu->id, r); + + vcpu_run(vcpu); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); + + switch (get_ucall(vcpu, &uc)) { + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + /* NOT REACHED */ + default: + TEST_FAIL("Unexpected ucall %lu, vCPU %d", uc.cmd, vcpu->id); + } + + return NULL; +} + +static void cancel_join_vcpu_thread(pthread_t thread, struct kvm_vcpu *vcpu) +{ + void *retval; + int r; + + r = pthread_cancel(thread); + TEST_ASSERT(!r, "pthread_cancel on vcpu_id=%d failed with errno=%d", + vcpu->id, r); + + r = pthread_join(thread, &retval); + TEST_ASSERT(!r, "pthread_join on vcpu_id=%d failed with errno=%d", + vcpu->id, r); + TEST_ASSERT(retval == PTHREAD_CANCELED, + "expected retval=%p, got %p", PTHREAD_CANCELED, + retval); +} + +int main(int argc, char *argv[]) +{ + struct kvm_vm *vm; + struct kvm_vcpu *vcpu[3]; + pthread_t threads[2]; + vm_vaddr_t test_data_page, gva; + vm_paddr_t gpa; + uint64_t *pte; + struct test_data *data; + struct ucall uc; + int stage = 1, r, i; + + TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_TLBFLUSH)); + + vm = vm_create_with_one_vcpu(&vcpu[0], sender_guest_code); + + /* Test data page */ + test_data_page = vm_vaddr_alloc_page(vm); + data = (struct test_data *)addr_gva2hva(vm, test_data_page); + + /* Hypercall input/output */ + data->hcall_gva = vm_vaddr_alloc_pages(vm, 2); + data->hcall_gpa = addr_gva2gpa(vm, data->hcall_gva); + memset(addr_gva2hva(vm, data->hcall_gva), 0x0, 2 * PAGE_SIZE); + + /* + * Test pages: the first one is filled with '0x01's, the second with '0x02's + * and the test will swap their mappings. The third page keeps the indication + * about the current state of mappings. + */ + data->test_pages = vm_vaddr_alloc_pages(vm, NTEST_PAGES + 1); + for (i = 0; i < NTEST_PAGES; i++) + memset(addr_gva2hva(vm, data->test_pages + PAGE_SIZE * i), + (u8)(i + 1), PAGE_SIZE); + set_expected_val(addr_gva2hva(vm, data->test_pages), 0x0, WORKER_VCPU_ID_1); + set_expected_val(addr_gva2hva(vm, data->test_pages), 0x0, WORKER_VCPU_ID_2); + + /* + * Get PTE pointers for test pages and map them inside the guest. + * Use separate page for each PTE for simplicity. + */ + gva = vm_vaddr_unused_gap(vm, NTEST_PAGES * PAGE_SIZE, KVM_UTIL_MIN_VADDR); + for (i = 0; i < NTEST_PAGES; i++) { + pte = vm_get_page_table_entry(vm, data->test_pages + i * PAGE_SIZE); + gpa = addr_hva2gpa(vm, pte); + __virt_pg_map(vm, gva + PAGE_SIZE * i, gpa & PAGE_MASK, PG_LEVEL_4K); + data->test_pages_pte[i] = gva + (gpa & ~PAGE_MASK); + } + + /* + * Sender vCPU which performs the test: swaps test pages, sets expectation + * for 'workers' and issues TLB flush hypercalls. 
+ */ + vcpu_args_set(vcpu[0], 1, test_data_page); + vcpu_set_hv_cpuid(vcpu[0]); + + /* Create worker vCPUs which check the contents of the test pages */ + vcpu[1] = vm_vcpu_add(vm, WORKER_VCPU_ID_1, worker_guest_code); + vcpu_args_set(vcpu[1], 1, test_data_page); + vcpu_set_msr(vcpu[1], HV_X64_MSR_VP_INDEX, WORKER_VCPU_ID_1); + vcpu_set_hv_cpuid(vcpu[1]); + + vcpu[2] = vm_vcpu_add(vm, WORKER_VCPU_ID_2, worker_guest_code); + vcpu_args_set(vcpu[2], 1, test_data_page); + vcpu_set_msr(vcpu[2], HV_X64_MSR_VP_INDEX, WORKER_VCPU_ID_2); + vcpu_set_hv_cpuid(vcpu[2]); + + r = pthread_create(&threads[0], NULL, vcpu_thread, vcpu[1]); + TEST_ASSERT(!r, "pthread_create() failed"); + + r = pthread_create(&threads[1], NULL, vcpu_thread, vcpu[2]); + TEST_ASSERT(!r, "pthread_create() failed"); + + while (true) { + vcpu_run(vcpu[0]); + TEST_ASSERT_KVM_EXIT_REASON(vcpu[0], KVM_EXIT_IO); + + switch (get_ucall(vcpu[0], &uc)) { + case UCALL_SYNC: + TEST_ASSERT(uc.args[1] == stage, + "Unexpected stage: %ld (%d expected)", + uc.args[1], stage); + break; + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + /* NOT REACHED */ + case UCALL_DONE: + goto done; + default: + TEST_FAIL("Unknown ucall %lu", uc.cmd); + } + + stage++; + } + +done: + cancel_join_vcpu_thread(threads[0], vcpu[1]); + cancel_join_vcpu_thread(threads[1], vcpu[2]); + kvm_vm_free(vm); + + return 0; +} diff --git a/tools/testing/selftests/kvm/x86/kvm_clock_test.c b/tools/testing/selftests/kvm/x86/kvm_clock_test.c new file mode 100644 index 000000000000..5bc12222d87a --- /dev/null +++ b/tools/testing/selftests/kvm/x86/kvm_clock_test.c @@ -0,0 +1,156 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2021, Google LLC. + * + * Tests for adjusting the KVM clock from userspace + */ +#include +#include +#include +#include +#include +#include +#include + +#include "test_util.h" +#include "kvm_util.h" +#include "processor.h" + +struct test_case { + uint64_t kvmclock_base; + int64_t realtime_offset; +}; + +static struct test_case test_cases[] = { + { .kvmclock_base = 0 }, + { .kvmclock_base = 180 * NSEC_PER_SEC }, + { .kvmclock_base = 0, .realtime_offset = -180 * NSEC_PER_SEC }, + { .kvmclock_base = 0, .realtime_offset = 180 * NSEC_PER_SEC }, +}; + +#define GUEST_SYNC_CLOCK(__stage, __val) \ + GUEST_SYNC_ARGS(__stage, __val, 0, 0, 0) + +static void guest_main(vm_paddr_t pvti_pa, struct pvclock_vcpu_time_info *pvti) +{ + int i; + + wrmsr(MSR_KVM_SYSTEM_TIME_NEW, pvti_pa | KVM_MSR_ENABLED); + for (i = 0; i < ARRAY_SIZE(test_cases); i++) + GUEST_SYNC_CLOCK(i, __pvclock_read_cycles(pvti, rdtsc())); +} + +#define EXPECTED_FLAGS (KVM_CLOCK_REALTIME | KVM_CLOCK_HOST_TSC) + +static inline void assert_flags(struct kvm_clock_data *data) +{ + TEST_ASSERT((data->flags & EXPECTED_FLAGS) == EXPECTED_FLAGS, + "unexpected clock data flags: %x (want set: %x)", + data->flags, EXPECTED_FLAGS); +} + +static void handle_sync(struct ucall *uc, struct kvm_clock_data *start, + struct kvm_clock_data *end) +{ + uint64_t obs, exp_lo, exp_hi; + + obs = uc->args[2]; + exp_lo = start->clock; + exp_hi = end->clock; + + assert_flags(start); + assert_flags(end); + + TEST_ASSERT(exp_lo <= obs && obs <= exp_hi, + "unexpected kvm-clock value: %"PRIu64" expected range: [%"PRIu64", %"PRIu64"]", + obs, exp_lo, exp_hi); + + pr_info("kvm-clock value: %"PRIu64" expected range [%"PRIu64", %"PRIu64"]\n", + obs, exp_lo, exp_hi); +} + +static void handle_abort(struct ucall *uc) +{ + REPORT_GUEST_ASSERT(*uc); +} + +static void setup_clock(struct kvm_vm *vm, struct test_case *test_case) +{ + 
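/* Program the kvmclock base and, optionally, a paired CLOCK_REALTIME reference shifted by the test case's offset. */ + 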
struct kvm_clock_data data; + + memset(&data, 0, sizeof(data)); + + data.clock = test_case->kvmclock_base; + if (test_case->realtime_offset) { + struct timespec ts; + int r; + + data.flags |= KVM_CLOCK_REALTIME; + do { + r = clock_gettime(CLOCK_REALTIME, &ts); + if (!r) + break; + } while (errno == EINTR); + + TEST_ASSERT(!r, "clock_gettime() failed: %d", r); + + data.realtime = ts.tv_sec * NSEC_PER_SEC; + data.realtime += ts.tv_nsec; + data.realtime += test_case->realtime_offset; + } + + vm_ioctl(vm, KVM_SET_CLOCK, &data); +} + +static void enter_guest(struct kvm_vcpu *vcpu) +{ + struct kvm_clock_data start, end; + struct kvm_vm *vm = vcpu->vm; + struct ucall uc; + int i; + + for (i = 0; i < ARRAY_SIZE(test_cases); i++) { + setup_clock(vm, &test_cases[i]); + + vm_ioctl(vm, KVM_GET_CLOCK, &start); + + vcpu_run(vcpu); + vm_ioctl(vm, KVM_GET_CLOCK, &end); + + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); + + switch (get_ucall(vcpu, &uc)) { + case UCALL_SYNC: + handle_sync(&uc, &start, &end); + break; + case UCALL_ABORT: + handle_abort(&uc); + return; + default: + TEST_ASSERT(0, "unhandled ucall: %ld", uc.cmd); + } + } +} + +int main(void) +{ + struct kvm_vcpu *vcpu; + vm_vaddr_t pvti_gva; + vm_paddr_t pvti_gpa; + struct kvm_vm *vm; + int flags; + + flags = kvm_check_cap(KVM_CAP_ADJUST_CLOCK); + TEST_REQUIRE(flags & KVM_CLOCK_REALTIME); + + TEST_REQUIRE(sys_clocksource_is_based_on_tsc()); + + vm = vm_create_with_one_vcpu(&vcpu, guest_main); + + pvti_gva = vm_vaddr_alloc(vm, getpagesize(), 0x10000); + pvti_gpa = addr_gva2gpa(vm, pvti_gva); + vcpu_args_set(vcpu, 2, pvti_gpa, pvti_gva); + + enter_guest(vcpu); + kvm_vm_free(vm); +} diff --git a/tools/testing/selftests/kvm/x86/kvm_pv_test.c b/tools/testing/selftests/kvm/x86/kvm_pv_test.c new file mode 100644 index 000000000000..78878b3a2725 --- /dev/null +++ b/tools/testing/selftests/kvm/x86/kvm_pv_test.c @@ -0,0 +1,190 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2020, Google LLC. + * + * Tests for KVM paravirtual feature disablement + */ +#include +#include +#include + +#include "test_util.h" +#include "kvm_util.h" +#include "processor.h" + +struct msr_data { + uint32_t idx; + const char *name; +}; + +#define TEST_MSR(msr) { .idx = msr, .name = #msr } +#define UCALL_PR_MSR 0xdeadbeef +#define PR_MSR(msr) ucall(UCALL_PR_MSR, 1, msr) + +/* + * KVM paravirtual msrs to test. Expect a #GP if any of these msrs are read or + * written, as the KVM_CPUID_FEATURES leaf is cleared. + */ +static struct msr_data msrs_to_test[] = { + TEST_MSR(MSR_KVM_SYSTEM_TIME), + TEST_MSR(MSR_KVM_SYSTEM_TIME_NEW), + TEST_MSR(MSR_KVM_WALL_CLOCK), + TEST_MSR(MSR_KVM_WALL_CLOCK_NEW), + TEST_MSR(MSR_KVM_ASYNC_PF_EN), + TEST_MSR(MSR_KVM_STEAL_TIME), + TEST_MSR(MSR_KVM_PV_EOI_EN), + TEST_MSR(MSR_KVM_POLL_CONTROL), + TEST_MSR(MSR_KVM_ASYNC_PF_INT), + TEST_MSR(MSR_KVM_ASYNC_PF_ACK), +}; + +static void test_msr(struct msr_data *msr) +{ + uint64_t ignored; + uint8_t vector; + + PR_MSR(msr); + + vector = rdmsr_safe(msr->idx, &ignored); + GUEST_ASSERT_EQ(vector, GP_VECTOR); + + vector = wrmsr_safe(msr->idx, 0); + GUEST_ASSERT_EQ(vector, GP_VECTOR); +} + +struct hcall_data { + uint64_t nr; + const char *name; +}; + +#define TEST_HCALL(hc) { .nr = hc, .name = #hc } +#define UCALL_PR_HCALL 0xdeadc0de +#define PR_HCALL(hc) ucall(UCALL_PR_HCALL, 1, hc) + +/* + * KVM hypercalls to test. Expect -KVM_ENOSYS when called, as the corresponding + * features have been cleared in KVM_CPUID_FEATURES. 
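+ * (This is only enforced because main() enables KVM_CAP_ENFORCE_PV_FEATURE_CPUID before clearing the leaf.)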
+ */ +static struct hcall_data hcalls_to_test[] = { + TEST_HCALL(KVM_HC_KICK_CPU), + TEST_HCALL(KVM_HC_SEND_IPI), + TEST_HCALL(KVM_HC_SCHED_YIELD), +}; + +static void test_hcall(struct hcall_data *hc) +{ + uint64_t r; + + PR_HCALL(hc); + r = kvm_hypercall(hc->nr, 0, 0, 0, 0); + GUEST_ASSERT_EQ(r, -KVM_ENOSYS); +} + +static void guest_main(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(msrs_to_test); i++) { + test_msr(&msrs_to_test[i]); + } + + for (i = 0; i < ARRAY_SIZE(hcalls_to_test); i++) { + test_hcall(&hcalls_to_test[i]); + } + + GUEST_DONE(); +} + +static void pr_msr(struct ucall *uc) +{ + struct msr_data *msr = (struct msr_data *)uc->args[0]; + + pr_info("testing msr: %s (%#x)\n", msr->name, msr->idx); +} + +static void pr_hcall(struct ucall *uc) +{ + struct hcall_data *hc = (struct hcall_data *)uc->args[0]; + + pr_info("testing hcall: %s (%lu)\n", hc->name, hc->nr); +} + +static void enter_guest(struct kvm_vcpu *vcpu) +{ + struct ucall uc; + + while (true) { + vcpu_run(vcpu); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); + + switch (get_ucall(vcpu, &uc)) { + case UCALL_PR_MSR: + pr_msr(&uc); + break; + case UCALL_PR_HCALL: + pr_hcall(&uc); + break; + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + return; + case UCALL_DONE: + return; + } + } +} + +static void test_pv_unhalt(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + struct kvm_cpuid_entry2 *ent; + u32 kvm_sig_old; + + pr_info("testing KVM_FEATURE_PV_UNHALT\n"); + + TEST_REQUIRE(KVM_CAP_X86_DISABLE_EXITS); + + /* KVM_PV_UNHALT test */ + vm = vm_create_with_one_vcpu(&vcpu, guest_main); + vcpu_set_cpuid_feature(vcpu, X86_FEATURE_KVM_PV_UNHALT); + + TEST_ASSERT(vcpu_cpuid_has(vcpu, X86_FEATURE_KVM_PV_UNHALT), + "Enabling X86_FEATURE_KVM_PV_UNHALT had no effect"); + + /* Make sure KVM clears vcpu->arch.kvm_cpuid */ + ent = vcpu_get_cpuid_entry(vcpu, KVM_CPUID_SIGNATURE); + kvm_sig_old = ent->ebx; + ent->ebx = 0xdeadbeef; + vcpu_set_cpuid(vcpu); + + vm_enable_cap(vm, KVM_CAP_X86_DISABLE_EXITS, KVM_X86_DISABLE_EXITS_HLT); + ent = vcpu_get_cpuid_entry(vcpu, KVM_CPUID_SIGNATURE); + ent->ebx = kvm_sig_old; + vcpu_set_cpuid(vcpu); + + TEST_ASSERT(!vcpu_cpuid_has(vcpu, X86_FEATURE_KVM_PV_UNHALT), + "KVM_FEATURE_PV_UNHALT is set with KVM_CAP_X86_DISABLE_EXITS"); + + /* FIXME: actually test KVM_FEATURE_PV_UNHALT feature */ + + kvm_vm_free(vm); +} + +int main(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + + TEST_REQUIRE(kvm_has_cap(KVM_CAP_ENFORCE_PV_FEATURE_CPUID)); + + vm = vm_create_with_one_vcpu(&vcpu, guest_main); + + vcpu_enable_cap(vcpu, KVM_CAP_ENFORCE_PV_FEATURE_CPUID, 1); + + vcpu_clear_cpuid_entry(vcpu, KVM_CPUID_FEATURES); + + enter_guest(vcpu); + kvm_vm_free(vm); + + test_pv_unhalt(); +} diff --git a/tools/testing/selftests/kvm/x86/max_vcpuid_cap_test.c b/tools/testing/selftests/kvm/x86/max_vcpuid_cap_test.c new file mode 100644 index 000000000000..7e2bfb3c3f3b --- /dev/null +++ b/tools/testing/selftests/kvm/x86/max_vcpuid_cap_test.c @@ -0,0 +1,62 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * maximum APIC ID capability tests + * + * Copyright (C) 2022, Intel, Inc. 
+ * + * Tests for getting/setting maximum APIC ID capability + */ + +#include "kvm_util.h" + +#define MAX_VCPU_ID 2 + +int main(int argc, char *argv[]) +{ + struct kvm_vm *vm; + int ret; + + vm = vm_create_barebones(); + + /* Get KVM_CAP_MAX_VCPU_ID cap supported in KVM */ + ret = vm_check_cap(vm, KVM_CAP_MAX_VCPU_ID); + + /* Try to set KVM_CAP_MAX_VCPU_ID beyond KVM cap */ + ret = __vm_enable_cap(vm, KVM_CAP_MAX_VCPU_ID, ret + 1); + TEST_ASSERT(ret < 0, + "Setting KVM_CAP_MAX_VCPU_ID beyond KVM cap should fail"); + + /* Test BOOT_CPU_ID interaction (MAX_VCPU_ID cannot be lower) */ + if (kvm_has_cap(KVM_CAP_SET_BOOT_CPU_ID)) { + vm_ioctl(vm, KVM_SET_BOOT_CPU_ID, (void *)MAX_VCPU_ID); + + /* Try setting KVM_CAP_MAX_VCPU_ID below BOOT_CPU_ID */ + ret = __vm_enable_cap(vm, KVM_CAP_MAX_VCPU_ID, MAX_VCPU_ID - 1); + TEST_ASSERT(ret < 0, + "Setting KVM_CAP_MAX_VCPU_ID below BOOT_CPU_ID should fail"); + } + + /* Set KVM_CAP_MAX_VCPU_ID */ + vm_enable_cap(vm, KVM_CAP_MAX_VCPU_ID, MAX_VCPU_ID); + + /* Try to set KVM_CAP_MAX_VCPU_ID again */ + ret = __vm_enable_cap(vm, KVM_CAP_MAX_VCPU_ID, MAX_VCPU_ID + 1); + TEST_ASSERT(ret < 0, + "Setting KVM_CAP_MAX_VCPU_ID multiple times should fail"); + + /* Create vCPU with id beyond KVM_CAP_MAX_VCPU_ID cap */ + ret = __vm_ioctl(vm, KVM_CREATE_VCPU, (void *)MAX_VCPU_ID); + TEST_ASSERT(ret < 0, "Creating vCPU with ID > MAX_VCPU_ID should fail"); + + /* Create vCPU with bits 63:32 != 0, but an otherwise valid id */ + ret = __vm_ioctl(vm, KVM_CREATE_VCPU, (void *)(1L << 32)); + TEST_ASSERT(ret < 0, "Creating vCPU with ID[63:32] != 0 should fail"); + + /* Create vCPU with id within bounds */ + ret = __vm_ioctl(vm, KVM_CREATE_VCPU, (void *)0); + TEST_ASSERT(ret >= 0, "Creating vCPU with ID 0 should succeed"); + + close(ret); + kvm_vm_free(vm); + return 0; +} diff --git a/tools/testing/selftests/kvm/x86/monitor_mwait_test.c b/tools/testing/selftests/kvm/x86/monitor_mwait_test.c new file mode 100644 index 000000000000..2b550eff35f1 --- /dev/null +++ b/tools/testing/selftests/kvm/x86/monitor_mwait_test.c @@ -0,0 +1,129 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include +#include + +#include "kvm_util.h" +#include "processor.h" + +#define CPUID_MWAIT (1u << 3) + +enum monitor_mwait_testcases { + MWAIT_QUIRK_DISABLED = BIT(0), + MISC_ENABLES_QUIRK_DISABLED = BIT(1), + MWAIT_DISABLED = BIT(2), +}; + +/* + * If both MWAIT and its quirk are disabled, MONITOR/MWAIT should #UD, in all + * other scenarios KVM should emulate them as nops. + */ +#define GUEST_ASSERT_MONITOR_MWAIT(insn, testcase, vector) \ +do { \ + bool fault_wanted = ((testcase) & MWAIT_QUIRK_DISABLED) && \ + ((testcase) & MWAIT_DISABLED); \ + \ + if (fault_wanted) \ + __GUEST_ASSERT((vector) == UD_VECTOR, \ + "Expected #UD on " insn " for testcase '0x%x', got '0x%x'", \ + testcase, vector); \ + else \ + __GUEST_ASSERT(!(vector), \ + "Expected success on " insn " for testcase '0x%x', got '0x%x'", \ + testcase, vector); \ +} while (0) + +static void guest_monitor_wait(int testcase) +{ + u8 vector; + + GUEST_SYNC(testcase); + + /* + * Arbitrarily MONITOR this function, SVM performs fault checks before + * intercept checks, so the inputs for MONITOR and MWAIT must be valid. 
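+ * (MONITOR takes the address in RAX and extensions/hints in RCX/RDX; MWAIT takes hints in EAX and extensions in ECX, hence the "a"/"c"/"d" operands handed to kvm_asm_safe().)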
+ */ + vector = kvm_asm_safe("monitor", "a"(guest_monitor_wait), "c"(0), "d"(0)); + GUEST_ASSERT_MONITOR_MWAIT("MONITOR", testcase, vector); + + vector = kvm_asm_safe("mwait", "a"(guest_monitor_wait), "c"(0), "d"(0)); + GUEST_ASSERT_MONITOR_MWAIT("MWAIT", testcase, vector); +} + +static void guest_code(void) +{ + guest_monitor_wait(MWAIT_DISABLED); + + guest_monitor_wait(MWAIT_QUIRK_DISABLED | MWAIT_DISABLED); + + guest_monitor_wait(MISC_ENABLES_QUIRK_DISABLED | MWAIT_DISABLED); + guest_monitor_wait(MISC_ENABLES_QUIRK_DISABLED); + + guest_monitor_wait(MISC_ENABLES_QUIRK_DISABLED | MWAIT_QUIRK_DISABLED | MWAIT_DISABLED); + guest_monitor_wait(MISC_ENABLES_QUIRK_DISABLED | MWAIT_QUIRK_DISABLED); + + GUEST_DONE(); +} + +int main(int argc, char *argv[]) +{ + uint64_t disabled_quirks; + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + struct ucall uc; + int testcase; + + TEST_REQUIRE(this_cpu_has(X86_FEATURE_MWAIT)); + TEST_REQUIRE(kvm_has_cap(KVM_CAP_DISABLE_QUIRKS2)); + + vm = vm_create_with_one_vcpu(&vcpu, guest_code); + vcpu_clear_cpuid_feature(vcpu, X86_FEATURE_MWAIT); + + while (1) { + vcpu_run(vcpu); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); + + switch (get_ucall(vcpu, &uc)) { + case UCALL_SYNC: + testcase = uc.args[1]; + break; + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + goto done; + case UCALL_DONE: + goto done; + default: + TEST_FAIL("Unknown ucall %lu", uc.cmd); + goto done; + } + + disabled_quirks = 0; + if (testcase & MWAIT_QUIRK_DISABLED) + disabled_quirks |= KVM_X86_QUIRK_MWAIT_NEVER_UD_FAULTS; + if (testcase & MISC_ENABLES_QUIRK_DISABLED) + disabled_quirks |= KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT; + vm_enable_cap(vm, KVM_CAP_DISABLE_QUIRKS2, disabled_quirks); + + /* + * If the MISC_ENABLES quirk (KVM neglects to update CPUID to + * enable/disable MWAIT) is disabled, toggle the ENABLE_MWAIT + * bit in MISC_ENABLES accordingly. If the quirk is enabled, + * the only valid configuration is MWAIT disabled, as CPUID + * can't be manually changed after running the vCPU. + */ + if (!(testcase & MISC_ENABLES_QUIRK_DISABLED)) { + TEST_ASSERT(testcase & MWAIT_DISABLED, + "Can't toggle CPUID features after running vCPU"); + continue; + } + + vcpu_set_msr(vcpu, MSR_IA32_MISC_ENABLE, + (testcase & MWAIT_DISABLED) ? 0 : MSR_IA32_MISC_ENABLE_MWAIT); + } + +done: + kvm_vm_free(vm); + return 0; +} diff --git a/tools/testing/selftests/kvm/x86/nested_exceptions_test.c b/tools/testing/selftests/kvm/x86/nested_exceptions_test.c new file mode 100644 index 000000000000..3eb0313ffa39 --- /dev/null +++ b/tools/testing/selftests/kvm/x86/nested_exceptions_test.c @@ -0,0 +1,288 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include "test_util.h" +#include "kvm_util.h" +#include "processor.h" +#include "vmx.h" +#include "svm_util.h" + +#define L2_GUEST_STACK_SIZE 256 + +/* + * Arbitrary, never shoved into KVM/hardware, just need to avoid conflict with + * the "real" exceptions used, #SS/#GP/#DF (12/13/8). + */ +#define FAKE_TRIPLE_FAULT_VECTOR 0xaa + +/* Arbitrary 32-bit error code injected by this test. */ +#define SS_ERROR_CODE 0xdeadbeef + +/* + * Bit '0' is set on Intel if the exception occurs while delivering a previous + * event/exception. AMD's wording is ambiguous, but presumably the bit is set + * if the exception occurs while delivering an external event, e.g. NMI or INTR, + * but not for exceptions that occur when delivering other exceptions or + * software interrupts. 
+ * + * Note, Intel's name for it, "External event", is misleading and much more + * aligned with AMD's behavior, but the SDM is quite clear on its behavior. + */ +#define ERROR_CODE_EXT_FLAG BIT(0) + +/* + * Bit '1' is set if the fault occurred when looking up a descriptor in the + * IDT, which is the case here as the IDT is empty/NULL. + */ +#define ERROR_CODE_IDT_FLAG BIT(1) + +/* + * The #GP that occurs when vectoring #SS should show the index into the IDT + * for #SS, plus have the "IDT flag" set. + */ +#define GP_ERROR_CODE_AMD ((SS_VECTOR * 8) | ERROR_CODE_IDT_FLAG) +#define GP_ERROR_CODE_INTEL ((SS_VECTOR * 8) | ERROR_CODE_IDT_FLAG | ERROR_CODE_EXT_FLAG) + +/* + * Intel and AMD both shove '0' into the error code on #DF, regardless of what + * led to the double fault. + */ +#define DF_ERROR_CODE 0 + +#define INTERCEPT_SS (BIT_ULL(SS_VECTOR)) +#define INTERCEPT_SS_DF (INTERCEPT_SS | BIT_ULL(DF_VECTOR)) +#define INTERCEPT_SS_GP_DF (INTERCEPT_SS_DF | BIT_ULL(GP_VECTOR)) + +static void l2_ss_pending_test(void) +{ + GUEST_SYNC(SS_VECTOR); +} + +static void l2_ss_injected_gp_test(void) +{ + GUEST_SYNC(GP_VECTOR); +} + +static void l2_ss_injected_df_test(void) +{ + GUEST_SYNC(DF_VECTOR); +} + +static void l2_ss_injected_tf_test(void) +{ + GUEST_SYNC(FAKE_TRIPLE_FAULT_VECTOR); +} + +static void svm_run_l2(struct svm_test_data *svm, void *l2_code, int vector, + uint32_t error_code) +{ + struct vmcb *vmcb = svm->vmcb; + struct vmcb_control_area *ctrl = &vmcb->control; + + vmcb->save.rip = (u64)l2_code; + run_guest(vmcb, svm->vmcb_gpa); + + if (vector == FAKE_TRIPLE_FAULT_VECTOR) + return; + + GUEST_ASSERT_EQ(ctrl->exit_code, (SVM_EXIT_EXCP_BASE + vector)); + GUEST_ASSERT_EQ(ctrl->exit_info_1, error_code); +} + +static void l1_svm_code(struct svm_test_data *svm) +{ + struct vmcb_control_area *ctrl = &svm->vmcb->control; + unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; + + generic_svm_setup(svm, NULL, &l2_guest_stack[L2_GUEST_STACK_SIZE]); + svm->vmcb->save.idtr.limit = 0; + ctrl->intercept |= BIT_ULL(INTERCEPT_SHUTDOWN); + + ctrl->intercept_exceptions = INTERCEPT_SS_GP_DF; + svm_run_l2(svm, l2_ss_pending_test, SS_VECTOR, SS_ERROR_CODE); + svm_run_l2(svm, l2_ss_injected_gp_test, GP_VECTOR, GP_ERROR_CODE_AMD); + + ctrl->intercept_exceptions = INTERCEPT_SS_DF; + svm_run_l2(svm, l2_ss_injected_df_test, DF_VECTOR, DF_ERROR_CODE); + + ctrl->intercept_exceptions = INTERCEPT_SS; + svm_run_l2(svm, l2_ss_injected_tf_test, FAKE_TRIPLE_FAULT_VECTOR, 0); + GUEST_ASSERT_EQ(ctrl->exit_code, SVM_EXIT_SHUTDOWN); + + GUEST_DONE(); +} + +static void vmx_run_l2(void *l2_code, int vector, uint32_t error_code) +{ + GUEST_ASSERT(!vmwrite(GUEST_RIP, (u64)l2_code)); + + GUEST_ASSERT_EQ(vector == SS_VECTOR ? vmlaunch() : vmresume(), 0); + + if (vector == FAKE_TRIPLE_FAULT_VECTOR) + return; + + GUEST_ASSERT_EQ(vmreadz(VM_EXIT_REASON), EXIT_REASON_EXCEPTION_NMI); + GUEST_ASSERT_EQ((vmreadz(VM_EXIT_INTR_INFO) & 0xff), vector); + GUEST_ASSERT_EQ(vmreadz(VM_EXIT_INTR_ERROR_CODE), error_code); +} + +static void l1_vmx_code(struct vmx_pages *vmx) +{ + unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; + + GUEST_ASSERT_EQ(prepare_for_vmx_operation(vmx), true); + + GUEST_ASSERT_EQ(load_vmcs(vmx), true); + + prepare_vmcs(vmx, NULL, &l2_guest_stack[L2_GUEST_STACK_SIZE]); + GUEST_ASSERT_EQ(vmwrite(GUEST_IDTR_LIMIT, 0), 0); + + /* + * VMX disallows injecting an exception with error_code[31:16] != 0, + * and hardware will never generate a VM-Exit with bits 31:16 set. + * KVM should likewise truncate the "bad" userspace value. 
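+ * That is why the pending #SS below is checked against (u16)SS_ERROR_CODE.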
+ */ + GUEST_ASSERT_EQ(vmwrite(EXCEPTION_BITMAP, INTERCEPT_SS_GP_DF), 0); + vmx_run_l2(l2_ss_pending_test, SS_VECTOR, (u16)SS_ERROR_CODE); + vmx_run_l2(l2_ss_injected_gp_test, GP_VECTOR, GP_ERROR_CODE_INTEL); + + GUEST_ASSERT_EQ(vmwrite(EXCEPTION_BITMAP, INTERCEPT_SS_DF), 0); + vmx_run_l2(l2_ss_injected_df_test, DF_VECTOR, DF_ERROR_CODE); + + GUEST_ASSERT_EQ(vmwrite(EXCEPTION_BITMAP, INTERCEPT_SS), 0); + vmx_run_l2(l2_ss_injected_tf_test, FAKE_TRIPLE_FAULT_VECTOR, 0); + GUEST_ASSERT_EQ(vmreadz(VM_EXIT_REASON), EXIT_REASON_TRIPLE_FAULT); + + GUEST_DONE(); +} + +static void __attribute__((__flatten__)) l1_guest_code(void *test_data) +{ + if (this_cpu_has(X86_FEATURE_SVM)) + l1_svm_code(test_data); + else + l1_vmx_code(test_data); +} + +static void assert_ucall_vector(struct kvm_vcpu *vcpu, int vector) +{ + struct ucall uc; + + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); + + switch (get_ucall(vcpu, &uc)) { + case UCALL_SYNC: + TEST_ASSERT(vector == uc.args[1], + "Expected L2 to ask for %d, got %ld", vector, uc.args[1]); + break; + case UCALL_DONE: + TEST_ASSERT(vector == -1, + "Expected L2 to ask for %d, L2 says it's done", vector); + break; + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + break; + default: + TEST_FAIL("Expected L2 to ask for %d, got unexpected ucall %lu", vector, uc.cmd); + } +} + +static void queue_ss_exception(struct kvm_vcpu *vcpu, bool inject) +{ + struct kvm_vcpu_events events; + + vcpu_events_get(vcpu, &events); + + TEST_ASSERT(!events.exception.pending, + "Vector %d unexpectedlt pending", events.exception.nr); + TEST_ASSERT(!events.exception.injected, + "Vector %d unexpectedly injected", events.exception.nr); + + events.flags = KVM_VCPUEVENT_VALID_PAYLOAD; + events.exception.pending = !inject; + events.exception.injected = inject; + events.exception.nr = SS_VECTOR; + events.exception.has_error_code = true; + events.exception.error_code = SS_ERROR_CODE; + vcpu_events_set(vcpu, &events); +} + +/* + * Verify KVM_{G,S}ET_EVENTS play nice with pending vs. injected exceptions + * when an exception is being queued for L2. Specifically, verify that KVM + * honors L1 exception intercept controls when a #SS is pending/injected, + * triggers a #GP on vectoring the #SS, morphs to #DF if #GP isn't intercepted + * by L1, and finally causes (nested) SHUTDOWN if #DF isn't intercepted by L1. + */ +int main(int argc, char *argv[]) +{ + vm_vaddr_t nested_test_data_gva; + struct kvm_vcpu_events events; + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + + TEST_REQUIRE(kvm_has_cap(KVM_CAP_EXCEPTION_PAYLOAD)); + TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM) || kvm_cpu_has(X86_FEATURE_VMX)); + + vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code); + vm_enable_cap(vm, KVM_CAP_EXCEPTION_PAYLOAD, -2ul); + + if (kvm_cpu_has(X86_FEATURE_SVM)) + vcpu_alloc_svm(vm, &nested_test_data_gva); + else + vcpu_alloc_vmx(vm, &nested_test_data_gva); + + vcpu_args_set(vcpu, 1, nested_test_data_gva); + + /* Run L1 => L2. L2 should sync and request #SS. */ + vcpu_run(vcpu); + assert_ucall_vector(vcpu, SS_VECTOR); + + /* Pend #SS and request immediate exit. #SS should still be pending. */ + queue_ss_exception(vcpu, false); + vcpu->run->immediate_exit = true; + vcpu_run_complete_io(vcpu); + + /* Verify the pending events comes back out the same as it went in. 
*/ + vcpu_events_get(vcpu, &events); + TEST_ASSERT_EQ(events.flags & KVM_VCPUEVENT_VALID_PAYLOAD, + KVM_VCPUEVENT_VALID_PAYLOAD); + TEST_ASSERT_EQ(events.exception.pending, true); + TEST_ASSERT_EQ(events.exception.nr, SS_VECTOR); + TEST_ASSERT_EQ(events.exception.has_error_code, true); + TEST_ASSERT_EQ(events.exception.error_code, SS_ERROR_CODE); + + /* + * Run for real with the pending #SS, L1 should get a VM-Exit due to + * #SS interception and re-enter L2 to request #GP (via injected #SS). + */ + vcpu->run->immediate_exit = false; + vcpu_run(vcpu); + assert_ucall_vector(vcpu, GP_VECTOR); + + /* + * Inject #SS, the #SS should bypass interception and cause #GP, which + * L1 should intercept before KVM morphs it to #DF. L1 should then + * disable #GP interception and run L2 to request #DF (via #SS => #GP). + */ + queue_ss_exception(vcpu, true); + vcpu_run(vcpu); + assert_ucall_vector(vcpu, DF_VECTOR); + + /* + * Inject #SS, the #SS should bypass interception and cause #GP, which + * L1 is no longer interception, and so should see a #DF VM-Exit. L1 + * should then signal that is done. + */ + queue_ss_exception(vcpu, true); + vcpu_run(vcpu); + assert_ucall_vector(vcpu, FAKE_TRIPLE_FAULT_VECTOR); + + /* + * Inject #SS yet again. L1 is not intercepting #GP or #DF, and so + * should see nested TRIPLE_FAULT / SHUTDOWN. + */ + queue_ss_exception(vcpu, true); + vcpu_run(vcpu); + assert_ucall_vector(vcpu, -1); + + kvm_vm_free(vm); +} diff --git a/tools/testing/selftests/kvm/x86/nx_huge_pages_test.c b/tools/testing/selftests/kvm/x86/nx_huge_pages_test.c new file mode 100644 index 000000000000..e7efb2b35f8b --- /dev/null +++ b/tools/testing/selftests/kvm/x86/nx_huge_pages_test.c @@ -0,0 +1,266 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Usage: to be run via nx_huge_page_test.sh, which does the necessary + * environment setup and teardown + * + * Copyright (C) 2022, Google LLC. + */ +#include +#include +#include + +#include +#include "kvm_util.h" +#include "processor.h" + +#define HPAGE_SLOT 10 +#define HPAGE_GPA (4UL << 30) /* 4G prevents collision w/ slot 0 */ +#define HPAGE_GVA HPAGE_GPA /* GVA is arbitrary, so use GPA. */ +#define PAGES_PER_2MB_HUGE_PAGE 512 +#define HPAGE_SLOT_NPAGES (3 * PAGES_PER_2MB_HUGE_PAGE) + +/* + * Passed by nx_huge_pages_test.sh to provide an easy warning if this test is + * being run without it. + */ +#define MAGIC_TOKEN 887563923 + +/* + * x86 opcode for the return instruction. Used to call into, and then + * immediately return from, memory backed with hugepages. + */ +#define RETURN_OPCODE 0xC3 + +/* Call the specified memory address. */ +static void guest_do_CALL(uint64_t target) +{ + ((void (*)(void)) target)(); +} + +/* + * Exit the VM after each memory access so that the userspace component of the + * test can make assertions about the pages backing the VM. + * + * See the below for an explanation of how each access should affect the + * backing mappings. 
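+ *
+ * Roughly, with NX huge pages enabled the host side expects:
+ *   read hpage_1  -> 1 huge page,  0 splits
+ *   read hpage_2  -> 2 huge pages, 0 splits
+ *   exec hpage_1  -> 1 huge page,  1 split
+ *   exec hpage_3  -> 1 huge page,  2 splits
+ * and, after the reclaim period, the splits are zapped so re-reading
+ * hpage_3 can be mapped huge again (2 huge pages, 0 splits).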
+ */ +void guest_code(void) +{ + uint64_t hpage_1 = HPAGE_GVA; + uint64_t hpage_2 = hpage_1 + (PAGE_SIZE * 512); + uint64_t hpage_3 = hpage_2 + (PAGE_SIZE * 512); + + READ_ONCE(*(uint64_t *)hpage_1); + GUEST_SYNC(1); + + READ_ONCE(*(uint64_t *)hpage_2); + GUEST_SYNC(2); + + guest_do_CALL(hpage_1); + GUEST_SYNC(3); + + guest_do_CALL(hpage_3); + GUEST_SYNC(4); + + READ_ONCE(*(uint64_t *)hpage_1); + GUEST_SYNC(5); + + READ_ONCE(*(uint64_t *)hpage_3); + GUEST_SYNC(6); +} + +static void check_2m_page_count(struct kvm_vm *vm, int expected_pages_2m) +{ + int actual_pages_2m; + + actual_pages_2m = vm_get_stat(vm, "pages_2m"); + + TEST_ASSERT(actual_pages_2m == expected_pages_2m, + "Unexpected 2m page count. Expected %d, got %d", + expected_pages_2m, actual_pages_2m); +} + +static void check_split_count(struct kvm_vm *vm, int expected_splits) +{ + int actual_splits; + + actual_splits = vm_get_stat(vm, "nx_lpage_splits"); + + TEST_ASSERT(actual_splits == expected_splits, + "Unexpected NX huge page split count. Expected %d, got %d", + expected_splits, actual_splits); +} + +static void wait_for_reclaim(int reclaim_period_ms) +{ + long reclaim_wait_ms; + struct timespec ts; + + reclaim_wait_ms = reclaim_period_ms * 5; + ts.tv_sec = reclaim_wait_ms / 1000; + ts.tv_nsec = (reclaim_wait_ms - (ts.tv_sec * 1000)) * 1000000; + nanosleep(&ts, NULL); +} + +void run_test(int reclaim_period_ms, bool disable_nx_huge_pages, + bool reboot_permissions) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + uint64_t nr_bytes; + void *hva; + int r; + + vm = vm_create(1); + + if (disable_nx_huge_pages) { + r = __vm_disable_nx_huge_pages(vm); + if (reboot_permissions) { + TEST_ASSERT(!r, "Disabling NX huge pages should succeed if process has reboot permissions"); + } else { + TEST_ASSERT(r == -1 && errno == EPERM, + "This process should not have permission to disable NX huge pages"); + return; + } + } + + vcpu = vm_vcpu_add(vm, 0, guest_code); + + vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS_HUGETLB, + HPAGE_GPA, HPAGE_SLOT, + HPAGE_SLOT_NPAGES, 0); + + nr_bytes = HPAGE_SLOT_NPAGES * vm->page_size; + + /* + * Ensure that KVM can map HPAGE_SLOT with huge pages by mapping the + * region into the guest with 2MiB pages whenever TDP is disabled (i.e. + * whenever KVM is shadowing the guest page tables). + * + * When TDP is enabled, KVM should be able to map HPAGE_SLOT with huge + * pages irrespective of the guest page size, so map with 4KiB pages + * to test that that is the case. + */ + if (kvm_is_tdp_enabled()) + virt_map_level(vm, HPAGE_GVA, HPAGE_GPA, nr_bytes, PG_LEVEL_4K); + else + virt_map_level(vm, HPAGE_GVA, HPAGE_GPA, nr_bytes, PG_LEVEL_2M); + + hva = addr_gpa2hva(vm, HPAGE_GPA); + memset(hva, RETURN_OPCODE, nr_bytes); + + check_2m_page_count(vm, 0); + check_split_count(vm, 0); + + /* + * The guest code will first read from the first hugepage, resulting + * in a huge page mapping being created. + */ + vcpu_run(vcpu); + check_2m_page_count(vm, 1); + check_split_count(vm, 0); + + /* + * Then the guest code will read from the second hugepage, resulting + * in another huge page mapping being created. + */ + vcpu_run(vcpu); + check_2m_page_count(vm, 2); + check_split_count(vm, 0); + + /* + * Next, the guest will execute from the first huge page, causing it + * to be remapped at 4k. + * + * If NX huge pages are disabled, this should have no effect. + */ + vcpu_run(vcpu); + check_2m_page_count(vm, disable_nx_huge_pages ? 2 : 1); + check_split_count(vm, disable_nx_huge_pages ? 
0 : 1); + + /* + * Executing from the third huge page (previously unaccessed) will + * cause part to be mapped at 4k. + * + * If NX huge pages are disabled, it should be mapped at 2M. + */ + vcpu_run(vcpu); + check_2m_page_count(vm, disable_nx_huge_pages ? 3 : 1); + check_split_count(vm, disable_nx_huge_pages ? 0 : 2); + + /* Reading from the first huge page again should have no effect. */ + vcpu_run(vcpu); + check_2m_page_count(vm, disable_nx_huge_pages ? 3 : 1); + check_split_count(vm, disable_nx_huge_pages ? 0 : 2); + + /* Give recovery thread time to run. */ + wait_for_reclaim(reclaim_period_ms); + + /* + * Now that the reclaimer has run, all the split pages should be gone. + * + * If NX huge pages are disabled, the relaimer will not run, so + * nothing should change from here on. + */ + check_2m_page_count(vm, disable_nx_huge_pages ? 3 : 1); + check_split_count(vm, 0); + + /* + * The 4k mapping on hpage 3 should have been removed, so check that + * reading from it causes a huge page mapping to be installed. + */ + vcpu_run(vcpu); + check_2m_page_count(vm, disable_nx_huge_pages ? 3 : 2); + check_split_count(vm, 0); + + kvm_vm_free(vm); +} + +static void help(char *name) +{ + puts(""); + printf("usage: %s [-h] [-p period_ms] [-t token]\n", name); + puts(""); + printf(" -p: The NX reclaim period in milliseconds.\n"); + printf(" -t: The magic token to indicate environment setup is done.\n"); + printf(" -r: The test has reboot permissions and can disable NX huge pages.\n"); + puts(""); + exit(0); +} + +int main(int argc, char **argv) +{ + int reclaim_period_ms = 0, token = 0, opt; + bool reboot_permissions = false; + + while ((opt = getopt(argc, argv, "hp:t:r")) != -1) { + switch (opt) { + case 'p': + reclaim_period_ms = atoi_positive("Reclaim period", optarg); + break; + case 't': + token = atoi_paranoid(optarg); + break; + case 'r': + reboot_permissions = true; + break; + case 'h': + default: + help(argv[0]); + break; + } + } + + TEST_REQUIRE(kvm_has_cap(KVM_CAP_VM_DISABLE_NX_HUGE_PAGES)); + + __TEST_REQUIRE(token == MAGIC_TOKEN, + "This test must be run with the magic token via '-t %d'.\n" + "Running via nx_huge_pages_test.sh, which also handles " + "environment setup, is strongly recommended.", MAGIC_TOKEN); + + run_test(reclaim_period_ms, false, reboot_permissions); + run_test(reclaim_period_ms, true, reboot_permissions); + + return 0; +} + diff --git a/tools/testing/selftests/kvm/x86/nx_huge_pages_test.sh b/tools/testing/selftests/kvm/x86/nx_huge_pages_test.sh new file mode 100755 index 000000000000..caad084b8bfd --- /dev/null +++ b/tools/testing/selftests/kvm/x86/nx_huge_pages_test.sh @@ -0,0 +1,69 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0-only */ +# +# Wrapper script which performs setup and cleanup for nx_huge_pages_test. +# Makes use of root privileges to set up huge pages and KVM module parameters. +# +# Copyright (C) 2022, Google LLC. + +set -e + +NX_HUGE_PAGES=$(cat /sys/module/kvm/parameters/nx_huge_pages) +NX_HUGE_PAGES_RECOVERY_RATIO=$(cat /sys/module/kvm/parameters/nx_huge_pages_recovery_ratio) +NX_HUGE_PAGES_RECOVERY_PERIOD=$(cat /sys/module/kvm/parameters/nx_huge_pages_recovery_period_ms) +HUGE_PAGES=$(cat /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages) + +# If we're already root, the host might not have sudo. 
+if [ $(whoami) == "root" ]; then + function do_sudo () { + "$@" + } +else + function do_sudo () { + sudo "$@" + } +fi + +set +e + +function sudo_echo () { + echo "$1" | do_sudo tee -a "$2" > /dev/null +} + +NXECUTABLE="$(dirname $0)/nx_huge_pages_test" + +sudo_echo test /dev/null || exit 4 # KSFT_SKIP=4 + +( + set -e + + sudo_echo 1 /sys/module/kvm/parameters/nx_huge_pages + sudo_echo 1 /sys/module/kvm/parameters/nx_huge_pages_recovery_ratio + sudo_echo 100 /sys/module/kvm/parameters/nx_huge_pages_recovery_period_ms + sudo_echo "$(( $HUGE_PAGES + 3 ))" /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages + + # Test with reboot permissions + if [ $(whoami) == "root" ] || sudo setcap cap_sys_boot+ep $NXECUTABLE 2> /dev/null; then + echo Running test with CAP_SYS_BOOT enabled + $NXECUTABLE -t 887563923 -p 100 -r + test $(whoami) == "root" || sudo setcap cap_sys_boot-ep $NXECUTABLE + else + echo setcap failed, skipping nx_huge_pages_test with CAP_SYS_BOOT enabled + fi + + # Test without reboot permissions + if [ $(whoami) != "root" ] ; then + echo Running test with CAP_SYS_BOOT disabled + $NXECUTABLE -t 887563923 -p 100 + else + echo Running as root, skipping nx_huge_pages_test with CAP_SYS_BOOT disabled + fi +) +RET=$? + +sudo_echo "$NX_HUGE_PAGES" /sys/module/kvm/parameters/nx_huge_pages +sudo_echo "$NX_HUGE_PAGES_RECOVERY_RATIO" /sys/module/kvm/parameters/nx_huge_pages_recovery_ratio +sudo_echo "$NX_HUGE_PAGES_RECOVERY_PERIOD" /sys/module/kvm/parameters/nx_huge_pages_recovery_period_ms +sudo_echo "$HUGE_PAGES" /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages + +exit $RET diff --git a/tools/testing/selftests/kvm/x86/platform_info_test.c b/tools/testing/selftests/kvm/x86/platform_info_test.c new file mode 100644 index 000000000000..9cbf283ebc55 --- /dev/null +++ b/tools/testing/selftests/kvm/x86/platform_info_test.c @@ -0,0 +1,78 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Test for x86 KVM_CAP_MSR_PLATFORM_INFO + * + * Copyright (C) 2018, Google LLC. + * + * This work is licensed under the terms of the GNU GPL, version 2. + * + * Verifies expected behavior of controlling guest access to + * MSR_PLATFORM_INFO. 
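+ *
+ * Roughly: while the capability is enabled, the guest's RDMSR should succeed
+ * and observe the max turbo ratio written by the host; once the capability
+ * is disabled, the same RDMSR is expected to #GP.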
+ */ +#include +#include +#include +#include +#include + +#include "test_util.h" +#include "kvm_util.h" +#include "processor.h" + +#define MSR_PLATFORM_INFO_MAX_TURBO_RATIO 0xff00 + +static void guest_code(void) +{ + uint64_t msr_platform_info; + uint8_t vector; + + GUEST_SYNC(true); + msr_platform_info = rdmsr(MSR_PLATFORM_INFO); + GUEST_ASSERT_EQ(msr_platform_info & MSR_PLATFORM_INFO_MAX_TURBO_RATIO, + MSR_PLATFORM_INFO_MAX_TURBO_RATIO); + + GUEST_SYNC(false); + vector = rdmsr_safe(MSR_PLATFORM_INFO, &msr_platform_info); + GUEST_ASSERT_EQ(vector, GP_VECTOR); + + GUEST_DONE(); +} + +int main(int argc, char *argv[]) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + uint64_t msr_platform_info; + struct ucall uc; + + TEST_REQUIRE(kvm_has_cap(KVM_CAP_MSR_PLATFORM_INFO)); + + vm = vm_create_with_one_vcpu(&vcpu, guest_code); + + msr_platform_info = vcpu_get_msr(vcpu, MSR_PLATFORM_INFO); + vcpu_set_msr(vcpu, MSR_PLATFORM_INFO, + msr_platform_info | MSR_PLATFORM_INFO_MAX_TURBO_RATIO); + + for (;;) { + vcpu_run(vcpu); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); + + switch (get_ucall(vcpu, &uc)) { + case UCALL_SYNC: + vm_enable_cap(vm, KVM_CAP_MSR_PLATFORM_INFO, uc.args[1]); + break; + case UCALL_DONE: + goto done; + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + default: + TEST_FAIL("Unexpected ucall %lu", uc.cmd); + break; + } + } + +done: + kvm_vm_free(vm); + + return 0; +} diff --git a/tools/testing/selftests/kvm/x86/pmu_counters_test.c b/tools/testing/selftests/kvm/x86/pmu_counters_test.c new file mode 100644 index 000000000000..698cb36989db --- /dev/null +++ b/tools/testing/selftests/kvm/x86/pmu_counters_test.c @@ -0,0 +1,644 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2023, Tencent, Inc. + */ +#include + +#include "pmu.h" +#include "processor.h" + +/* Number of iterations of the loop for the guest measurement payload. */ +#define NUM_LOOPS 10 + +/* Each iteration of the loop retires one branch instruction. */ +#define NUM_BRANCH_INSNS_RETIRED (NUM_LOOPS) + +/* + * Number of instructions in each loop. 1 CLFLUSH/CLFLUSHOPT/NOP, 1 MFENCE, + * 1 LOOP. + */ +#define NUM_INSNS_PER_LOOP 3 + +/* + * Number of "extra" instructions that will be counted, i.e. the number of + * instructions that are needed to set up the loop and then disable the + * counter. 2 MOV, 2 XOR, 1 WRMSR. + */ +#define NUM_EXTRA_INSNS 5 + +/* Total number of instructions retired within the measured section. */ +#define NUM_INSNS_RETIRED (NUM_LOOPS * NUM_INSNS_PER_LOOP + NUM_EXTRA_INSNS) + + +static uint8_t kvm_pmu_version; +static bool kvm_has_perf_caps; + +static struct kvm_vm *pmu_vm_create_with_one_vcpu(struct kvm_vcpu **vcpu, + void *guest_code, + uint8_t pmu_version, + uint64_t perf_capabilities) +{ + struct kvm_vm *vm; + + vm = vm_create_with_one_vcpu(vcpu, guest_code); + sync_global_to_guest(vm, kvm_pmu_version); + + /* + * Set PERF_CAPABILITIES before PMU version as KVM disallows enabling + * features via PERF_CAPABILITIES if the guest doesn't have a vPMU. 
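+ * E.g. setting pmu_version = 0 first and then writing PMU_CAP_FW_WRITES
+ * would presumably be rejected, which is why PERF_CAPABILITIES is written
+ * while the default CPUID still advertises a vPMU.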
+ */ + if (kvm_has_perf_caps) + vcpu_set_msr(*vcpu, MSR_IA32_PERF_CAPABILITIES, perf_capabilities); + + vcpu_set_cpuid_property(*vcpu, X86_PROPERTY_PMU_VERSION, pmu_version); + return vm; +} + +static void run_vcpu(struct kvm_vcpu *vcpu) +{ + struct ucall uc; + + do { + vcpu_run(vcpu); + switch (get_ucall(vcpu, &uc)) { + case UCALL_SYNC: + break; + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + break; + case UCALL_PRINTF: + pr_info("%s", uc.buffer); + break; + case UCALL_DONE: + break; + default: + TEST_FAIL("Unexpected ucall: %lu", uc.cmd); + } + } while (uc.cmd != UCALL_DONE); +} + +static uint8_t guest_get_pmu_version(void) +{ + /* + * Return the effective PMU version, i.e. the minimum between what KVM + * supports and what is enumerated to the guest. The host deliberately + * advertises a PMU version to the guest beyond what is actually + * supported by KVM to verify KVM doesn't freak out and do something + * bizarre with an architecturally valid, but unsupported, version. + */ + return min_t(uint8_t, kvm_pmu_version, this_cpu_property(X86_PROPERTY_PMU_VERSION)); +} + +/* + * If an architectural event is supported and guaranteed to generate at least + * one "hit, assert that its count is non-zero. If an event isn't supported or + * the test can't guarantee the associated action will occur, then all bets are + * off regarding the count, i.e. no checks can be done. + * + * Sanity check that in all cases, the event doesn't count when it's disabled, + * and that KVM correctly emulates the write of an arbitrary value. + */ +static void guest_assert_event_count(uint8_t idx, + struct kvm_x86_pmu_feature event, + uint32_t pmc, uint32_t pmc_msr) +{ + uint64_t count; + + count = _rdpmc(pmc); + if (!this_pmu_has(event)) + goto sanity_checks; + + switch (idx) { + case INTEL_ARCH_INSTRUCTIONS_RETIRED_INDEX: + GUEST_ASSERT_EQ(count, NUM_INSNS_RETIRED); + break; + case INTEL_ARCH_BRANCHES_RETIRED_INDEX: + GUEST_ASSERT_EQ(count, NUM_BRANCH_INSNS_RETIRED); + break; + case INTEL_ARCH_LLC_REFERENCES_INDEX: + case INTEL_ARCH_LLC_MISSES_INDEX: + if (!this_cpu_has(X86_FEATURE_CLFLUSHOPT) && + !this_cpu_has(X86_FEATURE_CLFLUSH)) + break; + fallthrough; + case INTEL_ARCH_CPU_CYCLES_INDEX: + case INTEL_ARCH_REFERENCE_CYCLES_INDEX: + GUEST_ASSERT_NE(count, 0); + break; + case INTEL_ARCH_TOPDOWN_SLOTS_INDEX: + GUEST_ASSERT(count >= NUM_INSNS_RETIRED); + break; + default: + break; + } + +sanity_checks: + __asm__ __volatile__("loop ." : "+c"((int){NUM_LOOPS})); + GUEST_ASSERT_EQ(_rdpmc(pmc), count); + + wrmsr(pmc_msr, 0xdead); + GUEST_ASSERT_EQ(_rdpmc(pmc), 0xdead); +} + +/* + * Enable and disable the PMC in a monolithic asm blob to ensure that the + * compiler can't insert _any_ code into the measured sequence. Note, ECX + * doesn't need to be clobbered as the input value, @pmc_msr, is restored + * before the end of the sequence. + * + * If CLFUSH{,OPT} is supported, flush the cacheline containing (at least) the + * CLFUSH{,OPT} instruction on each loop iteration to force LLC references and + * misses, i.e. to allow testing that those events actually count. + * + * If forced emulation is enabled (and specified), force emulation on a subset + * of the measured code to verify that KVM correctly emulates instructions and + * branches retired events in conjunction with hardware also counting said + * events. 
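+ *
+ * With the values defined above, each measured run is expected to retire
+ * NUM_LOOPS * NUM_INSNS_PER_LOOP + NUM_EXTRA_INSNS = 10 * 3 + 5 = 35
+ * instructions (NUM_INSNS_RETIRED) and NUM_BRANCH_INSNS_RETIRED = 10
+ * branches.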
+ */ +#define GUEST_MEASURE_EVENT(_msr, _value, clflush, FEP) \ +do { \ + __asm__ __volatile__("wrmsr\n\t" \ + " mov $" __stringify(NUM_LOOPS) ", %%ecx\n\t" \ + "1:\n\t" \ + clflush "\n\t" \ + "mfence\n\t" \ + FEP "loop 1b\n\t" \ + FEP "mov %%edi, %%ecx\n\t" \ + FEP "xor %%eax, %%eax\n\t" \ + FEP "xor %%edx, %%edx\n\t" \ + "wrmsr\n\t" \ + :: "a"((uint32_t)_value), "d"(_value >> 32), \ + "c"(_msr), "D"(_msr) \ + ); \ +} while (0) + +#define GUEST_TEST_EVENT(_idx, _event, _pmc, _pmc_msr, _ctrl_msr, _value, FEP) \ +do { \ + wrmsr(pmc_msr, 0); \ + \ + if (this_cpu_has(X86_FEATURE_CLFLUSHOPT)) \ + GUEST_MEASURE_EVENT(_ctrl_msr, _value, "clflushopt .", FEP); \ + else if (this_cpu_has(X86_FEATURE_CLFLUSH)) \ + GUEST_MEASURE_EVENT(_ctrl_msr, _value, "clflush .", FEP); \ + else \ + GUEST_MEASURE_EVENT(_ctrl_msr, _value, "nop", FEP); \ + \ + guest_assert_event_count(_idx, _event, _pmc, _pmc_msr); \ +} while (0) + +static void __guest_test_arch_event(uint8_t idx, struct kvm_x86_pmu_feature event, + uint32_t pmc, uint32_t pmc_msr, + uint32_t ctrl_msr, uint64_t ctrl_msr_value) +{ + GUEST_TEST_EVENT(idx, event, pmc, pmc_msr, ctrl_msr, ctrl_msr_value, ""); + + if (is_forced_emulation_enabled) + GUEST_TEST_EVENT(idx, event, pmc, pmc_msr, ctrl_msr, ctrl_msr_value, KVM_FEP); +} + +#define X86_PMU_FEATURE_NULL \ +({ \ + struct kvm_x86_pmu_feature feature = {}; \ + \ + feature; \ +}) + +static bool pmu_is_null_feature(struct kvm_x86_pmu_feature event) +{ + return !(*(u64 *)&event); +} + +static void guest_test_arch_event(uint8_t idx) +{ + const struct { + struct kvm_x86_pmu_feature gp_event; + struct kvm_x86_pmu_feature fixed_event; + } intel_event_to_feature[] = { + [INTEL_ARCH_CPU_CYCLES_INDEX] = { X86_PMU_FEATURE_CPU_CYCLES, X86_PMU_FEATURE_CPU_CYCLES_FIXED }, + [INTEL_ARCH_INSTRUCTIONS_RETIRED_INDEX] = { X86_PMU_FEATURE_INSNS_RETIRED, X86_PMU_FEATURE_INSNS_RETIRED_FIXED }, + /* + * Note, the fixed counter for reference cycles is NOT the same + * as the general purpose architectural event. The fixed counter + * explicitly counts at the same frequency as the TSC, whereas + * the GP event counts at a fixed, but uarch specific, frequency. + * Bundle them here for simplicity. + */ + [INTEL_ARCH_REFERENCE_CYCLES_INDEX] = { X86_PMU_FEATURE_REFERENCE_CYCLES, X86_PMU_FEATURE_REFERENCE_TSC_CYCLES_FIXED }, + [INTEL_ARCH_LLC_REFERENCES_INDEX] = { X86_PMU_FEATURE_LLC_REFERENCES, X86_PMU_FEATURE_NULL }, + [INTEL_ARCH_LLC_MISSES_INDEX] = { X86_PMU_FEATURE_LLC_MISSES, X86_PMU_FEATURE_NULL }, + [INTEL_ARCH_BRANCHES_RETIRED_INDEX] = { X86_PMU_FEATURE_BRANCH_INSNS_RETIRED, X86_PMU_FEATURE_NULL }, + [INTEL_ARCH_BRANCHES_MISPREDICTED_INDEX] = { X86_PMU_FEATURE_BRANCHES_MISPREDICTED, X86_PMU_FEATURE_NULL }, + [INTEL_ARCH_TOPDOWN_SLOTS_INDEX] = { X86_PMU_FEATURE_TOPDOWN_SLOTS, X86_PMU_FEATURE_TOPDOWN_SLOTS_FIXED }, + }; + + uint32_t nr_gp_counters = this_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS); + uint32_t pmu_version = guest_get_pmu_version(); + /* PERF_GLOBAL_CTRL exists only for Architectural PMU Version 2+. */ + bool guest_has_perf_global_ctrl = pmu_version >= 2; + struct kvm_x86_pmu_feature gp_event, fixed_event; + uint32_t base_pmc_msr; + unsigned int i; + + /* The host side shouldn't invoke this without a guest PMU. 
*/ + GUEST_ASSERT(pmu_version); + + if (this_cpu_has(X86_FEATURE_PDCM) && + rdmsr(MSR_IA32_PERF_CAPABILITIES) & PMU_CAP_FW_WRITES) + base_pmc_msr = MSR_IA32_PMC0; + else + base_pmc_msr = MSR_IA32_PERFCTR0; + + gp_event = intel_event_to_feature[idx].gp_event; + GUEST_ASSERT_EQ(idx, gp_event.f.bit); + + GUEST_ASSERT(nr_gp_counters); + + for (i = 0; i < nr_gp_counters; i++) { + uint64_t eventsel = ARCH_PERFMON_EVENTSEL_OS | + ARCH_PERFMON_EVENTSEL_ENABLE | + intel_pmu_arch_events[idx]; + + wrmsr(MSR_P6_EVNTSEL0 + i, 0); + if (guest_has_perf_global_ctrl) + wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, BIT_ULL(i)); + + __guest_test_arch_event(idx, gp_event, i, base_pmc_msr + i, + MSR_P6_EVNTSEL0 + i, eventsel); + } + + if (!guest_has_perf_global_ctrl) + return; + + fixed_event = intel_event_to_feature[idx].fixed_event; + if (pmu_is_null_feature(fixed_event) || !this_pmu_has(fixed_event)) + return; + + i = fixed_event.f.bit; + + wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, FIXED_PMC_CTRL(i, FIXED_PMC_KERNEL)); + + __guest_test_arch_event(idx, fixed_event, i | INTEL_RDPMC_FIXED, + MSR_CORE_PERF_FIXED_CTR0 + i, + MSR_CORE_PERF_GLOBAL_CTRL, + FIXED_PMC_GLOBAL_CTRL_ENABLE(i)); +} + +static void guest_test_arch_events(void) +{ + uint8_t i; + + for (i = 0; i < NR_INTEL_ARCH_EVENTS; i++) + guest_test_arch_event(i); + + GUEST_DONE(); +} + +static void test_arch_events(uint8_t pmu_version, uint64_t perf_capabilities, + uint8_t length, uint8_t unavailable_mask) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + + /* Testing arch events requires a vPMU (there are no negative tests). */ + if (!pmu_version) + return; + + vm = pmu_vm_create_with_one_vcpu(&vcpu, guest_test_arch_events, + pmu_version, perf_capabilities); + + vcpu_set_cpuid_property(vcpu, X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH, + length); + vcpu_set_cpuid_property(vcpu, X86_PROPERTY_PMU_EVENTS_MASK, + unavailable_mask); + + run_vcpu(vcpu); + + kvm_vm_free(vm); +} + +/* + * Limit testing to MSRs that are actually defined by Intel (in the SDM). MSRs + * that aren't defined counter MSRs *probably* don't exist, but there's no + * guarantee that currently undefined MSR indices won't be used for something + * other than PMCs in the future. + */ +#define MAX_NR_GP_COUNTERS 8 +#define MAX_NR_FIXED_COUNTERS 3 + +#define GUEST_ASSERT_PMC_MSR_ACCESS(insn, msr, expect_gp, vector) \ +__GUEST_ASSERT(expect_gp ? vector == GP_VECTOR : !vector, \ + "Expected %s on " #insn "(0x%x), got vector %u", \ + expect_gp ? 
"#GP" : "no fault", msr, vector) \ + +#define GUEST_ASSERT_PMC_VALUE(insn, msr, val, expected) \ + __GUEST_ASSERT(val == expected_val, \ + "Expected " #insn "(0x%x) to yield 0x%lx, got 0x%lx", \ + msr, expected_val, val); + +static void guest_test_rdpmc(uint32_t rdpmc_idx, bool expect_success, + uint64_t expected_val) +{ + uint8_t vector; + uint64_t val; + + vector = rdpmc_safe(rdpmc_idx, &val); + GUEST_ASSERT_PMC_MSR_ACCESS(RDPMC, rdpmc_idx, !expect_success, vector); + if (expect_success) + GUEST_ASSERT_PMC_VALUE(RDPMC, rdpmc_idx, val, expected_val); + + if (!is_forced_emulation_enabled) + return; + + vector = rdpmc_safe_fep(rdpmc_idx, &val); + GUEST_ASSERT_PMC_MSR_ACCESS(RDPMC, rdpmc_idx, !expect_success, vector); + if (expect_success) + GUEST_ASSERT_PMC_VALUE(RDPMC, rdpmc_idx, val, expected_val); +} + +static void guest_rd_wr_counters(uint32_t base_msr, uint8_t nr_possible_counters, + uint8_t nr_counters, uint32_t or_mask) +{ + const bool pmu_has_fast_mode = !guest_get_pmu_version(); + uint8_t i; + + for (i = 0; i < nr_possible_counters; i++) { + /* + * TODO: Test a value that validates full-width writes and the + * width of the counters. + */ + const uint64_t test_val = 0xffff; + const uint32_t msr = base_msr + i; + + /* + * Fixed counters are supported if the counter is less than the + * number of enumerated contiguous counters *or* the counter is + * explicitly enumerated in the supported counters mask. + */ + const bool expect_success = i < nr_counters || (or_mask & BIT(i)); + + /* + * KVM drops writes to MSR_P6_PERFCTR[0|1] if the counters are + * unsupported, i.e. doesn't #GP and reads back '0'. + */ + const uint64_t expected_val = expect_success ? test_val : 0; + const bool expect_gp = !expect_success && msr != MSR_P6_PERFCTR0 && + msr != MSR_P6_PERFCTR1; + uint32_t rdpmc_idx; + uint8_t vector; + uint64_t val; + + vector = wrmsr_safe(msr, test_val); + GUEST_ASSERT_PMC_MSR_ACCESS(WRMSR, msr, expect_gp, vector); + + vector = rdmsr_safe(msr, &val); + GUEST_ASSERT_PMC_MSR_ACCESS(RDMSR, msr, expect_gp, vector); + + /* On #GP, the result of RDMSR is undefined. */ + if (!expect_gp) + GUEST_ASSERT_PMC_VALUE(RDMSR, msr, val, expected_val); + + /* + * Redo the read tests with RDPMC, which has different indexing + * semantics and additional capabilities. + */ + rdpmc_idx = i; + if (base_msr == MSR_CORE_PERF_FIXED_CTR0) + rdpmc_idx |= INTEL_RDPMC_FIXED; + + guest_test_rdpmc(rdpmc_idx, expect_success, expected_val); + + /* + * KVM doesn't support non-architectural PMUs, i.e. it should + * impossible to have fast mode RDPMC. Verify that attempting + * to use fast RDPMC always #GPs. + */ + GUEST_ASSERT(!expect_success || !pmu_has_fast_mode); + rdpmc_idx |= INTEL_RDPMC_FAST; + guest_test_rdpmc(rdpmc_idx, false, -1ull); + + vector = wrmsr_safe(msr, 0); + GUEST_ASSERT_PMC_MSR_ACCESS(WRMSR, msr, expect_gp, vector); + } +} + +static void guest_test_gp_counters(void) +{ + uint8_t pmu_version = guest_get_pmu_version(); + uint8_t nr_gp_counters = 0; + uint32_t base_msr; + + if (pmu_version) + nr_gp_counters = this_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS); + + /* + * For v2+ PMUs, PERF_GLOBAL_CTRL's architectural post-RESET value is + * "Sets bits n-1:0 and clears the upper bits", where 'n' is the number + * of GP counters. If there are no GP counters, require KVM to leave + * PERF_GLOBAL_CTRL '0'. This edge case isn't covered by the SDM, but + * follow the spirit of the architecture and only globally enable GP + * counters, of which there are none. 
+ */ + if (pmu_version > 1) { + uint64_t global_ctrl = rdmsr(MSR_CORE_PERF_GLOBAL_CTRL); + + if (nr_gp_counters) + GUEST_ASSERT_EQ(global_ctrl, GENMASK_ULL(nr_gp_counters - 1, 0)); + else + GUEST_ASSERT_EQ(global_ctrl, 0); + } + + if (this_cpu_has(X86_FEATURE_PDCM) && + rdmsr(MSR_IA32_PERF_CAPABILITIES) & PMU_CAP_FW_WRITES) + base_msr = MSR_IA32_PMC0; + else + base_msr = MSR_IA32_PERFCTR0; + + guest_rd_wr_counters(base_msr, MAX_NR_GP_COUNTERS, nr_gp_counters, 0); + GUEST_DONE(); +} + +static void test_gp_counters(uint8_t pmu_version, uint64_t perf_capabilities, + uint8_t nr_gp_counters) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + + vm = pmu_vm_create_with_one_vcpu(&vcpu, guest_test_gp_counters, + pmu_version, perf_capabilities); + + vcpu_set_cpuid_property(vcpu, X86_PROPERTY_PMU_NR_GP_COUNTERS, + nr_gp_counters); + + run_vcpu(vcpu); + + kvm_vm_free(vm); +} + +static void guest_test_fixed_counters(void) +{ + uint64_t supported_bitmask = 0; + uint8_t nr_fixed_counters = 0; + uint8_t i; + + /* Fixed counters require Architectural vPMU Version 2+. */ + if (guest_get_pmu_version() >= 2) + nr_fixed_counters = this_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS); + + /* + * The supported bitmask for fixed counters was introduced in PMU + * version 5. + */ + if (guest_get_pmu_version() >= 5) + supported_bitmask = this_cpu_property(X86_PROPERTY_PMU_FIXED_COUNTERS_BITMASK); + + guest_rd_wr_counters(MSR_CORE_PERF_FIXED_CTR0, MAX_NR_FIXED_COUNTERS, + nr_fixed_counters, supported_bitmask); + + for (i = 0; i < MAX_NR_FIXED_COUNTERS; i++) { + uint8_t vector; + uint64_t val; + + if (i >= nr_fixed_counters && !(supported_bitmask & BIT_ULL(i))) { + vector = wrmsr_safe(MSR_CORE_PERF_FIXED_CTR_CTRL, + FIXED_PMC_CTRL(i, FIXED_PMC_KERNEL)); + __GUEST_ASSERT(vector == GP_VECTOR, + "Expected #GP for counter %u in FIXED_CTR_CTRL", i); + + vector = wrmsr_safe(MSR_CORE_PERF_GLOBAL_CTRL, + FIXED_PMC_GLOBAL_CTRL_ENABLE(i)); + __GUEST_ASSERT(vector == GP_VECTOR, + "Expected #GP for counter %u in PERF_GLOBAL_CTRL", i); + continue; + } + + wrmsr(MSR_CORE_PERF_FIXED_CTR0 + i, 0); + wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, FIXED_PMC_CTRL(i, FIXED_PMC_KERNEL)); + wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, FIXED_PMC_GLOBAL_CTRL_ENABLE(i)); + __asm__ __volatile__("loop ." : "+c"((int){NUM_LOOPS})); + wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0); + val = rdmsr(MSR_CORE_PERF_FIXED_CTR0 + i); + + GUEST_ASSERT_NE(val, 0); + } + GUEST_DONE(); +} + +static void test_fixed_counters(uint8_t pmu_version, uint64_t perf_capabilities, + uint8_t nr_fixed_counters, + uint32_t supported_bitmask) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + + vm = pmu_vm_create_with_one_vcpu(&vcpu, guest_test_fixed_counters, + pmu_version, perf_capabilities); + + vcpu_set_cpuid_property(vcpu, X86_PROPERTY_PMU_FIXED_COUNTERS_BITMASK, + supported_bitmask); + vcpu_set_cpuid_property(vcpu, X86_PROPERTY_PMU_NR_FIXED_COUNTERS, + nr_fixed_counters); + + run_vcpu(vcpu); + + kvm_vm_free(vm); +} + +static void test_intel_counters(void) +{ + uint8_t nr_arch_events = kvm_cpu_property(X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH); + uint8_t nr_fixed_counters = kvm_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS); + uint8_t nr_gp_counters = kvm_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS); + uint8_t pmu_version = kvm_cpu_property(X86_PROPERTY_PMU_VERSION); + unsigned int i; + uint8_t v, j; + uint32_t k; + + const uint64_t perf_caps[] = { + 0, + PMU_CAP_FW_WRITES, + }; + + /* + * Test up to PMU v5, which is the current maximum version defined by + * Intel, i.e. 
is the last version that is guaranteed to be backwards + * compatible with KVM's existing behavior. + */ + uint8_t max_pmu_version = max_t(typeof(pmu_version), pmu_version, 5); + + /* + * Detect the existence of events that aren't supported by selftests. + * This will (obviously) fail any time the kernel adds support for a + * new event, but it's worth paying that price to keep the test fresh. + */ + TEST_ASSERT(nr_arch_events <= NR_INTEL_ARCH_EVENTS, + "New architectural event(s) detected; please update this test (length = %u, mask = %x)", + nr_arch_events, kvm_cpu_property(X86_PROPERTY_PMU_EVENTS_MASK)); + + /* + * Force iterating over known arch events regardless of whether or not + * KVM/hardware supports a given event. + */ + nr_arch_events = max_t(typeof(nr_arch_events), nr_arch_events, NR_INTEL_ARCH_EVENTS); + + for (v = 0; v <= max_pmu_version; v++) { + for (i = 0; i < ARRAY_SIZE(perf_caps); i++) { + if (!kvm_has_perf_caps && perf_caps[i]) + continue; + + pr_info("Testing arch events, PMU version %u, perf_caps = %lx\n", + v, perf_caps[i]); + /* + * To keep the total runtime reasonable, test every + * possible non-zero, non-reserved bitmap combination + * only with the native PMU version and the full bit + * vector length. + */ + if (v == pmu_version) { + for (k = 1; k < (BIT(nr_arch_events) - 1); k++) + test_arch_events(v, perf_caps[i], nr_arch_events, k); + } + /* + * Test single bits for all PMU version and lengths up + * the number of events +1 (to verify KVM doesn't do + * weird things if the guest length is greater than the + * host length). Explicitly test a mask of '0' and all + * ones i.e. all events being available and unavailable. + */ + for (j = 0; j <= nr_arch_events + 1; j++) { + test_arch_events(v, perf_caps[i], j, 0); + test_arch_events(v, perf_caps[i], j, 0xff); + + for (k = 0; k < nr_arch_events; k++) + test_arch_events(v, perf_caps[i], j, BIT(k)); + } + + pr_info("Testing GP counters, PMU version %u, perf_caps = %lx\n", + v, perf_caps[i]); + for (j = 0; j <= nr_gp_counters; j++) + test_gp_counters(v, perf_caps[i], j); + + pr_info("Testing fixed counters, PMU version %u, perf_caps = %lx\n", + v, perf_caps[i]); + for (j = 0; j <= nr_fixed_counters; j++) { + for (k = 0; k <= (BIT(nr_fixed_counters) - 1); k++) + test_fixed_counters(v, perf_caps[i], j, k); + } + } + } +} + +int main(int argc, char *argv[]) +{ + TEST_REQUIRE(kvm_is_pmu_enabled()); + + TEST_REQUIRE(host_cpu_is_intel); + TEST_REQUIRE(kvm_cpu_has_p(X86_PROPERTY_PMU_VERSION)); + TEST_REQUIRE(kvm_cpu_property(X86_PROPERTY_PMU_VERSION) > 0); + + kvm_pmu_version = kvm_cpu_property(X86_PROPERTY_PMU_VERSION); + kvm_has_perf_caps = kvm_cpu_has(X86_FEATURE_PDCM); + + test_intel_counters(); + + return 0; +} diff --git a/tools/testing/selftests/kvm/x86/pmu_event_filter_test.c b/tools/testing/selftests/kvm/x86/pmu_event_filter_test.c new file mode 100644 index 000000000000..c15513cd74d1 --- /dev/null +++ b/tools/testing/selftests/kvm/x86/pmu_event_filter_test.c @@ -0,0 +1,876 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Test for x86 KVM_SET_PMU_EVENT_FILTER. + * + * Copyright (C) 2022, Google LLC. + * + * This work is licensed under the terms of the GNU GPL, version 2. + * + * Verifies the expected behavior of allow lists and deny lists for + * virtual PMU events. 
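+ *
+ * For example, an ALLOW filter from which Instructions Retired and Branch
+ * Instructions Retired have been removed is expected to leave the guest's
+ * counters reading zero, whereas a DENY filter with the same events removed
+ * should leave them counting normally (see test_not_member_*_list() below).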
+ */ +#include "kvm_util.h" +#include "pmu.h" +#include "processor.h" +#include "test_util.h" + +#define NUM_BRANCHES 42 +#define MAX_TEST_EVENTS 10 + +#define PMU_EVENT_FILTER_INVALID_ACTION (KVM_PMU_EVENT_DENY + 1) +#define PMU_EVENT_FILTER_INVALID_FLAGS (KVM_PMU_EVENT_FLAGS_VALID_MASK << 1) +#define PMU_EVENT_FILTER_INVALID_NEVENTS (KVM_PMU_EVENT_FILTER_MAX_EVENTS + 1) + +struct __kvm_pmu_event_filter { + __u32 action; + __u32 nevents; + __u32 fixed_counter_bitmap; + __u32 flags; + __u32 pad[4]; + __u64 events[KVM_PMU_EVENT_FILTER_MAX_EVENTS]; +}; + +/* + * This event list comprises Intel's known architectural events, plus AMD's + * Branch Instructions Retired for Zen CPUs. Note, AMD and Intel use the + * same encoding for Instructions Retired. + */ +kvm_static_assert(INTEL_ARCH_INSTRUCTIONS_RETIRED == AMD_ZEN_INSTRUCTIONS_RETIRED); + +static const struct __kvm_pmu_event_filter base_event_filter = { + .nevents = ARRAY_SIZE(base_event_filter.events), + .events = { + INTEL_ARCH_CPU_CYCLES, + INTEL_ARCH_INSTRUCTIONS_RETIRED, + INTEL_ARCH_REFERENCE_CYCLES, + INTEL_ARCH_LLC_REFERENCES, + INTEL_ARCH_LLC_MISSES, + INTEL_ARCH_BRANCHES_RETIRED, + INTEL_ARCH_BRANCHES_MISPREDICTED, + INTEL_ARCH_TOPDOWN_SLOTS, + AMD_ZEN_BRANCHES_RETIRED, + }, +}; + +struct { + uint64_t loads; + uint64_t stores; + uint64_t loads_stores; + uint64_t branches_retired; + uint64_t instructions_retired; +} pmc_results; + +/* + * If we encounter a #GP during the guest PMU sanity check, then the guest + * PMU is not functional. Inform the hypervisor via GUEST_SYNC(0). + */ +static void guest_gp_handler(struct ex_regs *regs) +{ + GUEST_SYNC(-EFAULT); +} + +/* + * Check that we can write a new value to the given MSR and read it back. + * The caller should provide a non-empty set of bits that are safe to flip. + * + * Return on success. GUEST_SYNC(0) on error. + */ +static void check_msr(uint32_t msr, uint64_t bits_to_flip) +{ + uint64_t v = rdmsr(msr) ^ bits_to_flip; + + wrmsr(msr, v); + if (rdmsr(msr) != v) + GUEST_SYNC(-EIO); + + v ^= bits_to_flip; + wrmsr(msr, v); + if (rdmsr(msr) != v) + GUEST_SYNC(-EIO); +} + +static void run_and_measure_loop(uint32_t msr_base) +{ + const uint64_t branches_retired = rdmsr(msr_base + 0); + const uint64_t insn_retired = rdmsr(msr_base + 1); + + __asm__ __volatile__("loop ." : "+c"((int){NUM_BRANCHES})); + + pmc_results.branches_retired = rdmsr(msr_base + 0) - branches_retired; + pmc_results.instructions_retired = rdmsr(msr_base + 1) - insn_retired; +} + +static void intel_guest_code(void) +{ + check_msr(MSR_CORE_PERF_GLOBAL_CTRL, 1); + check_msr(MSR_P6_EVNTSEL0, 0xffff); + check_msr(MSR_IA32_PMC0, 0xffff); + GUEST_SYNC(0); + + for (;;) { + wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0); + wrmsr(MSR_P6_EVNTSEL0, ARCH_PERFMON_EVENTSEL_ENABLE | + ARCH_PERFMON_EVENTSEL_OS | INTEL_ARCH_BRANCHES_RETIRED); + wrmsr(MSR_P6_EVNTSEL1, ARCH_PERFMON_EVENTSEL_ENABLE | + ARCH_PERFMON_EVENTSEL_OS | INTEL_ARCH_INSTRUCTIONS_RETIRED); + wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0x3); + + run_and_measure_loop(MSR_IA32_PMC0); + GUEST_SYNC(0); + } +} + +/* + * To avoid needing a check for CPUID.80000001:ECX.PerfCtrExtCore[bit 23], + * this code uses the always-available, legacy K7 PMU MSRs, which alias to + * the first four of the six extended core PMU MSRs. 
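+ * (On such CPUs, MSR_K7_EVNTSEL0/MSR_K7_PERFCTR0 are presumably aliases of
+ * MSR_F15H_PERF_CTL0/MSR_F15H_PERF_CTR0, and likewise for counters 1-3.)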
+ */ +static void amd_guest_code(void) +{ + check_msr(MSR_K7_EVNTSEL0, 0xffff); + check_msr(MSR_K7_PERFCTR0, 0xffff); + GUEST_SYNC(0); + + for (;;) { + wrmsr(MSR_K7_EVNTSEL0, 0); + wrmsr(MSR_K7_EVNTSEL0, ARCH_PERFMON_EVENTSEL_ENABLE | + ARCH_PERFMON_EVENTSEL_OS | AMD_ZEN_BRANCHES_RETIRED); + wrmsr(MSR_K7_EVNTSEL1, ARCH_PERFMON_EVENTSEL_ENABLE | + ARCH_PERFMON_EVENTSEL_OS | AMD_ZEN_INSTRUCTIONS_RETIRED); + + run_and_measure_loop(MSR_K7_PERFCTR0); + GUEST_SYNC(0); + } +} + +/* + * Run the VM to the next GUEST_SYNC(value), and return the value passed + * to the sync. Any other exit from the guest is fatal. + */ +static uint64_t run_vcpu_to_sync(struct kvm_vcpu *vcpu) +{ + struct ucall uc; + + vcpu_run(vcpu); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); + get_ucall(vcpu, &uc); + TEST_ASSERT(uc.cmd == UCALL_SYNC, + "Received ucall other than UCALL_SYNC: %lu", uc.cmd); + return uc.args[1]; +} + +static void run_vcpu_and_sync_pmc_results(struct kvm_vcpu *vcpu) +{ + uint64_t r; + + memset(&pmc_results, 0, sizeof(pmc_results)); + sync_global_to_guest(vcpu->vm, pmc_results); + + r = run_vcpu_to_sync(vcpu); + TEST_ASSERT(!r, "Unexpected sync value: 0x%lx", r); + + sync_global_from_guest(vcpu->vm, pmc_results); +} + +/* + * In a nested environment or if the vPMU is disabled, the guest PMU + * might not work as architected (accessing the PMU MSRs may raise + * #GP, or writes could simply be discarded). In those situations, + * there is no point in running these tests. The guest code will perform + * a sanity check and then GUEST_SYNC(success). In the case of failure, + * the behavior of the guest on resumption is undefined. + */ +static bool sanity_check_pmu(struct kvm_vcpu *vcpu) +{ + uint64_t r; + + vm_install_exception_handler(vcpu->vm, GP_VECTOR, guest_gp_handler); + r = run_vcpu_to_sync(vcpu); + vm_install_exception_handler(vcpu->vm, GP_VECTOR, NULL); + + return !r; +} + +/* + * Remove the first occurrence of 'event' (if any) from the filter's + * event list. 
+ */ +static void remove_event(struct __kvm_pmu_event_filter *f, uint64_t event) +{ + bool found = false; + int i; + + for (i = 0; i < f->nevents; i++) { + if (found) + f->events[i - 1] = f->events[i]; + else + found = f->events[i] == event; + } + if (found) + f->nevents--; +} + +#define ASSERT_PMC_COUNTING_INSTRUCTIONS() \ +do { \ + uint64_t br = pmc_results.branches_retired; \ + uint64_t ir = pmc_results.instructions_retired; \ + \ + if (br && br != NUM_BRANCHES) \ + pr_info("%s: Branch instructions retired = %lu (expected %u)\n", \ + __func__, br, NUM_BRANCHES); \ + TEST_ASSERT(br, "%s: Branch instructions retired = %lu (expected > 0)", \ + __func__, br); \ + TEST_ASSERT(ir, "%s: Instructions retired = %lu (expected > 0)", \ + __func__, ir); \ +} while (0) + +#define ASSERT_PMC_NOT_COUNTING_INSTRUCTIONS() \ +do { \ + uint64_t br = pmc_results.branches_retired; \ + uint64_t ir = pmc_results.instructions_retired; \ + \ + TEST_ASSERT(!br, "%s: Branch instructions retired = %lu (expected 0)", \ + __func__, br); \ + TEST_ASSERT(!ir, "%s: Instructions retired = %lu (expected 0)", \ + __func__, ir); \ +} while (0) + +static void test_without_filter(struct kvm_vcpu *vcpu) +{ + run_vcpu_and_sync_pmc_results(vcpu); + + ASSERT_PMC_COUNTING_INSTRUCTIONS(); +} + +static void test_with_filter(struct kvm_vcpu *vcpu, + struct __kvm_pmu_event_filter *__f) +{ + struct kvm_pmu_event_filter *f = (void *)__f; + + vm_ioctl(vcpu->vm, KVM_SET_PMU_EVENT_FILTER, f); + run_vcpu_and_sync_pmc_results(vcpu); +} + +static void test_amd_deny_list(struct kvm_vcpu *vcpu) +{ + struct __kvm_pmu_event_filter f = { + .action = KVM_PMU_EVENT_DENY, + .nevents = 1, + .events = { + RAW_EVENT(0x1C2, 0), + }, + }; + + test_with_filter(vcpu, &f); + + ASSERT_PMC_COUNTING_INSTRUCTIONS(); +} + +static void test_member_deny_list(struct kvm_vcpu *vcpu) +{ + struct __kvm_pmu_event_filter f = base_event_filter; + + f.action = KVM_PMU_EVENT_DENY; + test_with_filter(vcpu, &f); + + ASSERT_PMC_NOT_COUNTING_INSTRUCTIONS(); +} + +static void test_member_allow_list(struct kvm_vcpu *vcpu) +{ + struct __kvm_pmu_event_filter f = base_event_filter; + + f.action = KVM_PMU_EVENT_ALLOW; + test_with_filter(vcpu, &f); + + ASSERT_PMC_COUNTING_INSTRUCTIONS(); +} + +static void test_not_member_deny_list(struct kvm_vcpu *vcpu) +{ + struct __kvm_pmu_event_filter f = base_event_filter; + + f.action = KVM_PMU_EVENT_DENY; + + remove_event(&f, INTEL_ARCH_INSTRUCTIONS_RETIRED); + remove_event(&f, INTEL_ARCH_BRANCHES_RETIRED); + remove_event(&f, AMD_ZEN_BRANCHES_RETIRED); + test_with_filter(vcpu, &f); + + ASSERT_PMC_COUNTING_INSTRUCTIONS(); +} + +static void test_not_member_allow_list(struct kvm_vcpu *vcpu) +{ + struct __kvm_pmu_event_filter f = base_event_filter; + + f.action = KVM_PMU_EVENT_ALLOW; + + remove_event(&f, INTEL_ARCH_INSTRUCTIONS_RETIRED); + remove_event(&f, INTEL_ARCH_BRANCHES_RETIRED); + remove_event(&f, AMD_ZEN_BRANCHES_RETIRED); + test_with_filter(vcpu, &f); + + ASSERT_PMC_NOT_COUNTING_INSTRUCTIONS(); +} + +/* + * Verify that setting KVM_PMU_CAP_DISABLE prevents the use of the PMU. + * + * Note that KVM_CAP_PMU_CAPABILITY must be invoked prior to creating VCPUs. 
+ */ +static void test_pmu_config_disable(void (*guest_code)(void)) +{ + struct kvm_vcpu *vcpu; + int r; + struct kvm_vm *vm; + + r = kvm_check_cap(KVM_CAP_PMU_CAPABILITY); + if (!(r & KVM_PMU_CAP_DISABLE)) + return; + + vm = vm_create(1); + + vm_enable_cap(vm, KVM_CAP_PMU_CAPABILITY, KVM_PMU_CAP_DISABLE); + + vcpu = vm_vcpu_add(vm, 0, guest_code); + TEST_ASSERT(!sanity_check_pmu(vcpu), + "Guest should not be able to use disabled PMU."); + + kvm_vm_free(vm); +} + +/* + * On Intel, check for a non-zero PMU version, at least one general-purpose + * counter per logical processor, and support for counting the number of branch + * instructions retired. + */ +static bool use_intel_pmu(void) +{ + return host_cpu_is_intel && + kvm_cpu_property(X86_PROPERTY_PMU_VERSION) && + kvm_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS) && + kvm_pmu_has(X86_PMU_FEATURE_BRANCH_INSNS_RETIRED); +} + +/* + * On AMD, all Family 17h+ CPUs (Zen and its successors) use event encoding + * 0xc2,0 for Branch Instructions Retired. + */ +static bool use_amd_pmu(void) +{ + return host_cpu_is_amd && kvm_cpu_family() >= 0x17; +} + +/* + * "MEM_INST_RETIRED.ALL_LOADS", "MEM_INST_RETIRED.ALL_STORES", and + * "MEM_INST_RETIRED.ANY" from https://perfmon-events.intel.com/ + * supported on Intel Xeon processors: + * - Sapphire Rapids, Ice Lake, Cascade Lake, Skylake. + */ +#define MEM_INST_RETIRED 0xD0 +#define MEM_INST_RETIRED_LOAD RAW_EVENT(MEM_INST_RETIRED, 0x81) +#define MEM_INST_RETIRED_STORE RAW_EVENT(MEM_INST_RETIRED, 0x82) +#define MEM_INST_RETIRED_LOAD_STORE RAW_EVENT(MEM_INST_RETIRED, 0x83) + +static bool supports_event_mem_inst_retired(void) +{ + uint32_t eax, ebx, ecx, edx; + + cpuid(1, &eax, &ebx, &ecx, &edx); + if (x86_family(eax) == 0x6) { + switch (x86_model(eax)) { + /* Sapphire Rapids */ + case 0x8F: + /* Ice Lake */ + case 0x6A: + /* Skylake */ + /* Cascade Lake */ + case 0x55: + return true; + } + } + + return false; +} + +/* + * "LS Dispatch", from Processor Programming Reference + * (PPR) for AMD Family 17h Model 01h, Revision B1 Processors, + * Preliminary Processor Programming Reference (PPR) for AMD Family + * 17h Model 31h, Revision B0 Processors, and Preliminary Processor + * Programming Reference (PPR) for AMD Family 19h Model 01h, Revision + * B1 Processors Volume 1 of 2. + */ +#define LS_DISPATCH 0x29 +#define LS_DISPATCH_LOAD RAW_EVENT(LS_DISPATCH, BIT(0)) +#define LS_DISPATCH_STORE RAW_EVENT(LS_DISPATCH, BIT(1)) +#define LS_DISPATCH_LOAD_STORE RAW_EVENT(LS_DISPATCH, BIT(2)) + +#define INCLUDE_MASKED_ENTRY(event_select, mask, match) \ + KVM_PMU_ENCODE_MASKED_ENTRY(event_select, mask, match, false) +#define EXCLUDE_MASKED_ENTRY(event_select, mask, match) \ + KVM_PMU_ENCODE_MASKED_ENTRY(event_select, mask, match, true) + +static void masked_events_guest_test(uint32_t msr_base) +{ + /* + * The actual value of the counters don't determine the outcome of + * the test. Only that they are zero or non-zero. 
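+ * The asm blob below performs one store ("movl $0"), one load
+ * ("movl %[v], %%eax") and one read-modify-write ("incl"), so every counter
+ * the filter permits should advance at least once.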
+ */ + const uint64_t loads = rdmsr(msr_base + 0); + const uint64_t stores = rdmsr(msr_base + 1); + const uint64_t loads_stores = rdmsr(msr_base + 2); + int val; + + + __asm__ __volatile__("movl $0, %[v];" + "movl %[v], %%eax;" + "incl %[v];" + : [v]"+m"(val) :: "eax"); + + pmc_results.loads = rdmsr(msr_base + 0) - loads; + pmc_results.stores = rdmsr(msr_base + 1) - stores; + pmc_results.loads_stores = rdmsr(msr_base + 2) - loads_stores; +} + +static void intel_masked_events_guest_code(void) +{ + for (;;) { + wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0); + + wrmsr(MSR_P6_EVNTSEL0 + 0, ARCH_PERFMON_EVENTSEL_ENABLE | + ARCH_PERFMON_EVENTSEL_OS | MEM_INST_RETIRED_LOAD); + wrmsr(MSR_P6_EVNTSEL0 + 1, ARCH_PERFMON_EVENTSEL_ENABLE | + ARCH_PERFMON_EVENTSEL_OS | MEM_INST_RETIRED_STORE); + wrmsr(MSR_P6_EVNTSEL0 + 2, ARCH_PERFMON_EVENTSEL_ENABLE | + ARCH_PERFMON_EVENTSEL_OS | MEM_INST_RETIRED_LOAD_STORE); + + wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0x7); + + masked_events_guest_test(MSR_IA32_PMC0); + GUEST_SYNC(0); + } +} + +static void amd_masked_events_guest_code(void) +{ + for (;;) { + wrmsr(MSR_K7_EVNTSEL0, 0); + wrmsr(MSR_K7_EVNTSEL1, 0); + wrmsr(MSR_K7_EVNTSEL2, 0); + + wrmsr(MSR_K7_EVNTSEL0, ARCH_PERFMON_EVENTSEL_ENABLE | + ARCH_PERFMON_EVENTSEL_OS | LS_DISPATCH_LOAD); + wrmsr(MSR_K7_EVNTSEL1, ARCH_PERFMON_EVENTSEL_ENABLE | + ARCH_PERFMON_EVENTSEL_OS | LS_DISPATCH_STORE); + wrmsr(MSR_K7_EVNTSEL2, ARCH_PERFMON_EVENTSEL_ENABLE | + ARCH_PERFMON_EVENTSEL_OS | LS_DISPATCH_LOAD_STORE); + + masked_events_guest_test(MSR_K7_PERFCTR0); + GUEST_SYNC(0); + } +} + +static void run_masked_events_test(struct kvm_vcpu *vcpu, + const uint64_t masked_events[], + const int nmasked_events) +{ + struct __kvm_pmu_event_filter f = { + .nevents = nmasked_events, + .action = KVM_PMU_EVENT_ALLOW, + .flags = KVM_PMU_EVENT_FLAG_MASKED_EVENTS, + }; + + memcpy(f.events, masked_events, sizeof(uint64_t) * nmasked_events); + test_with_filter(vcpu, &f); +} + +#define ALLOW_LOADS BIT(0) +#define ALLOW_STORES BIT(1) +#define ALLOW_LOADS_STORES BIT(2) + +struct masked_events_test { + uint64_t intel_events[MAX_TEST_EVENTS]; + uint64_t intel_event_end; + uint64_t amd_events[MAX_TEST_EVENTS]; + uint64_t amd_event_end; + const char *msg; + uint32_t flags; +}; + +/* + * These are the test cases for the masked events tests. + * + * For each test, the guest enables 3 PMU counters (loads, stores, + * loads + stores). The filter is then set in KVM with the masked events + * provided. The test then verifies that the counters agree with which + * ones should be counting and which ones should be filtered. 
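+ *
+ * As a concrete example of the encoding: a filter entry matches when,
+ * roughly, (guest unit mask & mask) == match. So
+ * INCLUDE_MASKED_ENTRY(MEM_INST_RETIRED, 0xFF, 0x81) allows only ALL_LOADS,
+ * while INCLUDE_MASKED_ENTRY(MEM_INST_RETIRED, 0x7C, 0) matches 0x81, 0x82
+ * and 0x83 at once and is then narrowed by EXCLUDE_MASKED_ENTRY() entries.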
+ */ +const struct masked_events_test test_cases[] = { + { + .intel_events = { + INCLUDE_MASKED_ENTRY(MEM_INST_RETIRED, 0xFF, 0x81), + }, + .amd_events = { + INCLUDE_MASKED_ENTRY(LS_DISPATCH, 0xFF, BIT(0)), + }, + .msg = "Only allow loads.", + .flags = ALLOW_LOADS, + }, { + .intel_events = { + INCLUDE_MASKED_ENTRY(MEM_INST_RETIRED, 0xFF, 0x82), + }, + .amd_events = { + INCLUDE_MASKED_ENTRY(LS_DISPATCH, 0xFF, BIT(1)), + }, + .msg = "Only allow stores.", + .flags = ALLOW_STORES, + }, { + .intel_events = { + INCLUDE_MASKED_ENTRY(MEM_INST_RETIRED, 0xFF, 0x83), + }, + .amd_events = { + INCLUDE_MASKED_ENTRY(LS_DISPATCH, 0xFF, BIT(2)), + }, + .msg = "Only allow loads + stores.", + .flags = ALLOW_LOADS_STORES, + }, { + .intel_events = { + INCLUDE_MASKED_ENTRY(MEM_INST_RETIRED, 0x7C, 0), + EXCLUDE_MASKED_ENTRY(MEM_INST_RETIRED, 0xFF, 0x83), + }, + .amd_events = { + INCLUDE_MASKED_ENTRY(LS_DISPATCH, ~(BIT(0) | BIT(1)), 0), + }, + .msg = "Only allow loads and stores.", + .flags = ALLOW_LOADS | ALLOW_STORES, + }, { + .intel_events = { + INCLUDE_MASKED_ENTRY(MEM_INST_RETIRED, 0x7C, 0), + EXCLUDE_MASKED_ENTRY(MEM_INST_RETIRED, 0xFF, 0x82), + }, + .amd_events = { + INCLUDE_MASKED_ENTRY(LS_DISPATCH, 0xF8, 0), + EXCLUDE_MASKED_ENTRY(LS_DISPATCH, 0xFF, BIT(1)), + }, + .msg = "Only allow loads and loads + stores.", + .flags = ALLOW_LOADS | ALLOW_LOADS_STORES + }, { + .intel_events = { + INCLUDE_MASKED_ENTRY(MEM_INST_RETIRED, 0xFE, 0x82), + }, + .amd_events = { + INCLUDE_MASKED_ENTRY(LS_DISPATCH, 0xF8, 0), + EXCLUDE_MASKED_ENTRY(LS_DISPATCH, 0xFF, BIT(0)), + }, + .msg = "Only allow stores and loads + stores.", + .flags = ALLOW_STORES | ALLOW_LOADS_STORES + }, { + .intel_events = { + INCLUDE_MASKED_ENTRY(MEM_INST_RETIRED, 0x7C, 0), + }, + .amd_events = { + INCLUDE_MASKED_ENTRY(LS_DISPATCH, 0xF8, 0), + }, + .msg = "Only allow loads, stores, and loads + stores.", + .flags = ALLOW_LOADS | ALLOW_STORES | ALLOW_LOADS_STORES + }, +}; + +static int append_test_events(const struct masked_events_test *test, + uint64_t *events, int nevents) +{ + const uint64_t *evts; + int i; + + evts = use_intel_pmu() ? test->intel_events : test->amd_events; + for (i = 0; i < MAX_TEST_EVENTS; i++) { + if (evts[i] == 0) + break; + + events[nevents + i] = evts[i]; + } + + return nevents + i; +} + +static bool bool_eq(bool a, bool b) +{ + return a == b; +} + +static void run_masked_events_tests(struct kvm_vcpu *vcpu, uint64_t *events, + int nevents) +{ + int ntests = ARRAY_SIZE(test_cases); + int i, n; + + for (i = 0; i < ntests; i++) { + const struct masked_events_test *test = &test_cases[i]; + + /* Do any test case events overflow MAX_TEST_EVENTS? 
*/ + assert(test->intel_event_end == 0); + assert(test->amd_event_end == 0); + + n = append_test_events(test, events, nevents); + + run_masked_events_test(vcpu, events, n); + + TEST_ASSERT(bool_eq(pmc_results.loads, test->flags & ALLOW_LOADS) && + bool_eq(pmc_results.stores, test->flags & ALLOW_STORES) && + bool_eq(pmc_results.loads_stores, + test->flags & ALLOW_LOADS_STORES), + "%s loads: %lu, stores: %lu, loads + stores: %lu", + test->msg, pmc_results.loads, pmc_results.stores, + pmc_results.loads_stores); + } +} + +static void add_dummy_events(uint64_t *events, int nevents) +{ + int i; + + for (i = 0; i < nevents; i++) { + int event_select = i % 0xFF; + bool exclude = ((i % 4) == 0); + + if (event_select == MEM_INST_RETIRED || + event_select == LS_DISPATCH) + event_select++; + + events[i] = KVM_PMU_ENCODE_MASKED_ENTRY(event_select, 0, + 0, exclude); + } +} + +static void test_masked_events(struct kvm_vcpu *vcpu) +{ + int nevents = KVM_PMU_EVENT_FILTER_MAX_EVENTS - MAX_TEST_EVENTS; + uint64_t events[KVM_PMU_EVENT_FILTER_MAX_EVENTS]; + + /* Run the test cases against a sparse PMU event filter. */ + run_masked_events_tests(vcpu, events, 0); + + /* Run the test cases against a dense PMU event filter. */ + add_dummy_events(events, KVM_PMU_EVENT_FILTER_MAX_EVENTS); + run_masked_events_tests(vcpu, events, nevents); +} + +static int set_pmu_event_filter(struct kvm_vcpu *vcpu, + struct __kvm_pmu_event_filter *__f) +{ + struct kvm_pmu_event_filter *f = (void *)__f; + + return __vm_ioctl(vcpu->vm, KVM_SET_PMU_EVENT_FILTER, f); +} + +static int set_pmu_single_event_filter(struct kvm_vcpu *vcpu, uint64_t event, + uint32_t flags, uint32_t action) +{ + struct __kvm_pmu_event_filter f = { + .nevents = 1, + .flags = flags, + .action = action, + .events = { + event, + }, + }; + + return set_pmu_event_filter(vcpu, &f); +} + +static void test_filter_ioctl(struct kvm_vcpu *vcpu) +{ + uint8_t nr_fixed_counters = kvm_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS); + struct __kvm_pmu_event_filter f; + uint64_t e = ~0ul; + int r; + + /* + * Unfortunately having invalid bits set in event data is expected to + * pass when flags == 0 (bits other than eventsel+umask). 
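+ * E.g. the ~0ul event programmed below must be accepted with flags == 0,
+ * but the same value must be rejected once KVM_PMU_EVENT_FLAG_MASKED_EVENTS
+ * is set.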
+ */ + r = set_pmu_single_event_filter(vcpu, e, 0, KVM_PMU_EVENT_ALLOW); + TEST_ASSERT(r == 0, "Valid PMU Event Filter is failing"); + + r = set_pmu_single_event_filter(vcpu, e, + KVM_PMU_EVENT_FLAG_MASKED_EVENTS, + KVM_PMU_EVENT_ALLOW); + TEST_ASSERT(r != 0, "Invalid PMU Event Filter is expected to fail"); + + e = KVM_PMU_ENCODE_MASKED_ENTRY(0xff, 0xff, 0xff, 0xf); + r = set_pmu_single_event_filter(vcpu, e, + KVM_PMU_EVENT_FLAG_MASKED_EVENTS, + KVM_PMU_EVENT_ALLOW); + TEST_ASSERT(r == 0, "Valid PMU Event Filter is failing"); + + f = base_event_filter; + f.action = PMU_EVENT_FILTER_INVALID_ACTION; + r = set_pmu_event_filter(vcpu, &f); + TEST_ASSERT(r, "Set invalid action is expected to fail"); + + f = base_event_filter; + f.flags = PMU_EVENT_FILTER_INVALID_FLAGS; + r = set_pmu_event_filter(vcpu, &f); + TEST_ASSERT(r, "Set invalid flags is expected to fail"); + + f = base_event_filter; + f.nevents = PMU_EVENT_FILTER_INVALID_NEVENTS; + r = set_pmu_event_filter(vcpu, &f); + TEST_ASSERT(r, "Exceeding the max number of filter events should fail"); + + f = base_event_filter; + f.fixed_counter_bitmap = ~GENMASK_ULL(nr_fixed_counters, 0); + r = set_pmu_event_filter(vcpu, &f); + TEST_ASSERT(!r, "Masking non-existent fixed counters should be allowed"); +} + +static void intel_run_fixed_counter_guest_code(uint8_t idx) +{ + for (;;) { + wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0); + wrmsr(MSR_CORE_PERF_FIXED_CTR0 + idx, 0); + + /* Only OS_EN bit is enabled for fixed counter[idx]. */ + wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, FIXED_PMC_CTRL(idx, FIXED_PMC_KERNEL)); + wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, FIXED_PMC_GLOBAL_CTRL_ENABLE(idx)); + __asm__ __volatile__("loop ." : "+c"((int){NUM_BRANCHES})); + wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0); + + GUEST_SYNC(rdmsr(MSR_CORE_PERF_FIXED_CTR0 + idx)); + } +} + +static uint64_t test_with_fixed_counter_filter(struct kvm_vcpu *vcpu, + uint32_t action, uint32_t bitmap) +{ + struct __kvm_pmu_event_filter f = { + .action = action, + .fixed_counter_bitmap = bitmap, + }; + set_pmu_event_filter(vcpu, &f); + + return run_vcpu_to_sync(vcpu); +} + +static uint64_t test_set_gp_and_fixed_event_filter(struct kvm_vcpu *vcpu, + uint32_t action, + uint32_t bitmap) +{ + struct __kvm_pmu_event_filter f = base_event_filter; + + f.action = action; + f.fixed_counter_bitmap = bitmap; + set_pmu_event_filter(vcpu, &f); + + return run_vcpu_to_sync(vcpu); +} + +static void __test_fixed_counter_bitmap(struct kvm_vcpu *vcpu, uint8_t idx, + uint8_t nr_fixed_counters) +{ + unsigned int i; + uint32_t bitmap; + uint64_t count; + + TEST_ASSERT(nr_fixed_counters < sizeof(bitmap) * 8, + "Invalid nr_fixed_counters"); + + /* + * Check the fixed performance counter can count normally when KVM + * userspace doesn't set any pmu filter. + */ + count = run_vcpu_to_sync(vcpu); + TEST_ASSERT(count, "Unexpected count value: %ld", count); + + for (i = 0; i < BIT(nr_fixed_counters); i++) { + bitmap = BIT(i); + count = test_with_fixed_counter_filter(vcpu, KVM_PMU_EVENT_ALLOW, + bitmap); + TEST_ASSERT_EQ(!!count, !!(bitmap & BIT(idx))); + + count = test_with_fixed_counter_filter(vcpu, KVM_PMU_EVENT_DENY, + bitmap); + TEST_ASSERT_EQ(!!count, !(bitmap & BIT(idx))); + + /* + * Check that fixed_counter_bitmap has higher priority than + * events[] when both are set. 
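+ * E.g. with an ALLOW action the fixed counter should count only when
+ * BIT(idx) is set in fixed_counter_bitmap, and with a DENY action it should
+ * stop counting only when BIT(idx) is set, regardless of events[].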
+ */ + count = test_set_gp_and_fixed_event_filter(vcpu, + KVM_PMU_EVENT_ALLOW, + bitmap); + TEST_ASSERT_EQ(!!count, !!(bitmap & BIT(idx))); + + count = test_set_gp_and_fixed_event_filter(vcpu, + KVM_PMU_EVENT_DENY, + bitmap); + TEST_ASSERT_EQ(!!count, !(bitmap & BIT(idx))); + } +} + +static void test_fixed_counter_bitmap(void) +{ + uint8_t nr_fixed_counters = kvm_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS); + struct kvm_vm *vm; + struct kvm_vcpu *vcpu; + uint8_t idx; + + /* + * Check that pmu_event_filter works as expected when it's applied to + * fixed performance counters. + */ + for (idx = 0; idx < nr_fixed_counters; idx++) { + vm = vm_create_with_one_vcpu(&vcpu, + intel_run_fixed_counter_guest_code); + vcpu_args_set(vcpu, 1, idx); + __test_fixed_counter_bitmap(vcpu, idx, nr_fixed_counters); + kvm_vm_free(vm); + } +} + +int main(int argc, char *argv[]) +{ + void (*guest_code)(void); + struct kvm_vcpu *vcpu, *vcpu2 = NULL; + struct kvm_vm *vm; + + TEST_REQUIRE(kvm_is_pmu_enabled()); + TEST_REQUIRE(kvm_has_cap(KVM_CAP_PMU_EVENT_FILTER)); + TEST_REQUIRE(kvm_has_cap(KVM_CAP_PMU_EVENT_MASKED_EVENTS)); + + TEST_REQUIRE(use_intel_pmu() || use_amd_pmu()); + guest_code = use_intel_pmu() ? intel_guest_code : amd_guest_code; + + vm = vm_create_with_one_vcpu(&vcpu, guest_code); + + TEST_REQUIRE(sanity_check_pmu(vcpu)); + + if (use_amd_pmu()) + test_amd_deny_list(vcpu); + + test_without_filter(vcpu); + test_member_deny_list(vcpu); + test_member_allow_list(vcpu); + test_not_member_deny_list(vcpu); + test_not_member_allow_list(vcpu); + + if (use_intel_pmu() && + supports_event_mem_inst_retired() && + kvm_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS) >= 3) + vcpu2 = vm_vcpu_add(vm, 2, intel_masked_events_guest_code); + else if (use_amd_pmu()) + vcpu2 = vm_vcpu_add(vm, 2, amd_masked_events_guest_code); + + if (vcpu2) + test_masked_events(vcpu2); + test_filter_ioctl(vcpu); + + kvm_vm_free(vm); + + test_pmu_config_disable(guest_code); + test_fixed_counter_bitmap(); + + return 0; +} diff --git a/tools/testing/selftests/kvm/x86/private_mem_conversions_test.c b/tools/testing/selftests/kvm/x86/private_mem_conversions_test.c new file mode 100644 index 000000000000..82a8d88b5338 --- /dev/null +++ b/tools/testing/selftests/kvm/x86/private_mem_conversions_test.c @@ -0,0 +1,483 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2022, Google LLC. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include +#include + +#define BASE_DATA_SLOT 10 +#define BASE_DATA_GPA ((uint64_t)(1ull << 32)) +#define PER_CPU_DATA_SIZE ((uint64_t)(SZ_2M + PAGE_SIZE)) + +/* Horrific macro so that the line info is captured accurately :-( */ +#define memcmp_g(gpa, pattern, size) \ +do { \ + uint8_t *mem = (uint8_t *)gpa; \ + size_t i; \ + \ + for (i = 0; i < size; i++) \ + __GUEST_ASSERT(mem[i] == pattern, \ + "Guest expected 0x%x at offset %lu (gpa 0x%lx), got 0x%x", \ + pattern, i, gpa + i, mem[i]); \ +} while (0) + +static void memcmp_h(uint8_t *mem, uint64_t gpa, uint8_t pattern, size_t size) +{ + size_t i; + + for (i = 0; i < size; i++) + TEST_ASSERT(mem[i] == pattern, + "Host expected 0x%x at gpa 0x%lx, got 0x%x", + pattern, gpa + i, mem[i]); +} + +/* + * Run memory conversion tests with explicit conversion: + * Execute KVM hypercall to map/unmap gpa range which will cause userspace exit + * to back/unback private memory. Subsequent accesses by guest to the gpa range + * will not cause exit to userspace. 
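+ * (The conversion hypercall is KVM_HC_MAP_GPA_RANGE; the host side handles the
+ * resulting KVM_EXIT_HYPERCALL by toggling KVM_MEMORY_ATTRIBUTE_PRIVATE and/or
+ * punching holes in the guest_memfd, see handle_exit_hypercall() below.)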
+ * + * Test memory conversion scenarios with following steps: + * 1) Access private memory using private access and verify that memory contents + * are not visible to userspace. + * 2) Convert memory to shared using explicit conversions and ensure that + * userspace is able to access the shared regions. + * 3) Convert memory back to private using explicit conversions and ensure that + * userspace is again not able to access converted private regions. + */ + +#define GUEST_STAGE(o, s) { .offset = o, .size = s } + +enum ucall_syncs { + SYNC_SHARED, + SYNC_PRIVATE, +}; + +static void guest_sync_shared(uint64_t gpa, uint64_t size, + uint8_t current_pattern, uint8_t new_pattern) +{ + GUEST_SYNC5(SYNC_SHARED, gpa, size, current_pattern, new_pattern); +} + +static void guest_sync_private(uint64_t gpa, uint64_t size, uint8_t pattern) +{ + GUEST_SYNC4(SYNC_PRIVATE, gpa, size, pattern); +} + +/* Arbitrary values, KVM doesn't care about the attribute flags. */ +#define MAP_GPA_SET_ATTRIBUTES BIT(0) +#define MAP_GPA_SHARED BIT(1) +#define MAP_GPA_DO_FALLOCATE BIT(2) + +static void guest_map_mem(uint64_t gpa, uint64_t size, bool map_shared, + bool do_fallocate) +{ + uint64_t flags = MAP_GPA_SET_ATTRIBUTES; + + if (map_shared) + flags |= MAP_GPA_SHARED; + if (do_fallocate) + flags |= MAP_GPA_DO_FALLOCATE; + kvm_hypercall_map_gpa_range(gpa, size, flags); +} + +static void guest_map_shared(uint64_t gpa, uint64_t size, bool do_fallocate) +{ + guest_map_mem(gpa, size, true, do_fallocate); +} + +static void guest_map_private(uint64_t gpa, uint64_t size, bool do_fallocate) +{ + guest_map_mem(gpa, size, false, do_fallocate); +} + +struct { + uint64_t offset; + uint64_t size; +} static const test_ranges[] = { + GUEST_STAGE(0, PAGE_SIZE), + GUEST_STAGE(0, SZ_2M), + GUEST_STAGE(PAGE_SIZE, PAGE_SIZE), + GUEST_STAGE(PAGE_SIZE, SZ_2M), + GUEST_STAGE(SZ_2M, PAGE_SIZE), +}; + +static void guest_test_explicit_conversion(uint64_t base_gpa, bool do_fallocate) +{ + const uint8_t def_p = 0xaa; + const uint8_t init_p = 0xcc; + uint64_t j; + int i; + + /* Memory should be shared by default. */ + memset((void *)base_gpa, def_p, PER_CPU_DATA_SIZE); + memcmp_g(base_gpa, def_p, PER_CPU_DATA_SIZE); + guest_sync_shared(base_gpa, PER_CPU_DATA_SIZE, def_p, init_p); + + memcmp_g(base_gpa, init_p, PER_CPU_DATA_SIZE); + + for (i = 0; i < ARRAY_SIZE(test_ranges); i++) { + uint64_t gpa = base_gpa + test_ranges[i].offset; + uint64_t size = test_ranges[i].size; + uint8_t p1 = 0x11; + uint8_t p2 = 0x22; + uint8_t p3 = 0x33; + uint8_t p4 = 0x44; + + /* + * Set the test region to pattern one to differentiate it from + * the data range as a whole (contains the initial pattern). + */ + memset((void *)gpa, p1, size); + + /* + * Convert to private, set and verify the private data, and + * then verify that the rest of the data (map shared) still + * holds the initial pattern, and that the host always sees the + * shared memory (initial pattern). Unlike shared memory, + * punching a hole in private memory is destructive, i.e. + * previous values aren't guaranteed to be preserved. + */ + guest_map_private(gpa, size, do_fallocate); + + if (size > PAGE_SIZE) { + memset((void *)gpa, p2, PAGE_SIZE); + goto skip; + } + + memset((void *)gpa, p2, size); + guest_sync_private(gpa, size, p1); + + /* + * Verify that the private memory was set to pattern two, and + * that shared memory still holds the initial pattern. 
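+		 * The two boundary memcmp_g() calls below check the untouched
+		 * regions before and after the test range within the per-vCPU
+		 * chunk.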
+ */ + memcmp_g(gpa, p2, size); + if (gpa > base_gpa) + memcmp_g(base_gpa, init_p, gpa - base_gpa); + if (gpa + size < base_gpa + PER_CPU_DATA_SIZE) + memcmp_g(gpa + size, init_p, + (base_gpa + PER_CPU_DATA_SIZE) - (gpa + size)); + + /* + * Convert odd-number page frames back to shared to verify KVM + * also correctly handles holes in private ranges. + */ + for (j = 0; j < size; j += PAGE_SIZE) { + if ((j >> PAGE_SHIFT) & 1) { + guest_map_shared(gpa + j, PAGE_SIZE, do_fallocate); + guest_sync_shared(gpa + j, PAGE_SIZE, p1, p3); + + memcmp_g(gpa + j, p3, PAGE_SIZE); + } else { + guest_sync_private(gpa + j, PAGE_SIZE, p1); + } + } + +skip: + /* + * Convert the entire region back to shared, explicitly write + * pattern three to fill in the even-number frames before + * asking the host to verify (and write pattern four). + */ + guest_map_shared(gpa, size, do_fallocate); + memset((void *)gpa, p3, size); + guest_sync_shared(gpa, size, p3, p4); + memcmp_g(gpa, p4, size); + + /* Reset the shared memory back to the initial pattern. */ + memset((void *)gpa, init_p, size); + + /* + * Free (via PUNCH_HOLE) *all* private memory so that the next + * iteration starts from a clean slate, e.g. with respect to + * whether or not there are pages/folios in guest_mem. + */ + guest_map_shared(base_gpa, PER_CPU_DATA_SIZE, true); + } +} + +static void guest_punch_hole(uint64_t gpa, uint64_t size) +{ + /* "Mapping" memory shared via fallocate() is done via PUNCH_HOLE. */ + uint64_t flags = MAP_GPA_SHARED | MAP_GPA_DO_FALLOCATE; + + kvm_hypercall_map_gpa_range(gpa, size, flags); +} + +/* + * Test that PUNCH_HOLE actually frees memory by punching holes without doing a + * proper conversion. Freeing (PUNCH_HOLE) should zap SPTEs, and reallocating + * (subsequent fault) should zero memory. + */ +static void guest_test_punch_hole(uint64_t base_gpa, bool precise) +{ + const uint8_t init_p = 0xcc; + int i; + + /* + * Convert the entire range to private, this testcase is all about + * punching holes in guest_memfd, i.e. shared mappings aren't needed. + */ + guest_map_private(base_gpa, PER_CPU_DATA_SIZE, false); + + for (i = 0; i < ARRAY_SIZE(test_ranges); i++) { + uint64_t gpa = base_gpa + test_ranges[i].offset; + uint64_t size = test_ranges[i].size; + + /* + * Free all memory before each iteration, even for the !precise + * case where the memory will be faulted back in. Freeing and + * reallocating should obviously work, and freeing all memory + * minimizes the probability of cross-testcase influence. + */ + guest_punch_hole(base_gpa, PER_CPU_DATA_SIZE); + + /* Fault-in and initialize memory, and verify the pattern. */ + if (precise) { + memset((void *)gpa, init_p, size); + memcmp_g(gpa, init_p, size); + } else { + memset((void *)base_gpa, init_p, PER_CPU_DATA_SIZE); + memcmp_g(base_gpa, init_p, PER_CPU_DATA_SIZE); + } + + /* + * Punch a hole at the target range and verify that reads from + * the guest succeed and return zeroes. + */ + guest_punch_hole(gpa, size); + memcmp_g(gpa, 0, size); + } +} + +static void guest_code(uint64_t base_gpa) +{ + /* + * Run the conversion test twice, with and without doing fallocate() on + * the guest_memfd backing when converting between shared and private. + */ + guest_test_explicit_conversion(base_gpa, false); + guest_test_explicit_conversion(base_gpa, true); + + /* + * Run the PUNCH_HOLE test twice too, once with the entire guest_memfd + * faulted in, once with only the target range faulted in. 
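+	 * (precise == false faults in the entire per-vCPU chunk beforehand,
+	 * precise == true faults in only the target range.)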
+ */ + guest_test_punch_hole(base_gpa, false); + guest_test_punch_hole(base_gpa, true); + GUEST_DONE(); +} + +static void handle_exit_hypercall(struct kvm_vcpu *vcpu) +{ + struct kvm_run *run = vcpu->run; + uint64_t gpa = run->hypercall.args[0]; + uint64_t size = run->hypercall.args[1] * PAGE_SIZE; + bool set_attributes = run->hypercall.args[2] & MAP_GPA_SET_ATTRIBUTES; + bool map_shared = run->hypercall.args[2] & MAP_GPA_SHARED; + bool do_fallocate = run->hypercall.args[2] & MAP_GPA_DO_FALLOCATE; + struct kvm_vm *vm = vcpu->vm; + + TEST_ASSERT(run->hypercall.nr == KVM_HC_MAP_GPA_RANGE, + "Wanted MAP_GPA_RANGE (%u), got '%llu'", + KVM_HC_MAP_GPA_RANGE, run->hypercall.nr); + + if (do_fallocate) + vm_guest_mem_fallocate(vm, gpa, size, map_shared); + + if (set_attributes) + vm_set_memory_attributes(vm, gpa, size, + map_shared ? 0 : KVM_MEMORY_ATTRIBUTE_PRIVATE); + run->hypercall.ret = 0; +} + +static bool run_vcpus; + +static void *__test_mem_conversions(void *__vcpu) +{ + struct kvm_vcpu *vcpu = __vcpu; + struct kvm_run *run = vcpu->run; + struct kvm_vm *vm = vcpu->vm; + struct ucall uc; + + while (!READ_ONCE(run_vcpus)) + ; + + for ( ;; ) { + vcpu_run(vcpu); + + if (run->exit_reason == KVM_EXIT_HYPERCALL) { + handle_exit_hypercall(vcpu); + continue; + } + + TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, + "Wanted KVM_EXIT_IO, got exit reason: %u (%s)", + run->exit_reason, exit_reason_str(run->exit_reason)); + + switch (get_ucall(vcpu, &uc)) { + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + case UCALL_SYNC: { + uint64_t gpa = uc.args[1]; + size_t size = uc.args[2]; + size_t i; + + TEST_ASSERT(uc.args[0] == SYNC_SHARED || + uc.args[0] == SYNC_PRIVATE, + "Unknown sync command '%ld'", uc.args[0]); + + for (i = 0; i < size; i += vm->page_size) { + size_t nr_bytes = min_t(size_t, vm->page_size, size - i); + uint8_t *hva = addr_gpa2hva(vm, gpa + i); + + /* In all cases, the host should observe the shared data. */ + memcmp_h(hva, gpa + i, uc.args[3], nr_bytes); + + /* For shared, write the new pattern to guest memory. */ + if (uc.args[0] == SYNC_SHARED) + memset(hva, uc.args[4], nr_bytes); + } + break; + } + case UCALL_DONE: + return NULL; + default: + TEST_FAIL("Unknown ucall 0x%lx.", uc.cmd); + } + } +} + +static void test_mem_conversions(enum vm_mem_backing_src_type src_type, uint32_t nr_vcpus, + uint32_t nr_memslots) +{ + /* + * Allocate enough memory so that each vCPU's chunk of memory can be + * naturally aligned with respect to the size of the backing store. 
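+	 * E.g. with a 4K or 2M backing page size the alignment is 2M, so
+	 * PER_CPU_DATA_SIZE (2M + 4K) is rounded up to a 4M chunk per vCPU.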
+ */ + const size_t alignment = max_t(size_t, SZ_2M, get_backing_src_pagesz(src_type)); + const size_t per_cpu_size = align_up(PER_CPU_DATA_SIZE, alignment); + const size_t memfd_size = per_cpu_size * nr_vcpus; + const size_t slot_size = memfd_size / nr_memslots; + struct kvm_vcpu *vcpus[KVM_MAX_VCPUS]; + pthread_t threads[KVM_MAX_VCPUS]; + struct kvm_vm *vm; + int memfd, i, r; + + const struct vm_shape shape = { + .mode = VM_MODE_DEFAULT, + .type = KVM_X86_SW_PROTECTED_VM, + }; + + TEST_ASSERT(slot_size * nr_memslots == memfd_size, + "The memfd size (0x%lx) needs to be cleanly divisible by the number of memslots (%u)", + memfd_size, nr_memslots); + vm = __vm_create_with_vcpus(shape, nr_vcpus, 0, guest_code, vcpus); + + vm_enable_cap(vm, KVM_CAP_EXIT_HYPERCALL, (1 << KVM_HC_MAP_GPA_RANGE)); + + memfd = vm_create_guest_memfd(vm, memfd_size, 0); + + for (i = 0; i < nr_memslots; i++) + vm_mem_add(vm, src_type, BASE_DATA_GPA + slot_size * i, + BASE_DATA_SLOT + i, slot_size / vm->page_size, + KVM_MEM_GUEST_MEMFD, memfd, slot_size * i); + + for (i = 0; i < nr_vcpus; i++) { + uint64_t gpa = BASE_DATA_GPA + i * per_cpu_size; + + vcpu_args_set(vcpus[i], 1, gpa); + + /* + * Map only what is needed so that an out-of-bounds access + * results #PF => SHUTDOWN instead of data corruption. + */ + virt_map(vm, gpa, gpa, PER_CPU_DATA_SIZE / vm->page_size); + + pthread_create(&threads[i], NULL, __test_mem_conversions, vcpus[i]); + } + + WRITE_ONCE(run_vcpus, true); + + for (i = 0; i < nr_vcpus; i++) + pthread_join(threads[i], NULL); + + kvm_vm_free(vm); + + /* + * Allocate and free memory from the guest_memfd after closing the VM + * fd. The guest_memfd is gifted a reference to its owning VM, i.e. + * should prevent the VM from being fully destroyed until the last + * reference to the guest_memfd is also put. + */ + r = fallocate(memfd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE, 0, memfd_size); + TEST_ASSERT(!r, __KVM_SYSCALL_ERROR("fallocate()", r)); + + r = fallocate(memfd, FALLOC_FL_KEEP_SIZE, 0, memfd_size); + TEST_ASSERT(!r, __KVM_SYSCALL_ERROR("fallocate()", r)); + + close(memfd); +} + +static void usage(const char *cmd) +{ + puts(""); + printf("usage: %s [-h] [-m nr_memslots] [-s mem_type] [-n nr_vcpus]\n", cmd); + puts(""); + backing_src_help("-s"); + puts(""); + puts(" -n: specify the number of vcpus (default: 1)"); + puts(""); + puts(" -m: specify the number of memslots (default: 1)"); + puts(""); +} + +int main(int argc, char *argv[]) +{ + enum vm_mem_backing_src_type src_type = DEFAULT_VM_MEM_SRC; + uint32_t nr_memslots = 1; + uint32_t nr_vcpus = 1; + int opt; + + TEST_REQUIRE(kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SW_PROTECTED_VM)); + + while ((opt = getopt(argc, argv, "hm:s:n:")) != -1) { + switch (opt) { + case 's': + src_type = parse_backing_src_type(optarg); + break; + case 'n': + nr_vcpus = atoi_positive("nr_vcpus", optarg); + break; + case 'm': + nr_memslots = atoi_positive("nr_memslots", optarg); + break; + case 'h': + default: + usage(argv[0]); + exit(0); + } + } + + test_mem_conversions(src_type, nr_vcpus, nr_memslots); + + return 0; +} diff --git a/tools/testing/selftests/kvm/x86/private_mem_kvm_exits_test.c b/tools/testing/selftests/kvm/x86/private_mem_kvm_exits_test.c new file mode 100644 index 000000000000..13e72fcec8dd --- /dev/null +++ b/tools/testing/selftests/kvm/x86/private_mem_kvm_exits_test.c @@ -0,0 +1,120 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2023, Google LLC. 
+ */ +#include +#include +#include + +#include "kvm_util.h" +#include "processor.h" +#include "test_util.h" + +/* Arbitrarily selected to avoid overlaps with anything else */ +#define EXITS_TEST_GVA 0xc0000000 +#define EXITS_TEST_GPA EXITS_TEST_GVA +#define EXITS_TEST_NPAGES 1 +#define EXITS_TEST_SIZE (EXITS_TEST_NPAGES * PAGE_SIZE) +#define EXITS_TEST_SLOT 10 + +static uint64_t guest_repeatedly_read(void) +{ + volatile uint64_t value; + + while (true) + value = *((uint64_t *) EXITS_TEST_GVA); + + return value; +} + +static uint32_t run_vcpu_get_exit_reason(struct kvm_vcpu *vcpu) +{ + int r; + + r = _vcpu_run(vcpu); + if (r) { + TEST_ASSERT(errno == EFAULT, KVM_IOCTL_ERROR(KVM_RUN, r)); + TEST_ASSERT_EQ(vcpu->run->exit_reason, KVM_EXIT_MEMORY_FAULT); + } + return vcpu->run->exit_reason; +} + +const struct vm_shape protected_vm_shape = { + .mode = VM_MODE_DEFAULT, + .type = KVM_X86_SW_PROTECTED_VM, +}; + +static void test_private_access_memslot_deleted(void) +{ + struct kvm_vm *vm; + struct kvm_vcpu *vcpu; + pthread_t vm_thread; + void *thread_return; + uint32_t exit_reason; + + vm = vm_create_shape_with_one_vcpu(protected_vm_shape, &vcpu, + guest_repeatedly_read); + + vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, + EXITS_TEST_GPA, EXITS_TEST_SLOT, + EXITS_TEST_NPAGES, + KVM_MEM_GUEST_MEMFD); + + virt_map(vm, EXITS_TEST_GVA, EXITS_TEST_GPA, EXITS_TEST_NPAGES); + + /* Request to access page privately */ + vm_mem_set_private(vm, EXITS_TEST_GPA, EXITS_TEST_SIZE); + + pthread_create(&vm_thread, NULL, + (void *(*)(void *))run_vcpu_get_exit_reason, + (void *)vcpu); + + vm_mem_region_delete(vm, EXITS_TEST_SLOT); + + pthread_join(vm_thread, &thread_return); + exit_reason = (uint32_t)(uint64_t)thread_return; + + TEST_ASSERT_EQ(exit_reason, KVM_EXIT_MEMORY_FAULT); + TEST_ASSERT_EQ(vcpu->run->memory_fault.flags, KVM_MEMORY_EXIT_FLAG_PRIVATE); + TEST_ASSERT_EQ(vcpu->run->memory_fault.gpa, EXITS_TEST_GPA); + TEST_ASSERT_EQ(vcpu->run->memory_fault.size, EXITS_TEST_SIZE); + + kvm_vm_free(vm); +} + +static void test_private_access_memslot_not_private(void) +{ + struct kvm_vm *vm; + struct kvm_vcpu *vcpu; + uint32_t exit_reason; + + vm = vm_create_shape_with_one_vcpu(protected_vm_shape, &vcpu, + guest_repeatedly_read); + + /* Add a non-private memslot (flags = 0) */ + vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, + EXITS_TEST_GPA, EXITS_TEST_SLOT, + EXITS_TEST_NPAGES, 0); + + virt_map(vm, EXITS_TEST_GVA, EXITS_TEST_GPA, EXITS_TEST_NPAGES); + + /* Request to access page privately */ + vm_mem_set_private(vm, EXITS_TEST_GPA, EXITS_TEST_SIZE); + + exit_reason = run_vcpu_get_exit_reason(vcpu); + + TEST_ASSERT_EQ(exit_reason, KVM_EXIT_MEMORY_FAULT); + TEST_ASSERT_EQ(vcpu->run->memory_fault.flags, KVM_MEMORY_EXIT_FLAG_PRIVATE); + TEST_ASSERT_EQ(vcpu->run->memory_fault.gpa, EXITS_TEST_GPA); + TEST_ASSERT_EQ(vcpu->run->memory_fault.size, EXITS_TEST_SIZE); + + kvm_vm_free(vm); +} + +int main(int argc, char *argv[]) +{ + TEST_REQUIRE(kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SW_PROTECTED_VM)); + + test_private_access_memslot_deleted(); + test_private_access_memslot_not_private(); +} diff --git a/tools/testing/selftests/kvm/x86/recalc_apic_map_test.c b/tools/testing/selftests/kvm/x86/recalc_apic_map_test.c new file mode 100644 index 000000000000..cbc92a862ea9 --- /dev/null +++ b/tools/testing/selftests/kvm/x86/recalc_apic_map_test.c @@ -0,0 +1,74 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Test edge cases and race conditions in kvm_recalculate_apic_map(). 
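+ * A worker thread spams KVM_SET_LAPIC, each call of which forces a map
+ * recalculation, while the main thread repeatedly toggles x2APIC on the
+ * highest-numbered vCPU to widen the problematic window.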
+ */ + +#include +#include +#include + +#include "processor.h" +#include "test_util.h" +#include "kvm_util.h" +#include "apic.h" + +#define TIMEOUT 5 /* seconds */ + +#define LAPIC_DISABLED 0 +#define LAPIC_X2APIC (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE) +#define MAX_XAPIC_ID 0xff + +static void *race(void *arg) +{ + struct kvm_lapic_state lapic = {}; + struct kvm_vcpu *vcpu = arg; + + while (1) { + /* Trigger kvm_recalculate_apic_map(). */ + vcpu_ioctl(vcpu, KVM_SET_LAPIC, &lapic); + pthread_testcancel(); + } + + return NULL; +} + +int main(void) +{ + struct kvm_vcpu *vcpus[KVM_MAX_VCPUS]; + struct kvm_vcpu *vcpuN; + struct kvm_vm *vm; + pthread_t thread; + time_t t; + int i; + + kvm_static_assert(KVM_MAX_VCPUS > MAX_XAPIC_ID); + + /* + * Create the max number of vCPUs supported by selftests so that KVM + * has decent amount of work to do when recalculating the map, i.e. to + * make the problematic window large enough to hit. + */ + vm = vm_create_with_vcpus(KVM_MAX_VCPUS, NULL, vcpus); + + /* + * Enable x2APIC on all vCPUs so that KVM doesn't bail from the recalc + * due to vCPUs having aliased xAPIC IDs (truncated to 8 bits). + */ + for (i = 0; i < KVM_MAX_VCPUS; i++) + vcpu_set_msr(vcpus[i], MSR_IA32_APICBASE, LAPIC_X2APIC); + + TEST_ASSERT_EQ(pthread_create(&thread, NULL, race, vcpus[0]), 0); + + vcpuN = vcpus[KVM_MAX_VCPUS - 1]; + for (t = time(NULL) + TIMEOUT; time(NULL) < t;) { + vcpu_set_msr(vcpuN, MSR_IA32_APICBASE, LAPIC_X2APIC); + vcpu_set_msr(vcpuN, MSR_IA32_APICBASE, LAPIC_DISABLED); + } + + TEST_ASSERT_EQ(pthread_cancel(thread), 0); + TEST_ASSERT_EQ(pthread_join(thread, NULL), 0); + + kvm_vm_free(vm); + + return 0; +} diff --git a/tools/testing/selftests/kvm/x86/set_boot_cpu_id.c b/tools/testing/selftests/kvm/x86/set_boot_cpu_id.c new file mode 100644 index 000000000000..49913784bc82 --- /dev/null +++ b/tools/testing/selftests/kvm/x86/set_boot_cpu_id.c @@ -0,0 +1,146 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Test that KVM_SET_BOOT_CPU_ID works as intended + * + * Copyright (C) 2020, Red Hat, Inc. 
+ */ +#include +#include +#include +#include +#include + +#include "test_util.h" +#include "kvm_util.h" +#include "processor.h" +#include "apic.h" + +static void guest_bsp_vcpu(void *arg) +{ + GUEST_SYNC(1); + + GUEST_ASSERT_NE(get_bsp_flag(), 0); + + GUEST_DONE(); +} + +static void guest_not_bsp_vcpu(void *arg) +{ + GUEST_SYNC(1); + + GUEST_ASSERT_EQ(get_bsp_flag(), 0); + + GUEST_DONE(); +} + +static void test_set_invalid_bsp(struct kvm_vm *vm) +{ + unsigned long max_vcpu_id = vm_check_cap(vm, KVM_CAP_MAX_VCPU_ID); + int r; + + if (max_vcpu_id) { + r = __vm_ioctl(vm, KVM_SET_BOOT_CPU_ID, (void *)(max_vcpu_id + 1)); + TEST_ASSERT(r == -1 && errno == EINVAL, "BSP with ID > MAX should fail"); + } + + r = __vm_ioctl(vm, KVM_SET_BOOT_CPU_ID, (void *)(1L << 32)); + TEST_ASSERT(r == -1 && errno == EINVAL, "BSP with ID[63:32]!=0 should fail"); +} + +static void test_set_bsp_busy(struct kvm_vcpu *vcpu, const char *msg) +{ + int r = __vm_ioctl(vcpu->vm, KVM_SET_BOOT_CPU_ID, + (void *)(unsigned long)vcpu->id); + + TEST_ASSERT(r == -1 && errno == EBUSY, "KVM_SET_BOOT_CPU_ID set %s", msg); +} + +static void run_vcpu(struct kvm_vcpu *vcpu) +{ + struct ucall uc; + int stage; + + for (stage = 0; stage < 2; stage++) { + + vcpu_run(vcpu); + + switch (get_ucall(vcpu, &uc)) { + case UCALL_SYNC: + TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") && + uc.args[1] == stage + 1, + "Stage %d: Unexpected register values vmexit, got %lx", + stage + 1, (ulong)uc.args[1]); + test_set_bsp_busy(vcpu, "while running vm"); + break; + case UCALL_DONE: + TEST_ASSERT(stage == 1, + "Expected GUEST_DONE in stage 2, got stage %d", + stage); + break; + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + default: + TEST_ASSERT(false, "Unexpected exit: %s", + exit_reason_str(vcpu->run->exit_reason)); + } + } +} + +static struct kvm_vm *create_vm(uint32_t nr_vcpus, uint32_t bsp_vcpu_id, + struct kvm_vcpu *vcpus[]) +{ + struct kvm_vm *vm; + uint32_t i; + + vm = vm_create(nr_vcpus); + + test_set_invalid_bsp(vm); + + vm_ioctl(vm, KVM_SET_BOOT_CPU_ID, (void *)(unsigned long)bsp_vcpu_id); + + for (i = 0; i < nr_vcpus; i++) + vcpus[i] = vm_vcpu_add(vm, i, i == bsp_vcpu_id ? guest_bsp_vcpu : + guest_not_bsp_vcpu); + return vm; +} + +static void run_vm_bsp(uint32_t bsp_vcpu_id) +{ + struct kvm_vcpu *vcpus[2]; + struct kvm_vm *vm; + + vm = create_vm(ARRAY_SIZE(vcpus), bsp_vcpu_id, vcpus); + + run_vcpu(vcpus[0]); + run_vcpu(vcpus[1]); + + kvm_vm_free(vm); +} + +static void check_set_bsp_busy(void) +{ + struct kvm_vcpu *vcpus[2]; + struct kvm_vm *vm; + + vm = create_vm(ARRAY_SIZE(vcpus), 0, vcpus); + + test_set_bsp_busy(vcpus[1], "after adding vcpu"); + + run_vcpu(vcpus[0]); + run_vcpu(vcpus[1]); + + test_set_bsp_busy(vcpus[1], "to a terminated vcpu"); + + kvm_vm_free(vm); +} + +int main(int argc, char *argv[]) +{ + TEST_REQUIRE(kvm_has_cap(KVM_CAP_SET_BOOT_CPU_ID)); + + run_vm_bsp(0); + run_vm_bsp(1); + run_vm_bsp(0); + + check_set_bsp_busy(); +} diff --git a/tools/testing/selftests/kvm/x86/set_sregs_test.c b/tools/testing/selftests/kvm/x86/set_sregs_test.c new file mode 100644 index 000000000000..c021c0795a96 --- /dev/null +++ b/tools/testing/selftests/kvm/x86/set_sregs_test.c @@ -0,0 +1,141 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * KVM_SET_SREGS tests + * + * Copyright (C) 2018, Google LLC. 
+ * + * This is a regression test for the bug fixed by the following commit: + * d3802286fa0f ("kvm: x86: Disallow illegal IA32_APIC_BASE MSR values") + * + * That bug allowed a user-mode program that called the KVM_SET_SREGS + * ioctl to put a VCPU's local APIC into an invalid state. + */ +#include +#include +#include +#include +#include + +#include "test_util.h" + +#include "kvm_util.h" +#include "processor.h" + +#define TEST_INVALID_CR_BIT(vcpu, cr, orig, bit) \ +do { \ + struct kvm_sregs new; \ + int rc; \ + \ + /* Skip the sub-test, the feature/bit is supported. */ \ + if (orig.cr & bit) \ + break; \ + \ + memcpy(&new, &orig, sizeof(sregs)); \ + new.cr |= bit; \ + \ + rc = _vcpu_sregs_set(vcpu, &new); \ + TEST_ASSERT(rc, "KVM allowed invalid " #cr " bit (0x%lx)", bit); \ + \ + /* Sanity check that KVM didn't change anything. */ \ + vcpu_sregs_get(vcpu, &new); \ + TEST_ASSERT(!memcmp(&new, &orig, sizeof(new)), "KVM modified sregs"); \ +} while (0) + +static uint64_t calc_supported_cr4_feature_bits(void) +{ + uint64_t cr4; + + cr4 = X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE | + X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE | X86_CR4_PGE | + X86_CR4_PCE | X86_CR4_OSFXSR | X86_CR4_OSXMMEXCPT; + if (kvm_cpu_has(X86_FEATURE_UMIP)) + cr4 |= X86_CR4_UMIP; + if (kvm_cpu_has(X86_FEATURE_LA57)) + cr4 |= X86_CR4_LA57; + if (kvm_cpu_has(X86_FEATURE_VMX)) + cr4 |= X86_CR4_VMXE; + if (kvm_cpu_has(X86_FEATURE_SMX)) + cr4 |= X86_CR4_SMXE; + if (kvm_cpu_has(X86_FEATURE_FSGSBASE)) + cr4 |= X86_CR4_FSGSBASE; + if (kvm_cpu_has(X86_FEATURE_PCID)) + cr4 |= X86_CR4_PCIDE; + if (kvm_cpu_has(X86_FEATURE_XSAVE)) + cr4 |= X86_CR4_OSXSAVE; + if (kvm_cpu_has(X86_FEATURE_SMEP)) + cr4 |= X86_CR4_SMEP; + if (kvm_cpu_has(X86_FEATURE_SMAP)) + cr4 |= X86_CR4_SMAP; + if (kvm_cpu_has(X86_FEATURE_PKU)) + cr4 |= X86_CR4_PKE; + + return cr4; +} + +int main(int argc, char *argv[]) +{ + struct kvm_sregs sregs; + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + uint64_t cr4; + int rc, i; + + /* + * Create a dummy VM, specifically to avoid doing KVM_SET_CPUID2, and + * use it to verify all supported CR4 bits can be set prior to defining + * the vCPU model, i.e. without doing KVM_SET_CPUID2. + */ + vm = vm_create_barebones(); + vcpu = __vm_vcpu_add(vm, 0); + + vcpu_sregs_get(vcpu, &sregs); + + sregs.cr0 = 0; + sregs.cr4 |= calc_supported_cr4_feature_bits(); + cr4 = sregs.cr4; + + rc = _vcpu_sregs_set(vcpu, &sregs); + TEST_ASSERT(!rc, "Failed to set supported CR4 bits (0x%lx)", cr4); + + vcpu_sregs_get(vcpu, &sregs); + TEST_ASSERT(sregs.cr4 == cr4, "sregs.CR4 (0x%llx) != CR4 (0x%lx)", + sregs.cr4, cr4); + + /* Verify all unsupported features are rejected by KVM. */ + TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_UMIP); + TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_LA57); + TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_VMXE); + TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_SMXE); + TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_FSGSBASE); + TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_PCIDE); + TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_OSXSAVE); + TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_SMEP); + TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_SMAP); + TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_PKE); + + for (i = 32; i < 64; i++) + TEST_INVALID_CR_BIT(vcpu, cr0, sregs, BIT(i)); + + /* NW without CD is illegal, as is PG without PE. */ + TEST_INVALID_CR_BIT(vcpu, cr0, sregs, X86_CR0_NW); + TEST_INVALID_CR_BIT(vcpu, cr0, sregs, X86_CR0_PG); + + kvm_vm_free(vm); + + /* Create a "real" VM and verify APIC_BASE can be set. 
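+	 * Bit 10 (x2APIC enable) without bit 11 (xAPIC global enable) is an
+	 * illegal IA32_APIC_BASE combination and must be rejected; bit 11 alone
+	 * is legal.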
*/ + vm = vm_create_with_one_vcpu(&vcpu, NULL); + + vcpu_sregs_get(vcpu, &sregs); + sregs.apic_base = 1 << 10; + rc = _vcpu_sregs_set(vcpu, &sregs); + TEST_ASSERT(rc, "Set IA32_APIC_BASE to %llx (invalid)", + sregs.apic_base); + sregs.apic_base = 1 << 11; + rc = _vcpu_sregs_set(vcpu, &sregs); + TEST_ASSERT(!rc, "Couldn't set IA32_APIC_BASE to %llx (valid)", + sregs.apic_base); + + kvm_vm_free(vm); + + return 0; +} diff --git a/tools/testing/selftests/kvm/x86/sev_init2_tests.c b/tools/testing/selftests/kvm/x86/sev_init2_tests.c new file mode 100644 index 000000000000..3fb967f40c6a --- /dev/null +++ b/tools/testing/selftests/kvm/x86/sev_init2_tests.c @@ -0,0 +1,152 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include +#include +#include +#include +#include +#include +#include + +#include "test_util.h" +#include "kvm_util.h" +#include "processor.h" +#include "svm_util.h" +#include "kselftest.h" + +#define SVM_SEV_FEAT_DEBUG_SWAP 32u + +/* + * Some features may have hidden dependencies, or may only work + * for certain VM types. Err on the side of safety and don't + * expect that all supported features can be passed one by one + * to KVM_SEV_INIT2. + * + * (Well, right now there's only one...) + */ +#define KNOWN_FEATURES SVM_SEV_FEAT_DEBUG_SWAP + +int kvm_fd; +u64 supported_vmsa_features; +bool have_sev_es; + +static int __sev_ioctl(int vm_fd, int cmd_id, void *data) +{ + struct kvm_sev_cmd cmd = { + .id = cmd_id, + .data = (uint64_t)data, + .sev_fd = open_sev_dev_path_or_exit(), + }; + int ret; + + ret = ioctl(vm_fd, KVM_MEMORY_ENCRYPT_OP, &cmd); + TEST_ASSERT(ret < 0 || cmd.error == SEV_RET_SUCCESS, + "%d failed: fw error: %d\n", + cmd_id, cmd.error); + + return ret; +} + +static void test_init2(unsigned long vm_type, struct kvm_sev_init *init) +{ + struct kvm_vm *vm; + int ret; + + vm = vm_create_barebones_type(vm_type); + ret = __sev_ioctl(vm->fd, KVM_SEV_INIT2, init); + TEST_ASSERT(ret == 0, + "KVM_SEV_INIT2 return code is %d (expected 0), errno: %d", + ret, errno); + kvm_vm_free(vm); +} + +static void test_init2_invalid(unsigned long vm_type, struct kvm_sev_init *init, const char *msg) +{ + struct kvm_vm *vm; + int ret; + + vm = vm_create_barebones_type(vm_type); + ret = __sev_ioctl(vm->fd, KVM_SEV_INIT2, init); + TEST_ASSERT(ret == -1 && errno == EINVAL, + "KVM_SEV_INIT2 should fail, %s.", + msg); + kvm_vm_free(vm); +} + +void test_vm_types(void) +{ + test_init2(KVM_X86_SEV_VM, &(struct kvm_sev_init){}); + + /* + * TODO: check that unsupported types cannot be created. Probably + * a separate selftest. 
+ */ + if (have_sev_es) + test_init2(KVM_X86_SEV_ES_VM, &(struct kvm_sev_init){}); + + test_init2_invalid(0, &(struct kvm_sev_init){}, + "VM type is KVM_X86_DEFAULT_VM"); + if (kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SW_PROTECTED_VM)) + test_init2_invalid(KVM_X86_SW_PROTECTED_VM, &(struct kvm_sev_init){}, + "VM type is KVM_X86_SW_PROTECTED_VM"); +} + +void test_flags(uint32_t vm_type) +{ + int i; + + for (i = 0; i < 32; i++) + test_init2_invalid(vm_type, + &(struct kvm_sev_init){ .flags = BIT(i) }, + "invalid flag"); +} + +void test_features(uint32_t vm_type, uint64_t supported_features) +{ + int i; + + for (i = 0; i < 64; i++) { + if (!(supported_features & BIT_ULL(i))) + test_init2_invalid(vm_type, + &(struct kvm_sev_init){ .vmsa_features = BIT_ULL(i) }, + "unknown feature"); + else if (KNOWN_FEATURES & BIT_ULL(i)) + test_init2(vm_type, + &(struct kvm_sev_init){ .vmsa_features = BIT_ULL(i) }); + } +} + +int main(int argc, char *argv[]) +{ + int kvm_fd = open_kvm_dev_path_or_exit(); + bool have_sev; + + TEST_REQUIRE(__kvm_has_device_attr(kvm_fd, KVM_X86_GRP_SEV, + KVM_X86_SEV_VMSA_FEATURES) == 0); + kvm_device_attr_get(kvm_fd, KVM_X86_GRP_SEV, + KVM_X86_SEV_VMSA_FEATURES, + &supported_vmsa_features); + + have_sev = kvm_cpu_has(X86_FEATURE_SEV); + TEST_ASSERT(have_sev == !!(kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SEV_VM)), + "sev: KVM_CAP_VM_TYPES (%x) does not match cpuid (checking %x)", + kvm_check_cap(KVM_CAP_VM_TYPES), 1 << KVM_X86_SEV_VM); + + TEST_REQUIRE(kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SEV_VM)); + have_sev_es = kvm_cpu_has(X86_FEATURE_SEV_ES); + + TEST_ASSERT(have_sev_es == !!(kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SEV_ES_VM)), + "sev-es: KVM_CAP_VM_TYPES (%x) does not match cpuid (checking %x)", + kvm_check_cap(KVM_CAP_VM_TYPES), 1 << KVM_X86_SEV_ES_VM); + + test_vm_types(); + + test_flags(KVM_X86_SEV_VM); + if (have_sev_es) + test_flags(KVM_X86_SEV_ES_VM); + + test_features(KVM_X86_SEV_VM, 0); + if (have_sev_es) + test_features(KVM_X86_SEV_ES_VM, supported_vmsa_features); + + return 0; +} diff --git a/tools/testing/selftests/kvm/x86/sev_migrate_tests.c b/tools/testing/selftests/kvm/x86/sev_migrate_tests.c new file mode 100644 index 000000000000..0a6dfba3905b --- /dev/null +++ b/tools/testing/selftests/kvm/x86/sev_migrate_tests.c @@ -0,0 +1,397 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include +#include +#include +#include +#include +#include +#include + +#include "test_util.h" +#include "kvm_util.h" +#include "processor.h" +#include "sev.h" +#include "kselftest.h" + +#define NR_MIGRATE_TEST_VCPUS 4 +#define NR_MIGRATE_TEST_VMS 3 +#define NR_LOCK_TESTING_THREADS 3 +#define NR_LOCK_TESTING_ITERATIONS 10000 + +bool have_sev_es; + +static struct kvm_vm *sev_vm_create(bool es) +{ + struct kvm_vm *vm; + int i; + + vm = vm_create_barebones(); + if (!es) + sev_vm_init(vm); + else + sev_es_vm_init(vm); + + for (i = 0; i < NR_MIGRATE_TEST_VCPUS; ++i) + __vm_vcpu_add(vm, i); + + sev_vm_launch(vm, es ? 
SEV_POLICY_ES : 0); + + if (es) + vm_sev_ioctl(vm, KVM_SEV_LAUNCH_UPDATE_VMSA, NULL); + return vm; +} + +static struct kvm_vm *aux_vm_create(bool with_vcpus) +{ + struct kvm_vm *vm; + int i; + + vm = vm_create_barebones(); + if (!with_vcpus) + return vm; + + for (i = 0; i < NR_MIGRATE_TEST_VCPUS; ++i) + __vm_vcpu_add(vm, i); + + return vm; +} + +static int __sev_migrate_from(struct kvm_vm *dst, struct kvm_vm *src) +{ + return __vm_enable_cap(dst, KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM, src->fd); +} + + +static void sev_migrate_from(struct kvm_vm *dst, struct kvm_vm *src) +{ + int ret; + + ret = __sev_migrate_from(dst, src); + TEST_ASSERT(!ret, "Migration failed, ret: %d, errno: %d", ret, errno); +} + +static void test_sev_migrate_from(bool es) +{ + struct kvm_vm *src_vm; + struct kvm_vm *dst_vms[NR_MIGRATE_TEST_VMS]; + int i, ret; + + src_vm = sev_vm_create(es); + for (i = 0; i < NR_MIGRATE_TEST_VMS; ++i) + dst_vms[i] = aux_vm_create(true); + + /* Initial migration from the src to the first dst. */ + sev_migrate_from(dst_vms[0], src_vm); + + for (i = 1; i < NR_MIGRATE_TEST_VMS; i++) + sev_migrate_from(dst_vms[i], dst_vms[i - 1]); + + /* Migrate the guest back to the original VM. */ + ret = __sev_migrate_from(src_vm, dst_vms[NR_MIGRATE_TEST_VMS - 1]); + TEST_ASSERT(ret == -1 && errno == EIO, + "VM that was migrated from should be dead. ret %d, errno: %d", ret, + errno); + + kvm_vm_free(src_vm); + for (i = 0; i < NR_MIGRATE_TEST_VMS; ++i) + kvm_vm_free(dst_vms[i]); +} + +struct locking_thread_input { + struct kvm_vm *vm; + struct kvm_vm *source_vms[NR_LOCK_TESTING_THREADS]; +}; + +static void *locking_test_thread(void *arg) +{ + int i, j; + struct locking_thread_input *input = (struct locking_thread_input *)arg; + + for (i = 0; i < NR_LOCK_TESTING_ITERATIONS; ++i) { + j = i % NR_LOCK_TESTING_THREADS; + __sev_migrate_from(input->vm, input->source_vms[j]); + } + + return NULL; +} + +static void test_sev_migrate_locking(void) +{ + struct locking_thread_input input[NR_LOCK_TESTING_THREADS]; + pthread_t pt[NR_LOCK_TESTING_THREADS]; + int i; + + for (i = 0; i < NR_LOCK_TESTING_THREADS; ++i) { + input[i].vm = sev_vm_create(/* es= */ false); + input[0].source_vms[i] = input[i].vm; + } + for (i = 1; i < NR_LOCK_TESTING_THREADS; ++i) + memcpy(input[i].source_vms, input[0].source_vms, + sizeof(input[i].source_vms)); + + for (i = 0; i < NR_LOCK_TESTING_THREADS; ++i) + pthread_create(&pt[i], NULL, locking_test_thread, &input[i]); + + for (i = 0; i < NR_LOCK_TESTING_THREADS; ++i) + pthread_join(pt[i], NULL); + for (i = 0; i < NR_LOCK_TESTING_THREADS; ++i) + kvm_vm_free(input[i].vm); +} + +static void test_sev_migrate_parameters(void) +{ + struct kvm_vm *sev_vm, *sev_es_vm, *vm_no_vcpu, *vm_no_sev, + *sev_es_vm_no_vmsa; + int ret; + + vm_no_vcpu = vm_create_barebones(); + vm_no_sev = aux_vm_create(true); + ret = __sev_migrate_from(vm_no_vcpu, vm_no_sev); + TEST_ASSERT(ret == -1 && errno == EINVAL, + "Migrations require SEV enabled. ret %d, errno: %d", ret, + errno); + + if (!have_sev_es) + goto out; + + sev_vm = sev_vm_create(/* es= */ false); + sev_es_vm = sev_vm_create(/* es= */ true); + sev_es_vm_no_vmsa = vm_create_barebones(); + sev_es_vm_init(sev_es_vm_no_vmsa); + __vm_vcpu_add(sev_es_vm_no_vmsa, 1); + + ret = __sev_migrate_from(sev_vm, sev_es_vm); + TEST_ASSERT( + ret == -1 && errno == EINVAL, + "Should not be able migrate to SEV enabled VM. 
ret: %d, errno: %d", + ret, errno); + + ret = __sev_migrate_from(sev_es_vm, sev_vm); + TEST_ASSERT( + ret == -1 && errno == EINVAL, + "Should not be able migrate to SEV-ES enabled VM. ret: %d, errno: %d", + ret, errno); + + ret = __sev_migrate_from(vm_no_vcpu, sev_es_vm); + TEST_ASSERT( + ret == -1 && errno == EINVAL, + "SEV-ES migrations require same number of vCPUS. ret: %d, errno: %d", + ret, errno); + + ret = __sev_migrate_from(vm_no_vcpu, sev_es_vm_no_vmsa); + TEST_ASSERT( + ret == -1 && errno == EINVAL, + "SEV-ES migrations require UPDATE_VMSA. ret %d, errno: %d", + ret, errno); + + kvm_vm_free(sev_vm); + kvm_vm_free(sev_es_vm); + kvm_vm_free(sev_es_vm_no_vmsa); +out: + kvm_vm_free(vm_no_vcpu); + kvm_vm_free(vm_no_sev); +} + +static int __sev_mirror_create(struct kvm_vm *dst, struct kvm_vm *src) +{ + return __vm_enable_cap(dst, KVM_CAP_VM_COPY_ENC_CONTEXT_FROM, src->fd); +} + + +static void sev_mirror_create(struct kvm_vm *dst, struct kvm_vm *src) +{ + int ret; + + ret = __sev_mirror_create(dst, src); + TEST_ASSERT(!ret, "Copying context failed, ret: %d, errno: %d", ret, errno); +} + +static void verify_mirror_allowed_cmds(struct kvm_vm *vm) +{ + struct kvm_sev_guest_status status; + int cmd_id; + + for (cmd_id = KVM_SEV_INIT; cmd_id < KVM_SEV_NR_MAX; ++cmd_id) { + int ret; + + /* + * These commands are allowed for mirror VMs, all others are + * not. + */ + switch (cmd_id) { + case KVM_SEV_LAUNCH_UPDATE_VMSA: + case KVM_SEV_GUEST_STATUS: + case KVM_SEV_DBG_DECRYPT: + case KVM_SEV_DBG_ENCRYPT: + continue; + default: + break; + } + + /* + * These commands should be disallowed before the data + * parameter is examined so NULL is OK here. + */ + ret = __vm_sev_ioctl(vm, cmd_id, NULL); + TEST_ASSERT( + ret == -1 && errno == EINVAL, + "Should not be able call command: %d. ret: %d, errno: %d", + cmd_id, ret, errno); + } + + vm_sev_ioctl(vm, KVM_SEV_GUEST_STATUS, &status); +} + +static void test_sev_mirror(bool es) +{ + struct kvm_vm *src_vm, *dst_vm; + int i; + + src_vm = sev_vm_create(es); + dst_vm = aux_vm_create(false); + + sev_mirror_create(dst_vm, src_vm); + + /* Check that we can complete creation of the mirror VM. */ + for (i = 0; i < NR_MIGRATE_TEST_VCPUS; ++i) + __vm_vcpu_add(dst_vm, i); + + if (es) + vm_sev_ioctl(dst_vm, KVM_SEV_LAUNCH_UPDATE_VMSA, NULL); + + verify_mirror_allowed_cmds(dst_vm); + + kvm_vm_free(src_vm); + kvm_vm_free(dst_vm); +} + +static void test_sev_mirror_parameters(void) +{ + struct kvm_vm *sev_vm, *sev_es_vm, *vm_no_vcpu, *vm_with_vcpu; + int ret; + + sev_vm = sev_vm_create(/* es= */ false); + vm_with_vcpu = aux_vm_create(true); + vm_no_vcpu = aux_vm_create(false); + + ret = __sev_mirror_create(sev_vm, sev_vm); + TEST_ASSERT( + ret == -1 && errno == EINVAL, + "Should not be able copy context to self. ret: %d, errno: %d", + ret, errno); + + ret = __sev_mirror_create(vm_no_vcpu, vm_with_vcpu); + TEST_ASSERT(ret == -1 && errno == EINVAL, + "Copy context requires SEV enabled. ret %d, errno: %d", ret, + errno); + + ret = __sev_mirror_create(vm_with_vcpu, sev_vm); + TEST_ASSERT( + ret == -1 && errno == EINVAL, + "SEV copy context requires no vCPUS on the destination. ret: %d, errno: %d", + ret, errno); + + if (!have_sev_es) + goto out; + + sev_es_vm = sev_vm_create(/* es= */ true); + ret = __sev_mirror_create(sev_vm, sev_es_vm); + TEST_ASSERT( + ret == -1 && errno == EINVAL, + "Should not be able copy context to SEV enabled VM. 
ret: %d, errno: %d", + ret, errno); + + ret = __sev_mirror_create(sev_es_vm, sev_vm); + TEST_ASSERT( + ret == -1 && errno == EINVAL, + "Should not be able copy context to SEV-ES enabled VM. ret: %d, errno: %d", + ret, errno); + + kvm_vm_free(sev_es_vm); + +out: + kvm_vm_free(sev_vm); + kvm_vm_free(vm_with_vcpu); + kvm_vm_free(vm_no_vcpu); +} + +static void test_sev_move_copy(void) +{ + struct kvm_vm *dst_vm, *dst2_vm, *dst3_vm, *sev_vm, *mirror_vm, + *dst_mirror_vm, *dst2_mirror_vm, *dst3_mirror_vm; + + sev_vm = sev_vm_create(/* es= */ false); + dst_vm = aux_vm_create(true); + dst2_vm = aux_vm_create(true); + dst3_vm = aux_vm_create(true); + mirror_vm = aux_vm_create(false); + dst_mirror_vm = aux_vm_create(false); + dst2_mirror_vm = aux_vm_create(false); + dst3_mirror_vm = aux_vm_create(false); + + sev_mirror_create(mirror_vm, sev_vm); + + sev_migrate_from(dst_mirror_vm, mirror_vm); + sev_migrate_from(dst_vm, sev_vm); + + sev_migrate_from(dst2_vm, dst_vm); + sev_migrate_from(dst2_mirror_vm, dst_mirror_vm); + + sev_migrate_from(dst3_mirror_vm, dst2_mirror_vm); + sev_migrate_from(dst3_vm, dst2_vm); + + kvm_vm_free(dst_vm); + kvm_vm_free(sev_vm); + kvm_vm_free(dst2_vm); + kvm_vm_free(dst3_vm); + kvm_vm_free(mirror_vm); + kvm_vm_free(dst_mirror_vm); + kvm_vm_free(dst2_mirror_vm); + kvm_vm_free(dst3_mirror_vm); + + /* + * Run similar test be destroy mirrors before mirrored VMs to ensure + * destruction is done safely. + */ + sev_vm = sev_vm_create(/* es= */ false); + dst_vm = aux_vm_create(true); + mirror_vm = aux_vm_create(false); + dst_mirror_vm = aux_vm_create(false); + + sev_mirror_create(mirror_vm, sev_vm); + + sev_migrate_from(dst_mirror_vm, mirror_vm); + sev_migrate_from(dst_vm, sev_vm); + + kvm_vm_free(mirror_vm); + kvm_vm_free(dst_mirror_vm); + kvm_vm_free(dst_vm); + kvm_vm_free(sev_vm); +} + +int main(int argc, char *argv[]) +{ + TEST_REQUIRE(kvm_has_cap(KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM)); + TEST_REQUIRE(kvm_has_cap(KVM_CAP_VM_COPY_ENC_CONTEXT_FROM)); + + TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SEV)); + + have_sev_es = kvm_cpu_has(X86_FEATURE_SEV_ES); + + if (kvm_has_cap(KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM)) { + test_sev_migrate_from(/* es= */ false); + if (have_sev_es) + test_sev_migrate_from(/* es= */ true); + test_sev_migrate_locking(); + test_sev_migrate_parameters(); + if (kvm_has_cap(KVM_CAP_VM_COPY_ENC_CONTEXT_FROM)) + test_sev_move_copy(); + } + if (kvm_has_cap(KVM_CAP_VM_COPY_ENC_CONTEXT_FROM)) { + test_sev_mirror(/* es= */ false); + if (have_sev_es) + test_sev_mirror(/* es= */ true); + test_sev_mirror_parameters(); + } + return 0; +} diff --git a/tools/testing/selftests/kvm/x86/sev_smoke_test.c b/tools/testing/selftests/kvm/x86/sev_smoke_test.c new file mode 100644 index 000000000000..ae77698e6e97 --- /dev/null +++ b/tools/testing/selftests/kvm/x86/sev_smoke_test.c @@ -0,0 +1,205 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include +#include +#include +#include +#include +#include + +#include "test_util.h" +#include "kvm_util.h" +#include "processor.h" +#include "svm_util.h" +#include "linux/psp-sev.h" +#include "sev.h" + + +#define XFEATURE_MASK_X87_AVX (XFEATURE_MASK_FP | XFEATURE_MASK_SSE | XFEATURE_MASK_YMM) + +static void guest_sev_es_code(void) +{ + /* TODO: Check CPUID after GHCB-based hypercall support is added. */ + GUEST_ASSERT(rdmsr(MSR_AMD64_SEV) & MSR_AMD64_SEV_ENABLED); + GUEST_ASSERT(rdmsr(MSR_AMD64_SEV) & MSR_AMD64_SEV_ES_ENABLED); + + /* + * TODO: Add GHCB and ucall support for SEV-ES guests. 
For now, simply + * force "termination" to signal "done" via the GHCB MSR protocol. + */ + wrmsr(MSR_AMD64_SEV_ES_GHCB, GHCB_MSR_TERM_REQ); + __asm__ __volatile__("rep; vmmcall"); +} + +static void guest_sev_code(void) +{ + GUEST_ASSERT(this_cpu_has(X86_FEATURE_SEV)); + GUEST_ASSERT(rdmsr(MSR_AMD64_SEV) & MSR_AMD64_SEV_ENABLED); + + GUEST_DONE(); +} + +/* Stash state passed via VMSA before any compiled code runs. */ +extern void guest_code_xsave(void); +asm("guest_code_xsave:\n" + "mov $" __stringify(XFEATURE_MASK_X87_AVX) ", %eax\n" + "xor %edx, %edx\n" + "xsave (%rdi)\n" + "jmp guest_sev_es_code"); + +static void compare_xsave(u8 *from_host, u8 *from_guest) +{ + int i; + bool bad = false; + for (i = 0; i < 4095; i++) { + if (from_host[i] != from_guest[i]) { + printf("mismatch at %02hhx | %02hhx %02hhx\n", i, from_host[i], from_guest[i]); + bad = true; + } + } + + if (bad) + abort(); +} + +static void test_sync_vmsa(uint32_t policy) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + vm_vaddr_t gva; + void *hva; + + double x87val = M_PI; + struct kvm_xsave __attribute__((aligned(64))) xsave = { 0 }; + + vm = vm_sev_create_with_one_vcpu(KVM_X86_SEV_ES_VM, guest_code_xsave, &vcpu); + gva = vm_vaddr_alloc_shared(vm, PAGE_SIZE, KVM_UTIL_MIN_VADDR, + MEM_REGION_TEST_DATA); + hva = addr_gva2hva(vm, gva); + + vcpu_args_set(vcpu, 1, gva); + + asm("fninit\n" + "vpcmpeqb %%ymm4, %%ymm4, %%ymm4\n" + "fldl %3\n" + "xsave (%2)\n" + "fstp %%st\n" + : "=m"(xsave) + : "A"(XFEATURE_MASK_X87_AVX), "r"(&xsave), "m" (x87val) + : "ymm4", "st", "st(1)", "st(2)", "st(3)", "st(4)", "st(5)", "st(6)", "st(7)"); + vcpu_xsave_set(vcpu, &xsave); + + vm_sev_launch(vm, SEV_POLICY_ES | policy, NULL); + + /* This page is shared, so make it decrypted. */ + memset(hva, 0, 4096); + + vcpu_run(vcpu); + + TEST_ASSERT(vcpu->run->exit_reason == KVM_EXIT_SYSTEM_EVENT, + "Wanted SYSTEM_EVENT, got %s", + exit_reason_str(vcpu->run->exit_reason)); + TEST_ASSERT_EQ(vcpu->run->system_event.type, KVM_SYSTEM_EVENT_SEV_TERM); + TEST_ASSERT_EQ(vcpu->run->system_event.ndata, 1); + TEST_ASSERT_EQ(vcpu->run->system_event.data[0], GHCB_MSR_TERM_REQ); + + compare_xsave((u8 *)&xsave, (u8 *)hva); + + kvm_vm_free(vm); +} + +static void test_sev(void *guest_code, uint64_t policy) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + struct ucall uc; + + uint32_t type = policy & SEV_POLICY_ES ? KVM_X86_SEV_ES_VM : KVM_X86_SEV_VM; + + vm = vm_sev_create_with_one_vcpu(type, guest_code, &vcpu); + + /* TODO: Validate the measurement is as expected. */ + vm_sev_launch(vm, policy, NULL); + + for (;;) { + vcpu_run(vcpu); + + if (policy & SEV_POLICY_ES) { + TEST_ASSERT(vcpu->run->exit_reason == KVM_EXIT_SYSTEM_EVENT, + "Wanted SYSTEM_EVENT, got %s", + exit_reason_str(vcpu->run->exit_reason)); + TEST_ASSERT_EQ(vcpu->run->system_event.type, KVM_SYSTEM_EVENT_SEV_TERM); + TEST_ASSERT_EQ(vcpu->run->system_event.ndata, 1); + TEST_ASSERT_EQ(vcpu->run->system_event.data[0], GHCB_MSR_TERM_REQ); + break; + } + + switch (get_ucall(vcpu, &uc)) { + case UCALL_SYNC: + continue; + case UCALL_DONE: + return; + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + default: + TEST_FAIL("Unexpected exit: %s", + exit_reason_str(vcpu->run->exit_reason)); + } + } + + kvm_vm_free(vm); +} + +static void guest_shutdown_code(void) +{ + struct desc_ptr idt; + + /* Clobber the IDT so that #UD is guaranteed to trigger SHUTDOWN. 
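+	 * With a zero-limit IDT the #UD from ud2 cannot be delivered, the
+	 * failure escalates to a triple fault, and the vCPU exits with
+	 * KVM_EXIT_SHUTDOWN.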
*/ + memset(&idt, 0, sizeof(idt)); + __asm__ __volatile__("lidt %0" :: "m"(idt)); + + __asm__ __volatile__("ud2"); +} + +static void test_sev_es_shutdown(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + + uint32_t type = KVM_X86_SEV_ES_VM; + + vm = vm_sev_create_with_one_vcpu(type, guest_shutdown_code, &vcpu); + + vm_sev_launch(vm, SEV_POLICY_ES, NULL); + + vcpu_run(vcpu); + TEST_ASSERT(vcpu->run->exit_reason == KVM_EXIT_SHUTDOWN, + "Wanted SHUTDOWN, got %s", + exit_reason_str(vcpu->run->exit_reason)); + + kvm_vm_free(vm); +} + +int main(int argc, char *argv[]) +{ + const u64 xf_mask = XFEATURE_MASK_X87_AVX; + + TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SEV)); + + test_sev(guest_sev_code, SEV_POLICY_NO_DBG); + test_sev(guest_sev_code, 0); + + if (kvm_cpu_has(X86_FEATURE_SEV_ES)) { + test_sev(guest_sev_es_code, SEV_POLICY_ES | SEV_POLICY_NO_DBG); + test_sev(guest_sev_es_code, SEV_POLICY_ES); + + test_sev_es_shutdown(); + + if (kvm_has_cap(KVM_CAP_XCRS) && + (xgetbv(0) & kvm_cpu_supported_xcr0() & xf_mask) == xf_mask) { + test_sync_vmsa(0); + test_sync_vmsa(SEV_POLICY_NO_DBG); + } + } + + return 0; +} diff --git a/tools/testing/selftests/kvm/x86/smaller_maxphyaddr_emulation_test.c b/tools/testing/selftests/kvm/x86/smaller_maxphyaddr_emulation_test.c new file mode 100644 index 000000000000..fabeeaddfb3a --- /dev/null +++ b/tools/testing/selftests/kvm/x86/smaller_maxphyaddr_emulation_test.c @@ -0,0 +1,105 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2020, Google LLC. + * + * Test that KVM emulates instructions in response to EPT violations when + * allow_smaller_maxphyaddr is enabled and guest.MAXPHYADDR < host.MAXPHYADDR. + */ +#include "flds_emulation.h" + +#include "test_util.h" +#include "kvm_util.h" +#include "vmx.h" + +#define MAXPHYADDR 36 + +#define MEM_REGION_GVA 0x0000123456789000 +#define MEM_REGION_GPA 0x0000000700000000 +#define MEM_REGION_SLOT 10 +#define MEM_REGION_SIZE PAGE_SIZE + +static void guest_code(bool tdp_enabled) +{ + uint64_t error_code; + uint64_t vector; + + vector = kvm_asm_safe_ec(FLDS_MEM_EAX, error_code, "a"(MEM_REGION_GVA)); + + /* + * When TDP is enabled, flds will trigger an emulation failure, exit to + * userspace, and then the selftest host "VMM" skips the instruction. + * + * When TDP is disabled, no instruction emulation is required so flds + * should generate #PF(RSVD). 
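+	 * The reserved bit is planted by the host, which sets BIT(MAXPHYADDR)
+	 * in the guest PTE for MEM_REGION_GVA via vm_get_page_table_entry().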
+ */ + if (tdp_enabled) { + GUEST_ASSERT(!vector); + } else { + GUEST_ASSERT_EQ(vector, PF_VECTOR); + GUEST_ASSERT(error_code & PFERR_RSVD_MASK); + } + + GUEST_DONE(); +} + +int main(int argc, char *argv[]) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + struct ucall uc; + uint64_t *pte; + uint64_t *hva; + uint64_t gpa; + int rc; + + TEST_REQUIRE(kvm_has_cap(KVM_CAP_SMALLER_MAXPHYADDR)); + + vm = vm_create_with_one_vcpu(&vcpu, guest_code); + vcpu_args_set(vcpu, 1, kvm_is_tdp_enabled()); + + vcpu_set_cpuid_property(vcpu, X86_PROPERTY_MAX_PHY_ADDR, MAXPHYADDR); + + rc = kvm_check_cap(KVM_CAP_EXIT_ON_EMULATION_FAILURE); + TEST_ASSERT(rc, "KVM_CAP_EXIT_ON_EMULATION_FAILURE is unavailable"); + vm_enable_cap(vm, KVM_CAP_EXIT_ON_EMULATION_FAILURE, 1); + + vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, + MEM_REGION_GPA, MEM_REGION_SLOT, + MEM_REGION_SIZE / PAGE_SIZE, 0); + gpa = vm_phy_pages_alloc(vm, MEM_REGION_SIZE / PAGE_SIZE, + MEM_REGION_GPA, MEM_REGION_SLOT); + TEST_ASSERT(gpa == MEM_REGION_GPA, "Failed vm_phy_pages_alloc"); + virt_map(vm, MEM_REGION_GVA, MEM_REGION_GPA, 1); + hva = addr_gpa2hva(vm, MEM_REGION_GPA); + memset(hva, 0, PAGE_SIZE); + + pte = vm_get_page_table_entry(vm, MEM_REGION_GVA); + *pte |= BIT_ULL(MAXPHYADDR); + + vcpu_run(vcpu); + + /* + * When TDP is enabled, KVM must emulate in response the guest physical + * address that is illegal from the guest's perspective, but is legal + * from hardware's perspeective. This should result in an emulation + * failure exit to userspace since KVM doesn't support emulating flds. + */ + if (kvm_is_tdp_enabled()) { + handle_flds_emulation_failure_exit(vcpu); + vcpu_run(vcpu); + } + + switch (get_ucall(vcpu, &uc)) { + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + break; + case UCALL_DONE: + break; + default: + TEST_FAIL("Unrecognized ucall: %lu", uc.cmd); + } + + kvm_vm_free(vm); + + return 0; +} diff --git a/tools/testing/selftests/kvm/x86/smm_test.c b/tools/testing/selftests/kvm/x86/smm_test.c new file mode 100644 index 000000000000..55c88d664a94 --- /dev/null +++ b/tools/testing/selftests/kvm/x86/smm_test.c @@ -0,0 +1,209 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2018, Red Hat, Inc. + * + * Tests for SMM. + */ +#include +#include +#include +#include +#include +#include + +#include "test_util.h" + +#include "kvm_util.h" + +#include "vmx.h" +#include "svm_util.h" + +#define SMRAM_SIZE 65536 +#define SMRAM_MEMSLOT ((1 << 16) | 1) +#define SMRAM_PAGES (SMRAM_SIZE / PAGE_SIZE) +#define SMRAM_GPA 0x1000000 +#define SMRAM_STAGE 0xfe + +#define STR(x) #x +#define XSTR(s) STR(s) + +#define SYNC_PORT 0xe +#define DONE 0xff + +/* + * This is compiled as normal 64-bit code, however, SMI handler is executed + * in real-address mode. To stay simple we're limiting ourselves to a mode + * independent subset of asm here. + * SMI handler always report back fixed stage SMRAM_STAGE. 
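+ * The handler is copied to SMRAM_GPA + 0x8000 (the default SMI entry point
+ * relative to SMBASE) by main(); the "in" instruction is what reports the
+ * stage to the host before "rsm" resumes the interrupted context.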
+ */ +uint8_t smi_handler[] = { + 0xb0, SMRAM_STAGE, /* mov $SMRAM_STAGE, %al */ + 0xe4, SYNC_PORT, /* in $SYNC_PORT, %al */ + 0x0f, 0xaa, /* rsm */ +}; + +static inline void sync_with_host(uint64_t phase) +{ + asm volatile("in $" XSTR(SYNC_PORT)", %%al \n" + : "+a" (phase)); +} + +static void self_smi(void) +{ + x2apic_write_reg(APIC_ICR, + APIC_DEST_SELF | APIC_INT_ASSERT | APIC_DM_SMI); +} + +static void l2_guest_code(void) +{ + sync_with_host(8); + + sync_with_host(10); + + vmcall(); +} + +static void guest_code(void *arg) +{ + #define L2_GUEST_STACK_SIZE 64 + unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; + uint64_t apicbase = rdmsr(MSR_IA32_APICBASE); + struct svm_test_data *svm = arg; + struct vmx_pages *vmx_pages = arg; + + sync_with_host(1); + + wrmsr(MSR_IA32_APICBASE, apicbase | X2APIC_ENABLE); + + sync_with_host(2); + + self_smi(); + + sync_with_host(4); + + if (arg) { + if (this_cpu_has(X86_FEATURE_SVM)) { + generic_svm_setup(svm, l2_guest_code, + &l2_guest_stack[L2_GUEST_STACK_SIZE]); + } else { + GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages)); + GUEST_ASSERT(load_vmcs(vmx_pages)); + prepare_vmcs(vmx_pages, l2_guest_code, + &l2_guest_stack[L2_GUEST_STACK_SIZE]); + } + + sync_with_host(5); + + self_smi(); + + sync_with_host(7); + + if (this_cpu_has(X86_FEATURE_SVM)) { + run_guest(svm->vmcb, svm->vmcb_gpa); + run_guest(svm->vmcb, svm->vmcb_gpa); + } else { + vmlaunch(); + vmresume(); + } + + /* Stages 8-11 are eaten by SMM (SMRAM_STAGE reported instead) */ + sync_with_host(12); + } + + sync_with_host(DONE); +} + +void inject_smi(struct kvm_vcpu *vcpu) +{ + struct kvm_vcpu_events events; + + vcpu_events_get(vcpu, &events); + + events.smi.pending = 1; + events.flags |= KVM_VCPUEVENT_VALID_SMM; + + vcpu_events_set(vcpu, &events); +} + +int main(int argc, char *argv[]) +{ + vm_vaddr_t nested_gva = 0; + + struct kvm_vcpu *vcpu; + struct kvm_regs regs; + struct kvm_vm *vm; + struct kvm_x86_state *state; + int stage, stage_reported; + + TEST_REQUIRE(kvm_has_cap(KVM_CAP_X86_SMM)); + + /* Create VM */ + vm = vm_create_with_one_vcpu(&vcpu, guest_code); + + vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, SMRAM_GPA, + SMRAM_MEMSLOT, SMRAM_PAGES, 0); + TEST_ASSERT(vm_phy_pages_alloc(vm, SMRAM_PAGES, SMRAM_GPA, SMRAM_MEMSLOT) + == SMRAM_GPA, "could not allocate guest physical addresses?"); + + memset(addr_gpa2hva(vm, SMRAM_GPA), 0x0, SMRAM_SIZE); + memcpy(addr_gpa2hva(vm, SMRAM_GPA) + 0x8000, smi_handler, + sizeof(smi_handler)); + + vcpu_set_msr(vcpu, MSR_IA32_SMBASE, SMRAM_GPA); + + if (kvm_has_cap(KVM_CAP_NESTED_STATE)) { + if (kvm_cpu_has(X86_FEATURE_SVM)) + vcpu_alloc_svm(vm, &nested_gva); + else if (kvm_cpu_has(X86_FEATURE_VMX)) + vcpu_alloc_vmx(vm, &nested_gva); + } + + if (!nested_gva) + pr_info("will skip SMM test with VMX enabled\n"); + + vcpu_args_set(vcpu, 1, nested_gva); + + for (stage = 1;; stage++) { + vcpu_run(vcpu); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); + + memset(®s, 0, sizeof(regs)); + vcpu_regs_get(vcpu, ®s); + + stage_reported = regs.rax & 0xff; + + if (stage_reported == DONE) + goto done; + + TEST_ASSERT(stage_reported == stage || + stage_reported == SMRAM_STAGE, + "Unexpected stage: #%x, got %x", + stage, stage_reported); + + /* + * Enter SMM during L2 execution and check that we correctly + * return from it. Do not perform save/restore while in SMM yet. + */ + if (stage == 8) { + inject_smi(vcpu); + continue; + } + + /* + * Perform save/restore while the guest is in SMM triggered + * during L2 execution. 
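+		 * Unlike stage 8, the SMI injected at stage 10 is immediately
+		 * followed by vcpu_save_state()/vcpu_load_state(), so the
+		 * pending SMI and SMM state must survive the round trip.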
+ */ + if (stage == 10) + inject_smi(vcpu); + + state = vcpu_save_state(vcpu); + kvm_vm_release(vm); + + vcpu = vm_recreate_with_one_vcpu(vm); + vcpu_load_state(vcpu, state); + kvm_x86_state_cleanup(state); + } + +done: + kvm_vm_free(vm); +} diff --git a/tools/testing/selftests/kvm/x86/state_test.c b/tools/testing/selftests/kvm/x86/state_test.c new file mode 100644 index 000000000000..141b7fc0c965 --- /dev/null +++ b/tools/testing/selftests/kvm/x86/state_test.c @@ -0,0 +1,323 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * KVM_GET/SET_* tests + * + * Copyright (C) 2018, Red Hat, Inc. + * + * Tests for vCPU state save/restore, including nested guest state. + */ +#include +#include +#include +#include +#include + +#include "test_util.h" + +#include "kvm_util.h" +#include "processor.h" +#include "vmx.h" +#include "svm_util.h" + +#define L2_GUEST_STACK_SIZE 256 + +void svm_l2_guest_code(void) +{ + GUEST_SYNC(4); + /* Exit to L1 */ + vmcall(); + GUEST_SYNC(6); + /* Done, exit to L1 and never come back. */ + vmcall(); +} + +static void svm_l1_guest_code(struct svm_test_data *svm) +{ + unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; + struct vmcb *vmcb = svm->vmcb; + + GUEST_ASSERT(svm->vmcb_gpa); + /* Prepare for L2 execution. */ + generic_svm_setup(svm, svm_l2_guest_code, + &l2_guest_stack[L2_GUEST_STACK_SIZE]); + + GUEST_SYNC(3); + run_guest(vmcb, svm->vmcb_gpa); + GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL); + GUEST_SYNC(5); + vmcb->save.rip += 3; + run_guest(vmcb, svm->vmcb_gpa); + GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL); + GUEST_SYNC(7); +} + +void vmx_l2_guest_code(void) +{ + GUEST_SYNC(6); + + /* Exit to L1 */ + vmcall(); + + /* L1 has now set up a shadow VMCS for us. */ + GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffee); + GUEST_SYNC(10); + GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffee); + GUEST_ASSERT(!vmwrite(GUEST_RIP, 0xc0fffee)); + GUEST_SYNC(11); + GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0fffee); + GUEST_ASSERT(!vmwrite(GUEST_RIP, 0xc0ffffee)); + GUEST_SYNC(12); + + /* Done, exit to L1 and never come back. */ + vmcall(); +} + +static void vmx_l1_guest_code(struct vmx_pages *vmx_pages) +{ + unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; + + GUEST_ASSERT(vmx_pages->vmcs_gpa); + GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages)); + GUEST_SYNC(3); + GUEST_ASSERT(load_vmcs(vmx_pages)); + GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa); + + GUEST_SYNC(4); + GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa); + + prepare_vmcs(vmx_pages, vmx_l2_guest_code, + &l2_guest_stack[L2_GUEST_STACK_SIZE]); + + GUEST_SYNC(5); + GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa); + GUEST_ASSERT(!vmlaunch()); + GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa); + GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL); + + /* Check that the launched state is preserved. 
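+	 * i.e. a second VMLAUNCH on an already-launched VMCS must fail, while
+	 * VMRESUME must succeed.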
*/ + GUEST_ASSERT(vmlaunch()); + + GUEST_ASSERT(!vmresume()); + GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL); + + GUEST_SYNC(7); + GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL); + + GUEST_ASSERT(!vmresume()); + GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL); + + vmwrite(GUEST_RIP, vmreadz(GUEST_RIP) + 3); + + vmwrite(SECONDARY_VM_EXEC_CONTROL, SECONDARY_EXEC_SHADOW_VMCS); + vmwrite(VMCS_LINK_POINTER, vmx_pages->shadow_vmcs_gpa); + + GUEST_ASSERT(!vmptrld(vmx_pages->shadow_vmcs_gpa)); + GUEST_ASSERT(vmlaunch()); + GUEST_SYNC(8); + GUEST_ASSERT(vmlaunch()); + GUEST_ASSERT(vmresume()); + + vmwrite(GUEST_RIP, 0xc0ffee); + GUEST_SYNC(9); + GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffee); + + GUEST_ASSERT(!vmptrld(vmx_pages->vmcs_gpa)); + GUEST_ASSERT(!vmresume()); + GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL); + + GUEST_ASSERT(!vmptrld(vmx_pages->shadow_vmcs_gpa)); + GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffffee); + GUEST_ASSERT(vmlaunch()); + GUEST_ASSERT(vmresume()); + GUEST_SYNC(13); + GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffffee); + GUEST_ASSERT(vmlaunch()); + GUEST_ASSERT(vmresume()); +} + +static void __attribute__((__flatten__)) guest_code(void *arg) +{ + GUEST_SYNC(1); + + if (this_cpu_has(X86_FEATURE_XSAVE)) { + uint64_t supported_xcr0 = this_cpu_supported_xcr0(); + uint8_t buffer[4096]; + + memset(buffer, 0xcc, sizeof(buffer)); + + /* + * Modify state for all supported xfeatures to take them out of + * their "init" state, i.e. to make them show up in XSTATE_BV. + * + * Note off-by-default features, e.g. AMX, are out of scope for + * this particular testcase as they have a different ABI. + */ + GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_FP); + asm volatile ("fincstp"); + + GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_SSE); + asm volatile ("vmovdqu %0, %%xmm0" :: "m" (buffer)); + + if (supported_xcr0 & XFEATURE_MASK_YMM) + asm volatile ("vmovdqu %0, %%ymm0" :: "m" (buffer)); + + if (supported_xcr0 & XFEATURE_MASK_AVX512) { + asm volatile ("kmovq %0, %%k1" :: "r" (-1ull)); + asm volatile ("vmovupd %0, %%zmm0" :: "m" (buffer)); + asm volatile ("vmovupd %0, %%zmm16" :: "m" (buffer)); + } + + if (this_cpu_has(X86_FEATURE_MPX)) { + uint64_t bounds[2] = { 10, 0xffffffffull }; + uint64_t output[2] = { }; + + GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_BNDREGS); + GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_BNDCSR); + + /* + * Don't bother trying to get BNDCSR into the INUSE + * state. MSR_IA32_BNDCFGS doesn't count as it isn't + * managed via XSAVE/XRSTOR, and BNDCFGU can only be + * modified by XRSTOR. Stuffing XSTATE_BV in the host + * is simpler than doing XRSTOR here in the guest. + * + * However, temporarily enable MPX in BNDCFGS so that + * BNDMOV actually loads BND1. If MPX isn't *fully* + * enabled, all MPX instructions are treated as NOPs. + * + * Hand encode "bndmov (%rax),%bnd1" as support for MPX + * mnemonics/registers has been removed from gcc and + * clang (and was never fully supported by clang). + */ + wrmsr(MSR_IA32_BNDCFGS, BIT_ULL(0)); + asm volatile (".byte 0x66,0x0f,0x1a,0x08" :: "a" (bounds)); + /* + * Hand encode "bndmov %bnd1, (%rax)" to sanity check + * that BND1 actually got loaded. 
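+			 * (Opcode 66 0F 1A is the load form of BNDMOV and
+			 * 66 0F 1B is the store form.)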
+ */ + asm volatile (".byte 0x66,0x0f,0x1b,0x08" :: "a" (output)); + wrmsr(MSR_IA32_BNDCFGS, 0); + + GUEST_ASSERT_EQ(bounds[0], output[0]); + GUEST_ASSERT_EQ(bounds[1], output[1]); + } + if (this_cpu_has(X86_FEATURE_PKU)) { + GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_PKRU); + set_cr4(get_cr4() | X86_CR4_PKE); + GUEST_ASSERT(this_cpu_has(X86_FEATURE_OSPKE)); + + wrpkru(-1u); + } + } + + GUEST_SYNC(2); + + if (arg) { + if (this_cpu_has(X86_FEATURE_SVM)) + svm_l1_guest_code(arg); + else + vmx_l1_guest_code(arg); + } + + GUEST_DONE(); +} + +int main(int argc, char *argv[]) +{ + uint64_t *xstate_bv, saved_xstate_bv; + vm_vaddr_t nested_gva = 0; + struct kvm_cpuid2 empty_cpuid = {}; + struct kvm_regs regs1, regs2; + struct kvm_vcpu *vcpu, *vcpuN; + struct kvm_vm *vm; + struct kvm_x86_state *state; + struct ucall uc; + int stage; + + /* Create VM */ + vm = vm_create_with_one_vcpu(&vcpu, guest_code); + + vcpu_regs_get(vcpu, ®s1); + + if (kvm_has_cap(KVM_CAP_NESTED_STATE)) { + if (kvm_cpu_has(X86_FEATURE_SVM)) + vcpu_alloc_svm(vm, &nested_gva); + else if (kvm_cpu_has(X86_FEATURE_VMX)) + vcpu_alloc_vmx(vm, &nested_gva); + } + + if (!nested_gva) + pr_info("will skip nested state checks\n"); + + vcpu_args_set(vcpu, 1, nested_gva); + + for (stage = 1;; stage++) { + vcpu_run(vcpu); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); + + switch (get_ucall(vcpu, &uc)) { + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + /* NOT REACHED */ + case UCALL_SYNC: + break; + case UCALL_DONE: + goto done; + default: + TEST_FAIL("Unknown ucall %lu", uc.cmd); + } + + /* UCALL_SYNC is handled here. */ + TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") && + uc.args[1] == stage, "Stage %d: Unexpected register values vmexit, got %lx", + stage, (ulong)uc.args[1]); + + state = vcpu_save_state(vcpu); + memset(®s1, 0, sizeof(regs1)); + vcpu_regs_get(vcpu, ®s1); + + kvm_vm_release(vm); + + /* Restore state in a new VM. */ + vcpu = vm_recreate_with_one_vcpu(vm); + vcpu_load_state(vcpu, state); + + /* + * Restore XSAVE state in a dummy vCPU, first without doing + * KVM_SET_CPUID2, and then with an empty guest CPUID. Except + * for off-by-default xfeatures, e.g. AMX, KVM is supposed to + * allow KVM_SET_XSAVE regardless of guest CPUID. Manually + * load only XSAVE state, MSRs in particular have a much more + * convoluted ABI. + * + * Load two versions of XSAVE state: one with the actual guest + * XSAVE state, and one with all supported features forced "on" + * in xstate_bv, e.g. to ensure that KVM allows loading all + * supported features, even if something goes awry in saving + * the original snapshot. 
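+		 * XSTATE_BV occupies the first 8 bytes of the XSAVE header,
+		 * which starts at byte offset 512 of the XSAVE area; the
+		 * pointer arithmetic below relies on that fixed layout.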
+ */ + xstate_bv = (void *)&((uint8_t *)state->xsave->region)[512]; + saved_xstate_bv = *xstate_bv; + + vcpuN = __vm_vcpu_add(vm, vcpu->id + 1); + vcpu_xsave_set(vcpuN, state->xsave); + *xstate_bv = kvm_cpu_supported_xcr0(); + vcpu_xsave_set(vcpuN, state->xsave); + + vcpu_init_cpuid(vcpuN, &empty_cpuid); + vcpu_xsave_set(vcpuN, state->xsave); + *xstate_bv = saved_xstate_bv; + vcpu_xsave_set(vcpuN, state->xsave); + + kvm_x86_state_cleanup(state); + + memset(®s2, 0, sizeof(regs2)); + vcpu_regs_get(vcpu, ®s2); + TEST_ASSERT(!memcmp(®s1, ®s2, sizeof(regs2)), + "Unexpected register values after vcpu_load_state; rdi: %lx rsi: %lx", + (ulong) regs2.rdi, (ulong) regs2.rsi); + } + +done: + kvm_vm_free(vm); +} diff --git a/tools/testing/selftests/kvm/x86/svm_int_ctl_test.c b/tools/testing/selftests/kvm/x86/svm_int_ctl_test.c new file mode 100644 index 000000000000..916e04248fbb --- /dev/null +++ b/tools/testing/selftests/kvm/x86/svm_int_ctl_test.c @@ -0,0 +1,118 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * svm_int_ctl_test + * + * Copyright (C) 2021, Red Hat, Inc. + * + * Nested SVM testing: test simultaneous use of V_IRQ from L1 and L0. + */ + +#include "test_util.h" +#include "kvm_util.h" +#include "processor.h" +#include "svm_util.h" +#include "apic.h" + +bool vintr_irq_called; +bool intr_irq_called; + +#define VINTR_IRQ_NUMBER 0x20 +#define INTR_IRQ_NUMBER 0x30 + +static void vintr_irq_handler(struct ex_regs *regs) +{ + vintr_irq_called = true; +} + +static void intr_irq_handler(struct ex_regs *regs) +{ + x2apic_write_reg(APIC_EOI, 0x00); + intr_irq_called = true; +} + +static void l2_guest_code(struct svm_test_data *svm) +{ + /* This code raises interrupt INTR_IRQ_NUMBER in the L1's LAPIC, + * and since L1 didn't enable virtual interrupt masking, + * L2 should receive it and not L1. + * + * L2 also has virtual interrupt 'VINTR_IRQ_NUMBER' pending in V_IRQ + * so it should also receive it after the following 'sti'. + */ + x2apic_write_reg(APIC_ICR, + APIC_DEST_SELF | APIC_INT_ASSERT | INTR_IRQ_NUMBER); + + __asm__ __volatile__( + "sti\n" + "nop\n" + ); + + GUEST_ASSERT(vintr_irq_called); + GUEST_ASSERT(intr_irq_called); + + __asm__ __volatile__( + "vmcall\n" + ); +} + +static void l1_guest_code(struct svm_test_data *svm) +{ + #define L2_GUEST_STACK_SIZE 64 + unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; + struct vmcb *vmcb = svm->vmcb; + + x2apic_enable(); + + /* Prepare for L2 execution. 
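+	 * Below, V_INTR_MASKING and the INTR/VINTR intercepts are cleared so
+	 * that both the real interrupt and the virtual V_IRQ are delivered
+	 * directly to L2.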
*/ + generic_svm_setup(svm, l2_guest_code, + &l2_guest_stack[L2_GUEST_STACK_SIZE]); + + /* No virtual interrupt masking */ + vmcb->control.int_ctl &= ~V_INTR_MASKING_MASK; + + /* No intercepts for real and virtual interrupts */ + vmcb->control.intercept &= ~(BIT(INTERCEPT_INTR) | BIT(INTERCEPT_VINTR)); + + /* Make a virtual interrupt VINTR_IRQ_NUMBER pending */ + vmcb->control.int_ctl |= V_IRQ_MASK | (0x1 << V_INTR_PRIO_SHIFT); + vmcb->control.int_vector = VINTR_IRQ_NUMBER; + + run_guest(vmcb, svm->vmcb_gpa); + GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL); + GUEST_DONE(); +} + +int main(int argc, char *argv[]) +{ + struct kvm_vcpu *vcpu; + vm_vaddr_t svm_gva; + struct kvm_vm *vm; + struct ucall uc; + + TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM)); + + vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code); + + vm_install_exception_handler(vm, VINTR_IRQ_NUMBER, vintr_irq_handler); + vm_install_exception_handler(vm, INTR_IRQ_NUMBER, intr_irq_handler); + + vcpu_alloc_svm(vm, &svm_gva); + vcpu_args_set(vcpu, 1, svm_gva); + + vcpu_run(vcpu); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); + + switch (get_ucall(vcpu, &uc)) { + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + break; + /* NOT REACHED */ + case UCALL_DONE: + goto done; + default: + TEST_FAIL("Unknown ucall 0x%lx.", uc.cmd); + } +done: + kvm_vm_free(vm); + return 0; +} diff --git a/tools/testing/selftests/kvm/x86/svm_nested_shutdown_test.c b/tools/testing/selftests/kvm/x86/svm_nested_shutdown_test.c new file mode 100644 index 000000000000..00135cbba35e --- /dev/null +++ b/tools/testing/selftests/kvm/x86/svm_nested_shutdown_test.c @@ -0,0 +1,59 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * svm_nested_shutdown_test + * + * Copyright (C) 2022, Red Hat, Inc. + * + * Nested SVM testing: test that unintercepted shutdown in L2 doesn't crash the host + */ + +#include "test_util.h" +#include "kvm_util.h" +#include "processor.h" +#include "svm_util.h" + +static void l2_guest_code(struct svm_test_data *svm) +{ + __asm__ __volatile__("ud2"); +} + +static void l1_guest_code(struct svm_test_data *svm, struct idt_entry *idt) +{ + #define L2_GUEST_STACK_SIZE 64 + unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; + struct vmcb *vmcb = svm->vmcb; + + generic_svm_setup(svm, l2_guest_code, + &l2_guest_stack[L2_GUEST_STACK_SIZE]); + + vmcb->control.intercept &= ~(BIT(INTERCEPT_SHUTDOWN)); + + idt[6].p = 0; // #UD is intercepted but its injection will cause #NP + idt[11].p = 0; // #NP is not intercepted and will cause another + // #NP that will be converted to #DF + idt[8].p = 0; // #DF will cause #NP which will cause SHUTDOWN + + run_guest(vmcb, svm->vmcb_gpa); + + /* should not reach here */ + GUEST_ASSERT(0); +} + +int main(int argc, char *argv[]) +{ + struct kvm_vcpu *vcpu; + vm_vaddr_t svm_gva; + struct kvm_vm *vm; + + TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM)); + + vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code); + vcpu_alloc_svm(vm, &svm_gva); + + vcpu_args_set(vcpu, 2, svm_gva, vm->arch.idt); + + vcpu_run(vcpu); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_SHUTDOWN); + + kvm_vm_free(vm); +} diff --git a/tools/testing/selftests/kvm/x86/svm_nested_soft_inject_test.c b/tools/testing/selftests/kvm/x86/svm_nested_soft_inject_test.c new file mode 100644 index 000000000000..7b6481d6c0d3 --- /dev/null +++ b/tools/testing/selftests/kvm/x86/svm_nested_soft_inject_test.c @@ -0,0 +1,210 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2022 Oracle and/or its affiliates. 
+ * + * Based on: + * svm_int_ctl_test + * + * Copyright (C) 2021, Red Hat, Inc. + * + */ +#include +#include +#include +#include "apic.h" +#include "kvm_util.h" +#include "processor.h" +#include "svm_util.h" +#include "test_util.h" + +#define INT_NR 0x20 + +static_assert(ATOMIC_INT_LOCK_FREE == 2, "atomic int is not lockless"); + +static unsigned int bp_fired; +static void guest_bp_handler(struct ex_regs *regs) +{ + bp_fired++; +} + +static unsigned int int_fired; +static void l2_guest_code_int(void); + +static void guest_int_handler(struct ex_regs *regs) +{ + int_fired++; + GUEST_ASSERT_EQ(regs->rip, (unsigned long)l2_guest_code_int); +} + +static void l2_guest_code_int(void) +{ + GUEST_ASSERT_EQ(int_fired, 1); + + /* + * Same as the vmmcall() function, but with a ud2 sneaked after the + * vmmcall. The caller injects an exception with the return address + * increased by 2, so the "pop rbp" must be after the ud2 and we cannot + * use vmmcall() directly. + */ + __asm__ __volatile__("push %%rbp; vmmcall; ud2; pop %%rbp" + : : "a"(0xdeadbeef), "c"(0xbeefdead) + : "rbx", "rdx", "rsi", "rdi", "r8", "r9", + "r10", "r11", "r12", "r13", "r14", "r15"); + + GUEST_ASSERT_EQ(bp_fired, 1); + hlt(); +} + +static atomic_int nmi_stage; +#define nmi_stage_get() atomic_load_explicit(&nmi_stage, memory_order_acquire) +#define nmi_stage_inc() atomic_fetch_add_explicit(&nmi_stage, 1, memory_order_acq_rel) +static void guest_nmi_handler(struct ex_regs *regs) +{ + nmi_stage_inc(); + + if (nmi_stage_get() == 1) { + vmmcall(); + GUEST_FAIL("Unexpected resume after VMMCALL"); + } else { + GUEST_ASSERT_EQ(nmi_stage_get(), 3); + GUEST_DONE(); + } +} + +static void l2_guest_code_nmi(void) +{ + ud2(); +} + +static void l1_guest_code(struct svm_test_data *svm, uint64_t is_nmi, uint64_t idt_alt) +{ + #define L2_GUEST_STACK_SIZE 64 + unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; + struct vmcb *vmcb = svm->vmcb; + + if (is_nmi) + x2apic_enable(); + + /* Prepare for L2 execution. */ + generic_svm_setup(svm, + is_nmi ? 
l2_guest_code_nmi : l2_guest_code_int, + &l2_guest_stack[L2_GUEST_STACK_SIZE]); + + vmcb->control.intercept_exceptions |= BIT(PF_VECTOR) | BIT(UD_VECTOR); + vmcb->control.intercept |= BIT(INTERCEPT_NMI) | BIT(INTERCEPT_HLT); + + if (is_nmi) { + vmcb->control.event_inj = SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_NMI; + } else { + vmcb->control.event_inj = INT_NR | SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_SOFT; + /* The return address pushed on stack */ + vmcb->control.next_rip = vmcb->save.rip; + } + + run_guest(vmcb, svm->vmcb_gpa); + __GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL, + "Expected VMMCAL #VMEXIT, got '0x%x', info1 = '0x%lx, info2 = '0x%lx'", + vmcb->control.exit_code, + vmcb->control.exit_info_1, vmcb->control.exit_info_2); + + if (is_nmi) { + clgi(); + x2apic_write_reg(APIC_ICR, APIC_DEST_SELF | APIC_INT_ASSERT | APIC_DM_NMI); + + GUEST_ASSERT_EQ(nmi_stage_get(), 1); + nmi_stage_inc(); + + stgi(); + /* self-NMI happens here */ + while (true) + cpu_relax(); + } + + /* Skip over VMMCALL */ + vmcb->save.rip += 3; + + /* Switch to alternate IDT to cause intervening NPF again */ + vmcb->save.idtr.base = idt_alt; + vmcb->control.clean = 0; /* &= ~BIT(VMCB_DT) would be enough */ + + vmcb->control.event_inj = BP_VECTOR | SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_EXEPT; + /* The return address pushed on stack, skip over UD2 */ + vmcb->control.next_rip = vmcb->save.rip + 2; + + run_guest(vmcb, svm->vmcb_gpa); + __GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_HLT, + "Expected HLT #VMEXIT, got '0x%x', info1 = '0x%lx, info2 = '0x%lx'", + vmcb->control.exit_code, + vmcb->control.exit_info_1, vmcb->control.exit_info_2); + + GUEST_DONE(); +} + +static void run_test(bool is_nmi) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + vm_vaddr_t svm_gva; + vm_vaddr_t idt_alt_vm; + struct kvm_guest_debug debug; + + pr_info("Running %s test\n", is_nmi ? 
"NMI" : "soft int"); + + vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code); + + vm_install_exception_handler(vm, NMI_VECTOR, guest_nmi_handler); + vm_install_exception_handler(vm, BP_VECTOR, guest_bp_handler); + vm_install_exception_handler(vm, INT_NR, guest_int_handler); + + vcpu_alloc_svm(vm, &svm_gva); + + if (!is_nmi) { + void *idt, *idt_alt; + + idt_alt_vm = vm_vaddr_alloc_page(vm); + idt_alt = addr_gva2hva(vm, idt_alt_vm); + idt = addr_gva2hva(vm, vm->arch.idt); + memcpy(idt_alt, idt, getpagesize()); + } else { + idt_alt_vm = 0; + } + vcpu_args_set(vcpu, 3, svm_gva, (uint64_t)is_nmi, (uint64_t)idt_alt_vm); + + memset(&debug, 0, sizeof(debug)); + vcpu_guest_debug_set(vcpu, &debug); + + struct ucall uc; + + alarm(2); + vcpu_run(vcpu); + alarm(0); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); + + switch (get_ucall(vcpu, &uc)) { + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + break; + /* NOT REACHED */ + case UCALL_DONE: + goto done; + default: + TEST_FAIL("Unknown ucall 0x%lx.", uc.cmd); + } +done: + kvm_vm_free(vm); +} + +int main(int argc, char *argv[]) +{ + TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM)); + + TEST_ASSERT(kvm_cpu_has(X86_FEATURE_NRIPS), + "KVM with nSVM is supposed to unconditionally advertise nRIP Save"); + + atomic_init(&nmi_stage, 0); + + run_test(false); + run_test(true); + + return 0; +} diff --git a/tools/testing/selftests/kvm/x86/svm_vmcall_test.c b/tools/testing/selftests/kvm/x86/svm_vmcall_test.c new file mode 100644 index 000000000000..8a62cca28cfb --- /dev/null +++ b/tools/testing/selftests/kvm/x86/svm_vmcall_test.c @@ -0,0 +1,70 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * svm_vmcall_test + * + * Copyright (C) 2020, Red Hat, Inc. + * + * Nested SVM testing: VMCALL + */ + +#include "test_util.h" +#include "kvm_util.h" +#include "processor.h" +#include "svm_util.h" + +static void l2_guest_code(struct svm_test_data *svm) +{ + __asm__ __volatile__("vmcall"); +} + +static void l1_guest_code(struct svm_test_data *svm) +{ + #define L2_GUEST_STACK_SIZE 64 + unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; + struct vmcb *vmcb = svm->vmcb; + + /* Prepare for L2 execution. */ + generic_svm_setup(svm, l2_guest_code, + &l2_guest_stack[L2_GUEST_STACK_SIZE]); + + run_guest(vmcb, svm->vmcb_gpa); + + GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL); + GUEST_DONE(); +} + +int main(int argc, char *argv[]) +{ + struct kvm_vcpu *vcpu; + vm_vaddr_t svm_gva; + struct kvm_vm *vm; + + TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM)); + + vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code); + + vcpu_alloc_svm(vm, &svm_gva); + vcpu_args_set(vcpu, 1, svm_gva); + + for (;;) { + struct ucall uc; + + vcpu_run(vcpu); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); + + switch (get_ucall(vcpu, &uc)) { + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + /* NOT REACHED */ + case UCALL_SYNC: + break; + case UCALL_DONE: + goto done; + default: + TEST_FAIL("Unknown ucall 0x%lx.", uc.cmd); + } + } +done: + kvm_vm_free(vm); + return 0; +} diff --git a/tools/testing/selftests/kvm/x86/sync_regs_test.c b/tools/testing/selftests/kvm/x86/sync_regs_test.c new file mode 100644 index 000000000000..8fa3948b0170 --- /dev/null +++ b/tools/testing/selftests/kvm/x86/sync_regs_test.c @@ -0,0 +1,411 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Test for x86 KVM_CAP_SYNC_REGS + * + * Copyright (C) 2018, Google LLC. 
+ * + * Verifies expected behavior of x86 KVM_CAP_SYNC_REGS functionality, + * including requesting an invalid register set, updates to/from values + * in kvm_run.s.regs when kvm_valid_regs and kvm_dirty_regs are toggled. + */ +#include +#include +#include +#include +#include +#include + +#include "kvm_test_harness.h" +#include "test_util.h" +#include "kvm_util.h" +#include "processor.h" + +#define UCALL_PIO_PORT ((uint16_t)0x1000) + +struct ucall uc_none = { + .cmd = UCALL_NONE, +}; + +/* + * ucall is embedded here to protect against compiler reshuffling registers + * before calling a function. In this test we only need to get KVM_EXIT_IO + * vmexit and preserve RBX, no additional information is needed. + */ +void guest_code(void) +{ + asm volatile("1: in %[port], %%al\n" + "add $0x1, %%rbx\n" + "jmp 1b" + : : [port] "d" (UCALL_PIO_PORT), "D" (&uc_none) + : "rax", "rbx"); +} + +KVM_ONE_VCPU_TEST_SUITE(sync_regs_test); + +static void compare_regs(struct kvm_regs *left, struct kvm_regs *right) +{ +#define REG_COMPARE(reg) \ + TEST_ASSERT(left->reg == right->reg, \ + "Register " #reg \ + " values did not match: 0x%llx, 0x%llx", \ + left->reg, right->reg) + REG_COMPARE(rax); + REG_COMPARE(rbx); + REG_COMPARE(rcx); + REG_COMPARE(rdx); + REG_COMPARE(rsi); + REG_COMPARE(rdi); + REG_COMPARE(rsp); + REG_COMPARE(rbp); + REG_COMPARE(r8); + REG_COMPARE(r9); + REG_COMPARE(r10); + REG_COMPARE(r11); + REG_COMPARE(r12); + REG_COMPARE(r13); + REG_COMPARE(r14); + REG_COMPARE(r15); + REG_COMPARE(rip); + REG_COMPARE(rflags); +#undef REG_COMPARE +} + +static void compare_sregs(struct kvm_sregs *left, struct kvm_sregs *right) +{ +} + +static void compare_vcpu_events(struct kvm_vcpu_events *left, + struct kvm_vcpu_events *right) +{ +} + +#define TEST_SYNC_FIELDS (KVM_SYNC_X86_REGS|KVM_SYNC_X86_SREGS|KVM_SYNC_X86_EVENTS) +#define INVALID_SYNC_FIELD 0x80000000 + +/* + * Set an exception as pending *and* injected while KVM is processing events. + * KVM is supposed to ignore/drop pending exceptions if userspace is also + * requesting that an exception be injected. + */ +static void *race_events_inj_pen(void *arg) +{ + struct kvm_run *run = (struct kvm_run *)arg; + struct kvm_vcpu_events *events = &run->s.regs.events; + + WRITE_ONCE(events->exception.nr, UD_VECTOR); + + for (;;) { + WRITE_ONCE(run->kvm_dirty_regs, KVM_SYNC_X86_EVENTS); + WRITE_ONCE(events->flags, 0); + WRITE_ONCE(events->exception.injected, 1); + WRITE_ONCE(events->exception.pending, 1); + + pthread_testcancel(); + } + + return NULL; +} + +/* + * Set an invalid exception vector while KVM is processing events. KVM is + * supposed to reject any vector >= 32, as well as NMIs (vector 2). + */ +static void *race_events_exc(void *arg) +{ + struct kvm_run *run = (struct kvm_run *)arg; + struct kvm_vcpu_events *events = &run->s.regs.events; + + for (;;) { + WRITE_ONCE(run->kvm_dirty_regs, KVM_SYNC_X86_EVENTS); + WRITE_ONCE(events->flags, 0); + WRITE_ONCE(events->exception.nr, UD_VECTOR); + WRITE_ONCE(events->exception.pending, 1); + WRITE_ONCE(events->exception.nr, 255); + + pthread_testcancel(); + } + + return NULL; +} + +/* + * Toggle CR4.PAE while KVM is processing SREGS, EFER.LME=1 with CR4.PAE=0 is + * illegal, and KVM's MMU heavily relies on vCPU state being valid. 
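+ * The racer thread below repeatedly clears CR4.PAE in the shared
+ * run->s.regs.sregs while marking SREGS dirty, so KVM may observe the
+ * illegal combination mid-update.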
+ */ +static noinline void *race_sregs_cr4(void *arg) +{ + struct kvm_run *run = (struct kvm_run *)arg; + __u64 *cr4 = &run->s.regs.sregs.cr4; + __u64 pae_enabled = *cr4; + __u64 pae_disabled = *cr4 & ~X86_CR4_PAE; + + for (;;) { + WRITE_ONCE(run->kvm_dirty_regs, KVM_SYNC_X86_SREGS); + WRITE_ONCE(*cr4, pae_enabled); + asm volatile(".rept 512\n\t" + "nop\n\t" + ".endr"); + WRITE_ONCE(*cr4, pae_disabled); + + pthread_testcancel(); + } + + return NULL; +} + +static void race_sync_regs(struct kvm_vcpu *vcpu, void *racer) +{ + const time_t TIMEOUT = 2; /* seconds, roughly */ + struct kvm_x86_state *state; + struct kvm_translation tr; + struct kvm_run *run; + pthread_t thread; + time_t t; + + run = vcpu->run; + + run->kvm_valid_regs = KVM_SYNC_X86_SREGS; + vcpu_run(vcpu); + run->kvm_valid_regs = 0; + + /* Save state *before* spawning the thread that mucks with vCPU state. */ + state = vcpu_save_state(vcpu); + + /* + * Selftests run 64-bit guests by default, both EFER.LME and CR4.PAE + * should already be set in guest state. + */ + TEST_ASSERT((run->s.regs.sregs.cr4 & X86_CR4_PAE) && + (run->s.regs.sregs.efer & EFER_LME), + "vCPU should be in long mode, CR4.PAE=%d, EFER.LME=%d", + !!(run->s.regs.sregs.cr4 & X86_CR4_PAE), + !!(run->s.regs.sregs.efer & EFER_LME)); + + TEST_ASSERT_EQ(pthread_create(&thread, NULL, racer, (void *)run), 0); + + for (t = time(NULL) + TIMEOUT; time(NULL) < t;) { + /* + * Reload known good state if the vCPU triple faults, e.g. due + * to the unhandled #GPs being injected. VMX preserves state + * on shutdown, but SVM synthesizes an INIT as the VMCB state + * is architecturally undefined on triple fault. + */ + if (!__vcpu_run(vcpu) && run->exit_reason == KVM_EXIT_SHUTDOWN) + vcpu_load_state(vcpu, state); + + if (racer == race_sregs_cr4) { + tr = (struct kvm_translation) { .linear_address = 0 }; + __vcpu_ioctl(vcpu, KVM_TRANSLATE, &tr); + } + } + + TEST_ASSERT_EQ(pthread_cancel(thread), 0); + TEST_ASSERT_EQ(pthread_join(thread, NULL), 0); + + kvm_x86_state_cleanup(state); +} + +KVM_ONE_VCPU_TEST(sync_regs_test, read_invalid, guest_code) +{ + struct kvm_run *run = vcpu->run; + int rv; + + /* Request reading invalid register set from VCPU. */ + run->kvm_valid_regs = INVALID_SYNC_FIELD; + rv = _vcpu_run(vcpu); + TEST_ASSERT(rv < 0 && errno == EINVAL, + "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d", + rv); + run->kvm_valid_regs = 0; + + run->kvm_valid_regs = INVALID_SYNC_FIELD | TEST_SYNC_FIELDS; + rv = _vcpu_run(vcpu); + TEST_ASSERT(rv < 0 && errno == EINVAL, + "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d", + rv); + run->kvm_valid_regs = 0; +} + +KVM_ONE_VCPU_TEST(sync_regs_test, set_invalid, guest_code) +{ + struct kvm_run *run = vcpu->run; + int rv; + + /* Request setting invalid register set into VCPU. */ + run->kvm_dirty_regs = INVALID_SYNC_FIELD; + rv = _vcpu_run(vcpu); + TEST_ASSERT(rv < 0 && errno == EINVAL, + "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d", + rv); + run->kvm_dirty_regs = 0; + + run->kvm_dirty_regs = INVALID_SYNC_FIELD | TEST_SYNC_FIELDS; + rv = _vcpu_run(vcpu); + TEST_ASSERT(rv < 0 && errno == EINVAL, + "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d", + rv); + run->kvm_dirty_regs = 0; +} + +KVM_ONE_VCPU_TEST(sync_regs_test, req_and_verify_all_valid, guest_code) +{ + struct kvm_run *run = vcpu->run; + struct kvm_vcpu_events events; + struct kvm_sregs sregs; + struct kvm_regs regs; + + /* Request and verify all valid register sets. 
*/ + /* TODO: BUILD TIME CHECK: TEST_ASSERT(KVM_SYNC_X86_NUM_FIELDS != 3); */ + run->kvm_valid_regs = TEST_SYNC_FIELDS; + vcpu_run(vcpu); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); + + vcpu_regs_get(vcpu, ®s); + compare_regs(®s, &run->s.regs.regs); + + vcpu_sregs_get(vcpu, &sregs); + compare_sregs(&sregs, &run->s.regs.sregs); + + vcpu_events_get(vcpu, &events); + compare_vcpu_events(&events, &run->s.regs.events); +} + +KVM_ONE_VCPU_TEST(sync_regs_test, set_and_verify_various, guest_code) +{ + struct kvm_run *run = vcpu->run; + struct kvm_vcpu_events events; + struct kvm_sregs sregs; + struct kvm_regs regs; + + /* Run once to get register set */ + run->kvm_valid_regs = TEST_SYNC_FIELDS; + vcpu_run(vcpu); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); + + /* Set and verify various register values. */ + run->s.regs.regs.rbx = 0xBAD1DEA; + run->s.regs.sregs.apic_base = 1 << 11; + /* TODO run->s.regs.events.XYZ = ABC; */ + + run->kvm_valid_regs = TEST_SYNC_FIELDS; + run->kvm_dirty_regs = KVM_SYNC_X86_REGS | KVM_SYNC_X86_SREGS; + vcpu_run(vcpu); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); + TEST_ASSERT(run->s.regs.regs.rbx == 0xBAD1DEA + 1, + "rbx sync regs value incorrect 0x%llx.", + run->s.regs.regs.rbx); + TEST_ASSERT(run->s.regs.sregs.apic_base == 1 << 11, + "apic_base sync regs value incorrect 0x%llx.", + run->s.regs.sregs.apic_base); + + vcpu_regs_get(vcpu, ®s); + compare_regs(®s, &run->s.regs.regs); + + vcpu_sregs_get(vcpu, &sregs); + compare_sregs(&sregs, &run->s.regs.sregs); + + vcpu_events_get(vcpu, &events); + compare_vcpu_events(&events, &run->s.regs.events); +} + +KVM_ONE_VCPU_TEST(sync_regs_test, clear_kvm_dirty_regs_bits, guest_code) +{ + struct kvm_run *run = vcpu->run; + + /* Clear kvm_dirty_regs bits, verify new s.regs values are + * overwritten with existing guest values. + */ + run->kvm_valid_regs = TEST_SYNC_FIELDS; + run->kvm_dirty_regs = 0; + run->s.regs.regs.rbx = 0xDEADBEEF; + vcpu_run(vcpu); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); + TEST_ASSERT(run->s.regs.regs.rbx != 0xDEADBEEF, + "rbx sync regs value incorrect 0x%llx.", + run->s.regs.regs.rbx); +} + +KVM_ONE_VCPU_TEST(sync_regs_test, clear_kvm_valid_and_dirty_regs, guest_code) +{ + struct kvm_run *run = vcpu->run; + struct kvm_regs regs; + + /* Run once to get register set */ + run->kvm_valid_regs = TEST_SYNC_FIELDS; + vcpu_run(vcpu); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); + + /* Clear kvm_valid_regs bits and kvm_dirty_bits. + * Verify s.regs values are not overwritten with existing guest values + * and that guest values are not overwritten with kvm_sync_regs values. + */ + run->kvm_valid_regs = 0; + run->kvm_dirty_regs = 0; + run->s.regs.regs.rbx = 0xAAAA; + vcpu_regs_get(vcpu, ®s); + regs.rbx = 0xBAC0; + vcpu_regs_set(vcpu, ®s); + vcpu_run(vcpu); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); + TEST_ASSERT(run->s.regs.regs.rbx == 0xAAAA, + "rbx sync regs value incorrect 0x%llx.", + run->s.regs.regs.rbx); + vcpu_regs_get(vcpu, ®s); + TEST_ASSERT(regs.rbx == 0xBAC0 + 1, + "rbx guest value incorrect 0x%llx.", + regs.rbx); +} + +KVM_ONE_VCPU_TEST(sync_regs_test, clear_kvm_valid_regs_bits, guest_code) +{ + struct kvm_run *run = vcpu->run; + struct kvm_regs regs; + + /* Run once to get register set */ + run->kvm_valid_regs = TEST_SYNC_FIELDS; + vcpu_run(vcpu); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); + + /* Clear kvm_valid_regs bits. Verify s.regs values are not overwritten + * with existing guest values but that guest values are overwritten + * with kvm_sync_regs values. 
+ */ + run->kvm_valid_regs = 0; + run->kvm_dirty_regs = TEST_SYNC_FIELDS; + run->s.regs.regs.rbx = 0xBBBB; + vcpu_run(vcpu); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); + TEST_ASSERT(run->s.regs.regs.rbx == 0xBBBB, + "rbx sync regs value incorrect 0x%llx.", + run->s.regs.regs.rbx); + vcpu_regs_get(vcpu, ®s); + TEST_ASSERT(regs.rbx == 0xBBBB + 1, + "rbx guest value incorrect 0x%llx.", + regs.rbx); +} + +KVM_ONE_VCPU_TEST(sync_regs_test, race_cr4, guest_code) +{ + race_sync_regs(vcpu, race_sregs_cr4); +} + +KVM_ONE_VCPU_TEST(sync_regs_test, race_exc, guest_code) +{ + race_sync_regs(vcpu, race_events_exc); +} + +KVM_ONE_VCPU_TEST(sync_regs_test, race_inj_pen, guest_code) +{ + race_sync_regs(vcpu, race_events_inj_pen); +} + +int main(int argc, char *argv[]) +{ + int cap; + + cap = kvm_check_cap(KVM_CAP_SYNC_REGS); + TEST_REQUIRE((cap & TEST_SYNC_FIELDS) == TEST_SYNC_FIELDS); + TEST_REQUIRE(!(cap & INVALID_SYNC_FIELD)); + + return test_harness_run(argc, argv); +} diff --git a/tools/testing/selftests/kvm/x86/triple_fault_event_test.c b/tools/testing/selftests/kvm/x86/triple_fault_event_test.c new file mode 100644 index 000000000000..56306a19144a --- /dev/null +++ b/tools/testing/selftests/kvm/x86/triple_fault_event_test.c @@ -0,0 +1,124 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include "test_util.h" +#include "kvm_util.h" +#include "processor.h" +#include "vmx.h" +#include "svm_util.h" + +#include +#include + +#include "kselftest.h" + +#define ARBITRARY_IO_PORT 0x2000 + +/* The virtual machine object. */ +static struct kvm_vm *vm; + +static void l2_guest_code(void) +{ + asm volatile("inb %%dx, %%al" + : : [port] "d" (ARBITRARY_IO_PORT) : "rax"); +} + +#define L2_GUEST_STACK_SIZE 64 +unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; + +void l1_guest_code_vmx(struct vmx_pages *vmx) +{ + + GUEST_ASSERT(vmx->vmcs_gpa); + GUEST_ASSERT(prepare_for_vmx_operation(vmx)); + GUEST_ASSERT(load_vmcs(vmx)); + + prepare_vmcs(vmx, l2_guest_code, + &l2_guest_stack[L2_GUEST_STACK_SIZE]); + + GUEST_ASSERT(!vmlaunch()); + /* L2 should triple fault after a triple fault event injected. 
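+	 * The injected triple fault is reflected to L1 as a TRIPLE_FAULT
+	 * VM-exit rather than shutting the VM down.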
*/ + GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_TRIPLE_FAULT); + GUEST_DONE(); +} + +void l1_guest_code_svm(struct svm_test_data *svm) +{ + struct vmcb *vmcb = svm->vmcb; + + generic_svm_setup(svm, l2_guest_code, + &l2_guest_stack[L2_GUEST_STACK_SIZE]); + + /* don't intercept shutdown to test the case of SVM allowing to do so */ + vmcb->control.intercept &= ~(BIT(INTERCEPT_SHUTDOWN)); + + run_guest(vmcb, svm->vmcb_gpa); + + /* should not reach here, L1 should crash */ + GUEST_ASSERT(0); +} + +int main(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_run *run; + struct kvm_vcpu_events events; + struct ucall uc; + + bool has_vmx = kvm_cpu_has(X86_FEATURE_VMX); + bool has_svm = kvm_cpu_has(X86_FEATURE_SVM); + + TEST_REQUIRE(has_vmx || has_svm); + + TEST_REQUIRE(kvm_has_cap(KVM_CAP_X86_TRIPLE_FAULT_EVENT)); + + + if (has_vmx) { + vm_vaddr_t vmx_pages_gva; + + vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code_vmx); + vcpu_alloc_vmx(vm, &vmx_pages_gva); + vcpu_args_set(vcpu, 1, vmx_pages_gva); + } else { + vm_vaddr_t svm_gva; + + vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code_svm); + vcpu_alloc_svm(vm, &svm_gva); + vcpu_args_set(vcpu, 1, svm_gva); + } + + vm_enable_cap(vm, KVM_CAP_X86_TRIPLE_FAULT_EVENT, 1); + run = vcpu->run; + vcpu_run(vcpu); + + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); + TEST_ASSERT(run->io.port == ARBITRARY_IO_PORT, + "Expected IN from port %d from L2, got port %d", + ARBITRARY_IO_PORT, run->io.port); + vcpu_events_get(vcpu, &events); + events.flags |= KVM_VCPUEVENT_VALID_TRIPLE_FAULT; + events.triple_fault.pending = true; + vcpu_events_set(vcpu, &events); + run->immediate_exit = true; + vcpu_run_complete_io(vcpu); + + vcpu_events_get(vcpu, &events); + TEST_ASSERT(events.flags & KVM_VCPUEVENT_VALID_TRIPLE_FAULT, + "Triple fault event invalid"); + TEST_ASSERT(events.triple_fault.pending, + "No triple fault pending"); + vcpu_run(vcpu); + + + if (has_svm) { + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_SHUTDOWN); + } else { + switch (get_ucall(vcpu, &uc)) { + case UCALL_DONE: + break; + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + default: + TEST_FAIL("Unexpected ucall: %lu", uc.cmd); + } + } + return 0; +} diff --git a/tools/testing/selftests/kvm/x86/tsc_msrs_test.c b/tools/testing/selftests/kvm/x86/tsc_msrs_test.c new file mode 100644 index 000000000000..12b0964f4f13 --- /dev/null +++ b/tools/testing/selftests/kvm/x86/tsc_msrs_test.c @@ -0,0 +1,161 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Tests for MSR_IA32_TSC and MSR_IA32_TSC_ADJUST. + * + * Copyright (C) 2020, Red Hat, Inc. + */ +#include +#include +#include "kvm_util.h" +#include "processor.h" + +#define UNITY (1ull << 30) +#define HOST_ADJUST (UNITY * 64) +#define GUEST_STEP (UNITY * 4) +#define ROUND(x) ((x + UNITY / 2) & -UNITY) +#define rounded_rdmsr(x) ROUND(rdmsr(x)) +#define rounded_host_rdmsr(x) ROUND(vcpu_get_msr(vcpu, x)) + +static void guest_code(void) +{ + u64 val = 0; + + GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC), val); + GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC_ADJUST), val); + + /* Guest: writes to MSR_IA32_TSC affect both MSRs. */ + val = 1ull * GUEST_STEP; + wrmsr(MSR_IA32_TSC, val); + GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC), val); + GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC_ADJUST), val); + + /* Guest: writes to MSR_IA32_TSC_ADJUST affect both MSRs. 
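+	 * A WRMSR to IA32_TSC_ADJUST also adds the delta to the TSC, so with
+	 * TSC == TSC_ADJUST beforehand both MSRs read back the new value.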
*/ + GUEST_SYNC(2); + val = 2ull * GUEST_STEP; + wrmsr(MSR_IA32_TSC_ADJUST, val); + GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC), val); + GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC_ADJUST), val); + + /* Host: setting the TSC offset. */ + GUEST_SYNC(3); + GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val); + GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC_ADJUST), val); + + /* + * Guest: writes to MSR_IA32_TSC_ADJUST do not destroy the + * host-side offset and affect both MSRs. + */ + GUEST_SYNC(4); + val = 3ull * GUEST_STEP; + wrmsr(MSR_IA32_TSC_ADJUST, val); + GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val); + GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC_ADJUST), val); + + /* + * Guest: writes to MSR_IA32_TSC affect both MSRs, so the host-side + * offset is now visible in MSR_IA32_TSC_ADJUST. + */ + GUEST_SYNC(5); + val = 4ull * GUEST_STEP; + wrmsr(MSR_IA32_TSC, val); + GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC), val); + GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC_ADJUST), val - HOST_ADJUST); + + GUEST_DONE(); +} + +static void run_vcpu(struct kvm_vcpu *vcpu, int stage) +{ + struct ucall uc; + + vcpu_run(vcpu); + + switch (get_ucall(vcpu, &uc)) { + case UCALL_SYNC: + if (!strcmp((const char *)uc.args[0], "hello") && + uc.args[1] == stage + 1) + ksft_test_result_pass("stage %d passed\n", stage + 1); + else + ksft_test_result_fail( + "stage %d: Unexpected register values vmexit, got %lx", + stage + 1, (ulong)uc.args[1]); + return; + case UCALL_DONE: + ksft_test_result_pass("stage %d passed\n", stage + 1); + return; + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + default: + TEST_ASSERT(false, "Unexpected exit: %s", + exit_reason_str(vcpu->run->exit_reason)); + } +} + +int main(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + uint64_t val; + + ksft_print_header(); + ksft_set_plan(5); + + vm = vm_create_with_one_vcpu(&vcpu, guest_code); + + val = 0; + TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), val); + TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val); + + /* Guest: writes to MSR_IA32_TSC affect both MSRs. */ + run_vcpu(vcpu, 1); + val = 1ull * GUEST_STEP; + TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), val); + TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val); + + /* Guest: writes to MSR_IA32_TSC_ADJUST affect both MSRs. */ + run_vcpu(vcpu, 2); + val = 2ull * GUEST_STEP; + TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), val); + TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val); + + /* + * Host: writes to MSR_IA32_TSC set the host-side offset + * and therefore do not change MSR_IA32_TSC_ADJUST. + */ + vcpu_set_msr(vcpu, MSR_IA32_TSC, HOST_ADJUST + val); + TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val); + TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val); + run_vcpu(vcpu, 3); + + /* Host: writes to MSR_IA32_TSC_ADJUST do not modify the TSC. */ + vcpu_set_msr(vcpu, MSR_IA32_TSC_ADJUST, UNITY * 123456); + TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val); + TEST_ASSERT_EQ(vcpu_get_msr(vcpu, MSR_IA32_TSC_ADJUST), UNITY * 123456); + + /* Restore previous value. */ + vcpu_set_msr(vcpu, MSR_IA32_TSC_ADJUST, val); + TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val); + TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val); + + /* + * Guest: writes to MSR_IA32_TSC_ADJUST do not destroy the + * host-side offset and affect both MSRs. 
+ */ + run_vcpu(vcpu, 4); + val = 3ull * GUEST_STEP; + TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val); + TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val); + + /* + * Guest: writes to MSR_IA32_TSC affect both MSRs, so the host-side + * offset is now visible in MSR_IA32_TSC_ADJUST. + */ + run_vcpu(vcpu, 5); + val = 4ull * GUEST_STEP; + TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), val); + TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val - HOST_ADJUST); + + kvm_vm_free(vm); + + ksft_finished(); /* Print results and exit() accordingly */ +} diff --git a/tools/testing/selftests/kvm/x86/tsc_scaling_sync.c b/tools/testing/selftests/kvm/x86/tsc_scaling_sync.c new file mode 100644 index 000000000000..59c7304f805e --- /dev/null +++ b/tools/testing/selftests/kvm/x86/tsc_scaling_sync.c @@ -0,0 +1,110 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright © 2021 Amazon.com, Inc. or its affiliates. + */ + +#include "test_util.h" +#include "kvm_util.h" +#include "processor.h" + +#include +#include +#include +#include +#include + +#define NR_TEST_VCPUS 20 + +static struct kvm_vm *vm; +pthread_spinlock_t create_lock; + +#define TEST_TSC_KHZ 2345678UL +#define TEST_TSC_OFFSET 200000000 + +uint64_t tsc_sync; +static void guest_code(void) +{ + uint64_t start_tsc, local_tsc, tmp; + + start_tsc = rdtsc(); + do { + tmp = READ_ONCE(tsc_sync); + local_tsc = rdtsc(); + WRITE_ONCE(tsc_sync, local_tsc); + if (unlikely(local_tsc < tmp)) + GUEST_SYNC_ARGS(0, local_tsc, tmp, 0, 0); + + } while (local_tsc - start_tsc < 5000 * TEST_TSC_KHZ); + + GUEST_DONE(); +} + + +static void *run_vcpu(void *_cpu_nr) +{ + unsigned long vcpu_id = (unsigned long)_cpu_nr; + unsigned long failures = 0; + static bool first_cpu_done; + struct kvm_vcpu *vcpu; + + /* The kernel is fine, but vm_vcpu_add() needs locking */ + pthread_spin_lock(&create_lock); + + vcpu = vm_vcpu_add(vm, vcpu_id, guest_code); + + if (!first_cpu_done) { + first_cpu_done = true; + vcpu_set_msr(vcpu, MSR_IA32_TSC, TEST_TSC_OFFSET); + } + + pthread_spin_unlock(&create_lock); + + for (;;) { + struct ucall uc; + + vcpu_run(vcpu); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); + + switch (get_ucall(vcpu, &uc)) { + case UCALL_DONE: + goto out; + + case UCALL_SYNC: + printf("Guest %d sync %lx %lx %ld\n", vcpu->id, + uc.args[2], uc.args[3], uc.args[2] - uc.args[3]); + failures++; + break; + + default: + TEST_FAIL("Unknown ucall %lu", uc.cmd); + } + } + out: + return (void *)failures; +} + +int main(int argc, char *argv[]) +{ + TEST_REQUIRE(kvm_has_cap(KVM_CAP_VM_TSC_CONTROL)); + + vm = vm_create(NR_TEST_VCPUS); + vm_ioctl(vm, KVM_SET_TSC_KHZ, (void *) TEST_TSC_KHZ); + + pthread_spin_init(&create_lock, PTHREAD_PROCESS_PRIVATE); + pthread_t cpu_threads[NR_TEST_VCPUS]; + unsigned long cpu; + for (cpu = 0; cpu < NR_TEST_VCPUS; cpu++) + pthread_create(&cpu_threads[cpu], NULL, run_vcpu, (void *)cpu); + + unsigned long failures = 0; + for (cpu = 0; cpu < NR_TEST_VCPUS; cpu++) { + void *this_cpu_failures; + pthread_join(cpu_threads[cpu], &this_cpu_failures); + failures += (unsigned long)this_cpu_failures; + } + + TEST_ASSERT(!failures, "TSC sync failed"); + pthread_spin_destroy(&create_lock); + kvm_vm_free(vm); + return 0; +} diff --git a/tools/testing/selftests/kvm/x86/ucna_injection_test.c b/tools/testing/selftests/kvm/x86/ucna_injection_test.c new file mode 100644 index 000000000000..57f157c06b39 --- /dev/null +++ b/tools/testing/selftests/kvm/x86/ucna_injection_test.c @@ -0,0 +1,295 @@ +// SPDX-License-Identifier: GPL-2.0 
+/* + * ucna_injection_test + * + * Copyright (C) 2022, Google LLC. + * + * This work is licensed under the terms of the GNU GPL, version 2. + * + * Test that user space can inject UnCorrectable No Action required (UCNA) + * memory errors to the guest. + * + * The test starts one vCPU with the MCG_CMCI_P enabled. It verifies that + * proper UCNA errors can be injected to a vCPU with MCG_CMCI_P and + * corresponding per-bank control register (MCI_CTL2) bit enabled. + * The test also checks that the UCNA errors get recorded in the + * Machine Check bank registers no matter the error signal interrupts get + * delivered into the guest or not. + * + */ +#include +#include +#include +#include + +#include "kvm_util.h" +#include "mce.h" +#include "processor.h" +#include "test_util.h" +#include "apic.h" + +#define SYNC_FIRST_UCNA 9 +#define SYNC_SECOND_UCNA 10 +#define SYNC_GP 11 +#define FIRST_UCNA_ADDR 0xdeadbeef +#define SECOND_UCNA_ADDR 0xcafeb0ba + +/* + * Vector for the CMCI interrupt. + * Value is arbitrary. Any value in 0x20-0xFF should work: + * https://wiki.osdev.org/Interrupt_Vector_Table + */ +#define CMCI_VECTOR 0xa9 + +#define UCNA_BANK 0x7 // IMC0 bank + +#define MCI_CTL2_RESERVED_BIT BIT_ULL(29) + +static uint64_t supported_mcg_caps; + +/* + * Record states about the injected UCNA. + * The variables started with the 'i_' prefixes are recorded in interrupt + * handler. Variables without the 'i_' prefixes are recorded in guest main + * execution thread. + */ +static volatile uint64_t i_ucna_rcvd; +static volatile uint64_t i_ucna_addr; +static volatile uint64_t ucna_addr; +static volatile uint64_t ucna_addr2; + +struct thread_params { + struct kvm_vcpu *vcpu; + uint64_t *p_i_ucna_rcvd; + uint64_t *p_i_ucna_addr; + uint64_t *p_ucna_addr; + uint64_t *p_ucna_addr2; +}; + +static void verify_apic_base_addr(void) +{ + uint64_t msr = rdmsr(MSR_IA32_APICBASE); + uint64_t base = GET_APIC_BASE(msr); + + GUEST_ASSERT(base == APIC_DEFAULT_GPA); +} + +static void ucna_injection_guest_code(void) +{ + uint64_t ctl2; + verify_apic_base_addr(); + xapic_enable(); + + /* Sets up the interrupt vector and enables per-bank CMCI sigaling. */ + xapic_write_reg(APIC_LVTCMCI, CMCI_VECTOR | APIC_DM_FIXED); + ctl2 = rdmsr(MSR_IA32_MCx_CTL2(UCNA_BANK)); + wrmsr(MSR_IA32_MCx_CTL2(UCNA_BANK), ctl2 | MCI_CTL2_CMCI_EN); + + /* Enables interrupt in guest. */ + asm volatile("sti"); + + /* Let user space inject the first UCNA */ + GUEST_SYNC(SYNC_FIRST_UCNA); + + ucna_addr = rdmsr(MSR_IA32_MCx_ADDR(UCNA_BANK)); + + /* Disables the per-bank CMCI signaling. 
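+	 * With CMCI_EN cleared the second UCNA must still be recorded in the
+	 * bank registers, but must not raise a CMCI interrupt.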
*/ + ctl2 = rdmsr(MSR_IA32_MCx_CTL2(UCNA_BANK)); + wrmsr(MSR_IA32_MCx_CTL2(UCNA_BANK), ctl2 & ~MCI_CTL2_CMCI_EN); + + /* Let the user space inject the second UCNA */ + GUEST_SYNC(SYNC_SECOND_UCNA); + + ucna_addr2 = rdmsr(MSR_IA32_MCx_ADDR(UCNA_BANK)); + GUEST_DONE(); +} + +static void cmci_disabled_guest_code(void) +{ + uint64_t ctl2 = rdmsr(MSR_IA32_MCx_CTL2(UCNA_BANK)); + wrmsr(MSR_IA32_MCx_CTL2(UCNA_BANK), ctl2 | MCI_CTL2_CMCI_EN); + + GUEST_DONE(); +} + +static void cmci_enabled_guest_code(void) +{ + uint64_t ctl2 = rdmsr(MSR_IA32_MCx_CTL2(UCNA_BANK)); + wrmsr(MSR_IA32_MCx_CTL2(UCNA_BANK), ctl2 | MCI_CTL2_RESERVED_BIT); + + GUEST_DONE(); +} + +static void guest_cmci_handler(struct ex_regs *regs) +{ + i_ucna_rcvd++; + i_ucna_addr = rdmsr(MSR_IA32_MCx_ADDR(UCNA_BANK)); + xapic_write_reg(APIC_EOI, 0); +} + +static void guest_gp_handler(struct ex_regs *regs) +{ + GUEST_SYNC(SYNC_GP); +} + +static void run_vcpu_expect_gp(struct kvm_vcpu *vcpu) +{ + struct ucall uc; + + vcpu_run(vcpu); + + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); + TEST_ASSERT(get_ucall(vcpu, &uc) == UCALL_SYNC, + "Expect UCALL_SYNC"); + TEST_ASSERT(uc.args[1] == SYNC_GP, "#GP is expected."); + printf("vCPU received GP in guest.\n"); +} + +static void inject_ucna(struct kvm_vcpu *vcpu, uint64_t addr) { + /* + * A UCNA error is indicated with VAL=1, UC=1, PCC=0, S=0 and AR=0 in + * the IA32_MCi_STATUS register. + * MSCOD=1 (BIT[16] - MscodDataRdErr). + * MCACOD=0x0090 (Memory controller error format, channel 0) + */ + uint64_t status = MCI_STATUS_VAL | MCI_STATUS_UC | MCI_STATUS_EN | + MCI_STATUS_MISCV | MCI_STATUS_ADDRV | 0x10090; + struct kvm_x86_mce mce = {}; + mce.status = status; + mce.mcg_status = 0; + /* + * MCM_ADDR_PHYS indicates the reported address is a physical address. + * Lowest 6 bits is the recoverable address LSB, i.e., the injected MCE + * is at 4KB granularity. 
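+	 * (0xc == 12, i.e. bits 11:0 of the reported address are invalid,
+	 * matching the 4KB granularity noted above.)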
+ */ + mce.misc = (MCM_ADDR_PHYS << 6) | 0xc; + mce.addr = addr; + mce.bank = UCNA_BANK; + + vcpu_ioctl(vcpu, KVM_X86_SET_MCE, &mce); +} + +static void *run_ucna_injection(void *arg) +{ + struct thread_params *params = (struct thread_params *)arg; + struct ucall uc; + int old; + int r; + + r = pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, &old); + TEST_ASSERT(r == 0, + "pthread_setcanceltype failed with errno=%d", + r); + + vcpu_run(params->vcpu); + + TEST_ASSERT_KVM_EXIT_REASON(params->vcpu, KVM_EXIT_IO); + TEST_ASSERT(get_ucall(params->vcpu, &uc) == UCALL_SYNC, + "Expect UCALL_SYNC"); + TEST_ASSERT(uc.args[1] == SYNC_FIRST_UCNA, "Injecting first UCNA."); + + printf("Injecting first UCNA at %#x.\n", FIRST_UCNA_ADDR); + + inject_ucna(params->vcpu, FIRST_UCNA_ADDR); + vcpu_run(params->vcpu); + + TEST_ASSERT_KVM_EXIT_REASON(params->vcpu, KVM_EXIT_IO); + TEST_ASSERT(get_ucall(params->vcpu, &uc) == UCALL_SYNC, + "Expect UCALL_SYNC"); + TEST_ASSERT(uc.args[1] == SYNC_SECOND_UCNA, "Injecting second UCNA."); + + printf("Injecting second UCNA at %#x.\n", SECOND_UCNA_ADDR); + + inject_ucna(params->vcpu, SECOND_UCNA_ADDR); + vcpu_run(params->vcpu); + + TEST_ASSERT_KVM_EXIT_REASON(params->vcpu, KVM_EXIT_IO); + if (get_ucall(params->vcpu, &uc) == UCALL_ABORT) { + TEST_ASSERT(false, "vCPU assertion failure: %s.", + (const char *)uc.args[0]); + } + + return NULL; +} + +static void test_ucna_injection(struct kvm_vcpu *vcpu, struct thread_params *params) +{ + struct kvm_vm *vm = vcpu->vm; + params->vcpu = vcpu; + params->p_i_ucna_rcvd = (uint64_t *)addr_gva2hva(vm, (uint64_t)&i_ucna_rcvd); + params->p_i_ucna_addr = (uint64_t *)addr_gva2hva(vm, (uint64_t)&i_ucna_addr); + params->p_ucna_addr = (uint64_t *)addr_gva2hva(vm, (uint64_t)&ucna_addr); + params->p_ucna_addr2 = (uint64_t *)addr_gva2hva(vm, (uint64_t)&ucna_addr2); + + run_ucna_injection(params); + + TEST_ASSERT(*params->p_i_ucna_rcvd == 1, "Only first UCNA get signaled."); + TEST_ASSERT(*params->p_i_ucna_addr == FIRST_UCNA_ADDR, + "Only first UCNA reported addr get recorded via interrupt."); + TEST_ASSERT(*params->p_ucna_addr == FIRST_UCNA_ADDR, + "First injected UCNAs should get exposed via registers."); + TEST_ASSERT(*params->p_ucna_addr2 == SECOND_UCNA_ADDR, + "Second injected UCNAs should get exposed via registers."); + + printf("Test successful.\n" + "UCNA CMCI interrupts received: %ld\n" + "Last UCNA address received via CMCI: %lx\n" + "First UCNA address in vCPU thread: %lx\n" + "Second UCNA address in vCPU thread: %lx\n", + *params->p_i_ucna_rcvd, *params->p_i_ucna_addr, + *params->p_ucna_addr, *params->p_ucna_addr2); +} + +static void setup_mce_cap(struct kvm_vcpu *vcpu, bool enable_cmci_p) +{ + uint64_t mcg_caps = MCG_CTL_P | MCG_SER_P | MCG_LMCE_P | KVM_MAX_MCE_BANKS; + if (enable_cmci_p) + mcg_caps |= MCG_CMCI_P; + + mcg_caps &= supported_mcg_caps | MCG_CAP_BANKS_MASK; + vcpu_ioctl(vcpu, KVM_X86_SETUP_MCE, &mcg_caps); +} + +static struct kvm_vcpu *create_vcpu_with_mce_cap(struct kvm_vm *vm, uint32_t vcpuid, + bool enable_cmci_p, void *guest_code) +{ + struct kvm_vcpu *vcpu = vm_vcpu_add(vm, vcpuid, guest_code); + setup_mce_cap(vcpu, enable_cmci_p); + return vcpu; +} + +int main(int argc, char *argv[]) +{ + struct thread_params params; + struct kvm_vm *vm; + struct kvm_vcpu *ucna_vcpu; + struct kvm_vcpu *cmcidis_vcpu; + struct kvm_vcpu *cmci_vcpu; + + kvm_check_cap(KVM_CAP_MCE); + + vm = __vm_create(VM_SHAPE_DEFAULT, 3, 0); + + kvm_ioctl(vm->kvm_fd, KVM_X86_GET_MCE_CAP_SUPPORTED, + &supported_mcg_caps); + + if (!(supported_mcg_caps & 
MCG_CMCI_P)) { + print_skip("MCG_CMCI_P is not supported"); + exit(KSFT_SKIP); + } + + ucna_vcpu = create_vcpu_with_mce_cap(vm, 0, true, ucna_injection_guest_code); + cmcidis_vcpu = create_vcpu_with_mce_cap(vm, 1, false, cmci_disabled_guest_code); + cmci_vcpu = create_vcpu_with_mce_cap(vm, 2, true, cmci_enabled_guest_code); + + vm_install_exception_handler(vm, CMCI_VECTOR, guest_cmci_handler); + vm_install_exception_handler(vm, GP_VECTOR, guest_gp_handler); + + virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA); + + test_ucna_injection(ucna_vcpu, ¶ms); + run_vcpu_expect_gp(cmcidis_vcpu); + run_vcpu_expect_gp(cmci_vcpu); + + kvm_vm_free(vm); +} diff --git a/tools/testing/selftests/kvm/x86/userspace_io_test.c b/tools/testing/selftests/kvm/x86/userspace_io_test.c new file mode 100644 index 000000000000..9481cbcf284f --- /dev/null +++ b/tools/testing/selftests/kvm/x86/userspace_io_test.c @@ -0,0 +1,103 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include +#include + +#include "test_util.h" + +#include "kvm_util.h" +#include "processor.h" + +static void guest_ins_port80(uint8_t *buffer, unsigned int count) +{ + unsigned long end; + + if (count == 2) + end = (unsigned long)buffer + 1; + else + end = (unsigned long)buffer + 8192; + + asm volatile("cld; rep; insb" : "+D"(buffer), "+c"(count) : "d"(0x80) : "memory"); + GUEST_ASSERT_EQ(count, 0); + GUEST_ASSERT_EQ((unsigned long)buffer, end); +} + +static void guest_code(void) +{ + uint8_t buffer[8192]; + int i; + + /* + * Special case tests. main() will adjust RCX 2 => 1 and 3 => 8192 to + * test that KVM doesn't explode when userspace modifies the "count" on + * a userspace I/O exit. KVM isn't required to play nice with the I/O + * itself as KVM doesn't support manipulating the count, it just needs + * to not explode or overflow a buffer. + */ + guest_ins_port80(buffer, 2); + guest_ins_port80(buffer, 3); + + /* Verify KVM fills the buffer correctly when not stuffing RCX. */ + memset(buffer, 0, sizeof(buffer)); + guest_ins_port80(buffer, 8192); + for (i = 0; i < 8192; i++) + __GUEST_ASSERT(buffer[i] == 0xaa, + "Expected '0xaa', got '0x%x' at buffer[%u]", + buffer[i], i); + + GUEST_DONE(); +} + +int main(int argc, char *argv[]) +{ + struct kvm_vcpu *vcpu; + struct kvm_regs regs; + struct kvm_run *run; + struct kvm_vm *vm; + struct ucall uc; + + vm = vm_create_with_one_vcpu(&vcpu, guest_code); + run = vcpu->run; + + memset(®s, 0, sizeof(regs)); + + while (1) { + vcpu_run(vcpu); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); + + if (get_ucall(vcpu, &uc)) + break; + + TEST_ASSERT(run->io.port == 0x80, + "Expected I/O at port 0x80, got port 0x%x", run->io.port); + + /* + * Modify the rep string count in RCX: 2 => 1 and 3 => 8192. + * Note, this abuses KVM's batching of rep string I/O to avoid + * getting stuck in an infinite loop. That behavior isn't in + * scope from a testing perspective as it's not ABI in any way, + * i.e. it really is abusing internal KVM knowledge. 
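+		 * The 0xaa pattern written at run->io.data_offset below is
+		 * what the guest's final, unmodified 8192-byte pass checks for.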
+ */ + vcpu_regs_get(vcpu, ®s); + if (regs.rcx == 2) + regs.rcx = 1; + if (regs.rcx == 3) + regs.rcx = 8192; + memset((void *)run + run->io.data_offset, 0xaa, 4096); + vcpu_regs_set(vcpu, ®s); + } + + switch (uc.cmd) { + case UCALL_DONE: + break; + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + default: + TEST_FAIL("Unknown ucall %lu", uc.cmd); + } + + kvm_vm_free(vm); + return 0; +} diff --git a/tools/testing/selftests/kvm/x86/userspace_msr_exit_test.c b/tools/testing/selftests/kvm/x86/userspace_msr_exit_test.c new file mode 100644 index 000000000000..32b2794b78fe --- /dev/null +++ b/tools/testing/selftests/kvm/x86/userspace_msr_exit_test.c @@ -0,0 +1,769 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2020, Google LLC. + * + * Tests for exiting into userspace on registered MSRs + */ +#include + +#include "kvm_test_harness.h" +#include "test_util.h" +#include "kvm_util.h" +#include "vmx.h" + +#define MSR_NON_EXISTENT 0x474f4f00 + +static u64 deny_bits = 0; +struct kvm_msr_filter filter_allow = { + .flags = KVM_MSR_FILTER_DEFAULT_ALLOW, + .ranges = { + { + .flags = KVM_MSR_FILTER_READ | + KVM_MSR_FILTER_WRITE, + .nmsrs = 1, + /* Test an MSR the kernel knows about. */ + .base = MSR_IA32_XSS, + .bitmap = (uint8_t*)&deny_bits, + }, { + .flags = KVM_MSR_FILTER_READ | + KVM_MSR_FILTER_WRITE, + .nmsrs = 1, + /* Test an MSR the kernel doesn't know about. */ + .base = MSR_IA32_FLUSH_CMD, + .bitmap = (uint8_t*)&deny_bits, + }, { + .flags = KVM_MSR_FILTER_READ | + KVM_MSR_FILTER_WRITE, + .nmsrs = 1, + /* Test a fabricated MSR that no one knows about. */ + .base = MSR_NON_EXISTENT, + .bitmap = (uint8_t*)&deny_bits, + }, + }, +}; + +struct kvm_msr_filter filter_fs = { + .flags = KVM_MSR_FILTER_DEFAULT_ALLOW, + .ranges = { + { + .flags = KVM_MSR_FILTER_READ, + .nmsrs = 1, + .base = MSR_FS_BASE, + .bitmap = (uint8_t*)&deny_bits, + }, + }, +}; + +struct kvm_msr_filter filter_gs = { + .flags = KVM_MSR_FILTER_DEFAULT_ALLOW, + .ranges = { + { + .flags = KVM_MSR_FILTER_READ, + .nmsrs = 1, + .base = MSR_GS_BASE, + .bitmap = (uint8_t*)&deny_bits, + }, + }, +}; + +static uint64_t msr_non_existent_data; +static int guest_exception_count; +static u32 msr_reads, msr_writes; + +static u8 bitmap_00000000[KVM_MSR_FILTER_MAX_BITMAP_SIZE]; +static u8 bitmap_00000000_write[KVM_MSR_FILTER_MAX_BITMAP_SIZE]; +static u8 bitmap_40000000[KVM_MSR_FILTER_MAX_BITMAP_SIZE]; +static u8 bitmap_c0000000[KVM_MSR_FILTER_MAX_BITMAP_SIZE]; +static u8 bitmap_c0000000_read[KVM_MSR_FILTER_MAX_BITMAP_SIZE]; +static u8 bitmap_deadbeef[1] = { 0x1 }; + +static void deny_msr(uint8_t *bitmap, u32 msr) +{ + u32 idx = msr & (KVM_MSR_FILTER_MAX_BITMAP_SIZE - 1); + + bitmap[idx / 8] &= ~(1 << (idx % 8)); +} + +static void prepare_bitmaps(void) +{ + memset(bitmap_00000000, 0xff, sizeof(bitmap_00000000)); + memset(bitmap_00000000_write, 0xff, sizeof(bitmap_00000000_write)); + memset(bitmap_40000000, 0xff, sizeof(bitmap_40000000)); + memset(bitmap_c0000000, 0xff, sizeof(bitmap_c0000000)); + memset(bitmap_c0000000_read, 0xff, sizeof(bitmap_c0000000_read)); + + deny_msr(bitmap_00000000_write, MSR_IA32_POWER_CTL); + deny_msr(bitmap_c0000000_read, MSR_SYSCALL_MASK); + deny_msr(bitmap_c0000000_read, MSR_GS_BASE); +} + +struct kvm_msr_filter filter_deny = { + .flags = KVM_MSR_FILTER_DEFAULT_DENY, + .ranges = { + { + .flags = KVM_MSR_FILTER_READ, + .base = 0x00000000, + .nmsrs = KVM_MSR_FILTER_MAX_BITMAP_SIZE * BITS_PER_BYTE, + .bitmap = bitmap_00000000, + }, { + .flags = KVM_MSR_FILTER_WRITE, + .base = 0x00000000, + .nmsrs = 
KVM_MSR_FILTER_MAX_BITMAP_SIZE * BITS_PER_BYTE, + .bitmap = bitmap_00000000_write, + }, { + .flags = KVM_MSR_FILTER_READ | KVM_MSR_FILTER_WRITE, + .base = 0x40000000, + .nmsrs = KVM_MSR_FILTER_MAX_BITMAP_SIZE * BITS_PER_BYTE, + .bitmap = bitmap_40000000, + }, { + .flags = KVM_MSR_FILTER_READ, + .base = 0xc0000000, + .nmsrs = KVM_MSR_FILTER_MAX_BITMAP_SIZE * BITS_PER_BYTE, + .bitmap = bitmap_c0000000_read, + }, { + .flags = KVM_MSR_FILTER_WRITE, + .base = 0xc0000000, + .nmsrs = KVM_MSR_FILTER_MAX_BITMAP_SIZE * BITS_PER_BYTE, + .bitmap = bitmap_c0000000, + }, { + .flags = KVM_MSR_FILTER_WRITE | KVM_MSR_FILTER_READ, + .base = 0xdeadbeef, + .nmsrs = 1, + .bitmap = bitmap_deadbeef, + }, + }, +}; + +struct kvm_msr_filter no_filter_deny = { + .flags = KVM_MSR_FILTER_DEFAULT_ALLOW, +}; + +/* + * Note: Force test_rdmsr() to not be inlined to prevent the labels, + * rdmsr_start and rdmsr_end, from being defined multiple times. + */ +static noinline uint64_t test_rdmsr(uint32_t msr) +{ + uint32_t a, d; + + guest_exception_count = 0; + + __asm__ __volatile__("rdmsr_start: rdmsr; rdmsr_end:" : + "=a"(a), "=d"(d) : "c"(msr) : "memory"); + + return a | ((uint64_t) d << 32); +} + +/* + * Note: Force test_wrmsr() to not be inlined to prevent the labels, + * wrmsr_start and wrmsr_end, from being defined multiple times. + */ +static noinline void test_wrmsr(uint32_t msr, uint64_t value) +{ + uint32_t a = value; + uint32_t d = value >> 32; + + guest_exception_count = 0; + + __asm__ __volatile__("wrmsr_start: wrmsr; wrmsr_end:" :: + "a"(a), "d"(d), "c"(msr) : "memory"); +} + +extern char rdmsr_start, rdmsr_end; +extern char wrmsr_start, wrmsr_end; + +/* + * Note: Force test_em_rdmsr() to not be inlined to prevent the labels, + * rdmsr_start and rdmsr_end, from being defined multiple times. + */ +static noinline uint64_t test_em_rdmsr(uint32_t msr) +{ + uint32_t a, d; + + guest_exception_count = 0; + + __asm__ __volatile__(KVM_FEP "em_rdmsr_start: rdmsr; em_rdmsr_end:" : + "=a"(a), "=d"(d) : "c"(msr) : "memory"); + + return a | ((uint64_t) d << 32); +} + +/* + * Note: Force test_em_wrmsr() to not be inlined to prevent the labels, + * wrmsr_start and wrmsr_end, from being defined multiple times. + */ +static noinline void test_em_wrmsr(uint32_t msr, uint64_t value) +{ + uint32_t a = value; + uint32_t d = value >> 32; + + guest_exception_count = 0; + + __asm__ __volatile__(KVM_FEP "em_wrmsr_start: wrmsr; em_wrmsr_end:" :: + "a"(a), "d"(d), "c"(msr) : "memory"); +} + +extern char em_rdmsr_start, em_rdmsr_end; +extern char em_wrmsr_start, em_wrmsr_end; + +static void guest_code_filter_allow(void) +{ + uint64_t data; + + /* + * Test userspace intercepting rdmsr / wrmsr for MSR_IA32_XSS. + * + * A GP is thrown if anything other than 0 is written to + * MSR_IA32_XSS. + */ + data = test_rdmsr(MSR_IA32_XSS); + GUEST_ASSERT(data == 0); + GUEST_ASSERT(guest_exception_count == 0); + + test_wrmsr(MSR_IA32_XSS, 0); + GUEST_ASSERT(guest_exception_count == 0); + + test_wrmsr(MSR_IA32_XSS, 1); + GUEST_ASSERT(guest_exception_count == 1); + + /* + * Test userspace intercepting rdmsr / wrmsr for MSR_IA32_FLUSH_CMD. + * + * A GP is thrown if MSR_IA32_FLUSH_CMD is read + * from or if a value other than 1 is written to it. 
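+	 * The #GPs come from the userspace exit handler setting
+	 * run->msr.error, not from KVM's own MSR emulation.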
+ */ + test_rdmsr(MSR_IA32_FLUSH_CMD); + GUEST_ASSERT(guest_exception_count == 1); + + test_wrmsr(MSR_IA32_FLUSH_CMD, 0); + GUEST_ASSERT(guest_exception_count == 1); + + test_wrmsr(MSR_IA32_FLUSH_CMD, 1); + GUEST_ASSERT(guest_exception_count == 0); + + /* + * Test userspace intercepting rdmsr / wrmsr for MSR_NON_EXISTENT. + * + * Test that a fabricated MSR can pass through the kernel + * and be handled in userspace. + */ + test_wrmsr(MSR_NON_EXISTENT, 2); + GUEST_ASSERT(guest_exception_count == 0); + + data = test_rdmsr(MSR_NON_EXISTENT); + GUEST_ASSERT(data == 2); + GUEST_ASSERT(guest_exception_count == 0); + + if (is_forced_emulation_enabled) { + /* Let userspace know we aren't done. */ + GUEST_SYNC(0); + + /* + * Now run the same tests with the instruction emulator. + */ + data = test_em_rdmsr(MSR_IA32_XSS); + GUEST_ASSERT(data == 0); + GUEST_ASSERT(guest_exception_count == 0); + test_em_wrmsr(MSR_IA32_XSS, 0); + GUEST_ASSERT(guest_exception_count == 0); + test_em_wrmsr(MSR_IA32_XSS, 1); + GUEST_ASSERT(guest_exception_count == 1); + + test_em_rdmsr(MSR_IA32_FLUSH_CMD); + GUEST_ASSERT(guest_exception_count == 1); + test_em_wrmsr(MSR_IA32_FLUSH_CMD, 0); + GUEST_ASSERT(guest_exception_count == 1); + test_em_wrmsr(MSR_IA32_FLUSH_CMD, 1); + GUEST_ASSERT(guest_exception_count == 0); + + test_em_wrmsr(MSR_NON_EXISTENT, 2); + GUEST_ASSERT(guest_exception_count == 0); + data = test_em_rdmsr(MSR_NON_EXISTENT); + GUEST_ASSERT(data == 2); + GUEST_ASSERT(guest_exception_count == 0); + } + + GUEST_DONE(); +} + +static void guest_msr_calls(bool trapped) +{ + /* This goes into the in-kernel emulation */ + wrmsr(MSR_SYSCALL_MASK, 0); + + if (trapped) { + /* This goes into user space emulation */ + GUEST_ASSERT(rdmsr(MSR_SYSCALL_MASK) == MSR_SYSCALL_MASK); + GUEST_ASSERT(rdmsr(MSR_GS_BASE) == MSR_GS_BASE); + } else { + GUEST_ASSERT(rdmsr(MSR_SYSCALL_MASK) != MSR_SYSCALL_MASK); + GUEST_ASSERT(rdmsr(MSR_GS_BASE) != MSR_GS_BASE); + } + + /* If trapped == true, this goes into user space emulation */ + wrmsr(MSR_IA32_POWER_CTL, 0x1234); + + /* This goes into the in-kernel emulation */ + rdmsr(MSR_IA32_POWER_CTL); + + /* Invalid MSR, should always be handled by user space exit */ + GUEST_ASSERT(rdmsr(0xdeadbeef) == 0xdeadbeef); + wrmsr(0xdeadbeef, 0x1234); +} + +static void guest_code_filter_deny(void) +{ + guest_msr_calls(true); + + /* + * Disable msr filtering, so that the kernel + * handles everything in the next round + */ + GUEST_SYNC(0); + + guest_msr_calls(false); + + GUEST_DONE(); +} + +static void guest_code_permission_bitmap(void) +{ + uint64_t data; + + data = test_rdmsr(MSR_FS_BASE); + GUEST_ASSERT(data == MSR_FS_BASE); + data = test_rdmsr(MSR_GS_BASE); + GUEST_ASSERT(data != MSR_GS_BASE); + + /* Let userspace know to switch the filter */ + GUEST_SYNC(0); + + data = test_rdmsr(MSR_FS_BASE); + GUEST_ASSERT(data != MSR_FS_BASE); + data = test_rdmsr(MSR_GS_BASE); + GUEST_ASSERT(data == MSR_GS_BASE); + + GUEST_DONE(); +} + +static void __guest_gp_handler(struct ex_regs *regs, + char *r_start, char *r_end, + char *w_start, char *w_end) +{ + if (regs->rip == (uintptr_t)r_start) { + regs->rip = (uintptr_t)r_end; + regs->rax = 0; + regs->rdx = 0; + } else if (regs->rip == (uintptr_t)w_start) { + regs->rip = (uintptr_t)w_end; + } else { + GUEST_ASSERT(!"RIP is at an unknown location!"); + } + + ++guest_exception_count; +} + +static void guest_gp_handler(struct ex_regs *regs) +{ + __guest_gp_handler(regs, &rdmsr_start, &rdmsr_end, + &wrmsr_start, &wrmsr_end); +} + +static void guest_fep_gp_handler(struct 
ex_regs *regs) +{ + __guest_gp_handler(regs, &em_rdmsr_start, &em_rdmsr_end, + &em_wrmsr_start, &em_wrmsr_end); +} + +static void check_for_guest_assert(struct kvm_vcpu *vcpu) +{ + struct ucall uc; + + if (vcpu->run->exit_reason == KVM_EXIT_IO && + get_ucall(vcpu, &uc) == UCALL_ABORT) { + REPORT_GUEST_ASSERT(uc); + } +} + +static void process_rdmsr(struct kvm_vcpu *vcpu, uint32_t msr_index) +{ + struct kvm_run *run = vcpu->run; + + check_for_guest_assert(vcpu); + + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_X86_RDMSR); + TEST_ASSERT(run->msr.index == msr_index, + "Unexpected msr (0x%04x), expected 0x%04x", + run->msr.index, msr_index); + + switch (run->msr.index) { + case MSR_IA32_XSS: + run->msr.data = 0; + break; + case MSR_IA32_FLUSH_CMD: + run->msr.error = 1; + break; + case MSR_NON_EXISTENT: + run->msr.data = msr_non_existent_data; + break; + case MSR_FS_BASE: + run->msr.data = MSR_FS_BASE; + break; + case MSR_GS_BASE: + run->msr.data = MSR_GS_BASE; + break; + default: + TEST_ASSERT(false, "Unexpected MSR: 0x%04x", run->msr.index); + } +} + +static void process_wrmsr(struct kvm_vcpu *vcpu, uint32_t msr_index) +{ + struct kvm_run *run = vcpu->run; + + check_for_guest_assert(vcpu); + + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_X86_WRMSR); + TEST_ASSERT(run->msr.index == msr_index, + "Unexpected msr (0x%04x), expected 0x%04x", + run->msr.index, msr_index); + + switch (run->msr.index) { + case MSR_IA32_XSS: + if (run->msr.data != 0) + run->msr.error = 1; + break; + case MSR_IA32_FLUSH_CMD: + if (run->msr.data != 1) + run->msr.error = 1; + break; + case MSR_NON_EXISTENT: + msr_non_existent_data = run->msr.data; + break; + default: + TEST_ASSERT(false, "Unexpected MSR: 0x%04x", run->msr.index); + } +} + +static void process_ucall_done(struct kvm_vcpu *vcpu) +{ + struct ucall uc; + + check_for_guest_assert(vcpu); + + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); + + TEST_ASSERT(get_ucall(vcpu, &uc) == UCALL_DONE, + "Unexpected ucall command: %lu, expected UCALL_DONE (%d)", + uc.cmd, UCALL_DONE); +} + +static uint64_t process_ucall(struct kvm_vcpu *vcpu) +{ + struct ucall uc = {}; + + check_for_guest_assert(vcpu); + + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); + + switch (get_ucall(vcpu, &uc)) { + case UCALL_SYNC: + break; + case UCALL_ABORT: + check_for_guest_assert(vcpu); + break; + case UCALL_DONE: + process_ucall_done(vcpu); + break; + default: + TEST_ASSERT(false, "Unexpected ucall"); + } + + return uc.cmd; +} + +static void run_guest_then_process_rdmsr(struct kvm_vcpu *vcpu, + uint32_t msr_index) +{ + vcpu_run(vcpu); + process_rdmsr(vcpu, msr_index); +} + +static void run_guest_then_process_wrmsr(struct kvm_vcpu *vcpu, + uint32_t msr_index) +{ + vcpu_run(vcpu); + process_wrmsr(vcpu, msr_index); +} + +static uint64_t run_guest_then_process_ucall(struct kvm_vcpu *vcpu) +{ + vcpu_run(vcpu); + return process_ucall(vcpu); +} + +static void run_guest_then_process_ucall_done(struct kvm_vcpu *vcpu) +{ + vcpu_run(vcpu); + process_ucall_done(vcpu); +} + +KVM_ONE_VCPU_TEST_SUITE(user_msr); + +KVM_ONE_VCPU_TEST(user_msr, msr_filter_allow, guest_code_filter_allow) +{ + struct kvm_vm *vm = vcpu->vm; + uint64_t cmd; + int rc; + + rc = kvm_check_cap(KVM_CAP_X86_USER_SPACE_MSR); + TEST_ASSERT(rc, "KVM_CAP_X86_USER_SPACE_MSR is available"); + vm_enable_cap(vm, KVM_CAP_X86_USER_SPACE_MSR, KVM_MSR_EXIT_REASON_FILTER); + + rc = kvm_check_cap(KVM_CAP_X86_MSR_FILTER); + TEST_ASSERT(rc, "KVM_CAP_X86_MSR_FILTER is available"); + + vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter_allow); + + 
vm_install_exception_handler(vm, GP_VECTOR, guest_gp_handler); + + /* Process guest code userspace exits. */ + run_guest_then_process_rdmsr(vcpu, MSR_IA32_XSS); + run_guest_then_process_wrmsr(vcpu, MSR_IA32_XSS); + run_guest_then_process_wrmsr(vcpu, MSR_IA32_XSS); + + run_guest_then_process_rdmsr(vcpu, MSR_IA32_FLUSH_CMD); + run_guest_then_process_wrmsr(vcpu, MSR_IA32_FLUSH_CMD); + run_guest_then_process_wrmsr(vcpu, MSR_IA32_FLUSH_CMD); + + run_guest_then_process_wrmsr(vcpu, MSR_NON_EXISTENT); + run_guest_then_process_rdmsr(vcpu, MSR_NON_EXISTENT); + + vcpu_run(vcpu); + cmd = process_ucall(vcpu); + + if (is_forced_emulation_enabled) { + TEST_ASSERT_EQ(cmd, UCALL_SYNC); + vm_install_exception_handler(vm, GP_VECTOR, guest_fep_gp_handler); + + /* Process emulated rdmsr and wrmsr instructions. */ + run_guest_then_process_rdmsr(vcpu, MSR_IA32_XSS); + run_guest_then_process_wrmsr(vcpu, MSR_IA32_XSS); + run_guest_then_process_wrmsr(vcpu, MSR_IA32_XSS); + + run_guest_then_process_rdmsr(vcpu, MSR_IA32_FLUSH_CMD); + run_guest_then_process_wrmsr(vcpu, MSR_IA32_FLUSH_CMD); + run_guest_then_process_wrmsr(vcpu, MSR_IA32_FLUSH_CMD); + + run_guest_then_process_wrmsr(vcpu, MSR_NON_EXISTENT); + run_guest_then_process_rdmsr(vcpu, MSR_NON_EXISTENT); + + /* Confirm the guest completed without issues. */ + run_guest_then_process_ucall_done(vcpu); + } else { + TEST_ASSERT_EQ(cmd, UCALL_DONE); + printf("To run the instruction emulated tests set the module parameter 'kvm.force_emulation_prefix=1'\n"); + } +} + +static int handle_ucall(struct kvm_vcpu *vcpu) +{ + struct ucall uc; + + switch (get_ucall(vcpu, &uc)) { + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + break; + case UCALL_SYNC: + vm_ioctl(vcpu->vm, KVM_X86_SET_MSR_FILTER, &no_filter_deny); + break; + case UCALL_DONE: + return 1; + default: + TEST_FAIL("Unknown ucall %lu", uc.cmd); + } + + return 0; +} + +static void handle_rdmsr(struct kvm_run *run) +{ + run->msr.data = run->msr.index; + msr_reads++; + + if (run->msr.index == MSR_SYSCALL_MASK || + run->msr.index == MSR_GS_BASE) { + TEST_ASSERT(run->msr.reason == KVM_MSR_EXIT_REASON_FILTER, + "MSR read trap w/o access fault"); + } + + if (run->msr.index == 0xdeadbeef) { + TEST_ASSERT(run->msr.reason == KVM_MSR_EXIT_REASON_UNKNOWN, + "MSR deadbeef read trap w/o inval fault"); + } +} + +static void handle_wrmsr(struct kvm_run *run) +{ + /* ignore */ + msr_writes++; + + if (run->msr.index == MSR_IA32_POWER_CTL) { + TEST_ASSERT(run->msr.data == 0x1234, + "MSR data for MSR_IA32_POWER_CTL incorrect"); + TEST_ASSERT(run->msr.reason == KVM_MSR_EXIT_REASON_FILTER, + "MSR_IA32_POWER_CTL trap w/o access fault"); + } + + if (run->msr.index == 0xdeadbeef) { + TEST_ASSERT(run->msr.data == 0x1234, + "MSR data for deadbeef incorrect"); + TEST_ASSERT(run->msr.reason == KVM_MSR_EXIT_REASON_UNKNOWN, + "deadbeef trap w/o inval fault"); + } +} + +KVM_ONE_VCPU_TEST(user_msr, msr_filter_deny, guest_code_filter_deny) +{ + struct kvm_vm *vm = vcpu->vm; + struct kvm_run *run = vcpu->run; + int rc; + + rc = kvm_check_cap(KVM_CAP_X86_USER_SPACE_MSR); + TEST_ASSERT(rc, "KVM_CAP_X86_USER_SPACE_MSR is available"); + vm_enable_cap(vm, KVM_CAP_X86_USER_SPACE_MSR, KVM_MSR_EXIT_REASON_INVAL | + KVM_MSR_EXIT_REASON_UNKNOWN | + KVM_MSR_EXIT_REASON_FILTER); + + rc = kvm_check_cap(KVM_CAP_X86_MSR_FILTER); + TEST_ASSERT(rc, "KVM_CAP_X86_MSR_FILTER is available"); + + prepare_bitmaps(); + vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter_deny); + + while (1) { + vcpu_run(vcpu); + + switch (run->exit_reason) { + case KVM_EXIT_X86_RDMSR: + 
handle_rdmsr(run); + break; + case KVM_EXIT_X86_WRMSR: + handle_wrmsr(run); + break; + case KVM_EXIT_IO: + if (handle_ucall(vcpu)) + goto done; + break; + } + + } + +done: + TEST_ASSERT(msr_reads == 4, "Handled 4 rdmsr in user space"); + TEST_ASSERT(msr_writes == 3, "Handled 3 wrmsr in user space"); +} + +KVM_ONE_VCPU_TEST(user_msr, msr_permission_bitmap, guest_code_permission_bitmap) +{ + struct kvm_vm *vm = vcpu->vm; + int rc; + + rc = kvm_check_cap(KVM_CAP_X86_USER_SPACE_MSR); + TEST_ASSERT(rc, "KVM_CAP_X86_USER_SPACE_MSR is available"); + vm_enable_cap(vm, KVM_CAP_X86_USER_SPACE_MSR, KVM_MSR_EXIT_REASON_FILTER); + + rc = kvm_check_cap(KVM_CAP_X86_MSR_FILTER); + TEST_ASSERT(rc, "KVM_CAP_X86_MSR_FILTER is available"); + + vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter_fs); + run_guest_then_process_rdmsr(vcpu, MSR_FS_BASE); + TEST_ASSERT(run_guest_then_process_ucall(vcpu) == UCALL_SYNC, + "Expected ucall state to be UCALL_SYNC."); + vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter_gs); + run_guest_then_process_rdmsr(vcpu, MSR_GS_BASE); + run_guest_then_process_ucall_done(vcpu); +} + +#define test_user_exit_msr_ioctl(vm, cmd, arg, flag, valid_mask) \ +({ \ + int r = __vm_ioctl(vm, cmd, arg); \ + \ + if (flag & valid_mask) \ + TEST_ASSERT(!r, __KVM_IOCTL_ERROR(#cmd, r)); \ + else \ + TEST_ASSERT(r == -1 && errno == EINVAL, \ + "Wanted EINVAL for %s with flag = 0x%llx, got rc: %i errno: %i (%s)", \ + #cmd, flag, r, errno, strerror(errno)); \ +}) + +static void run_user_space_msr_flag_test(struct kvm_vm *vm) +{ + struct kvm_enable_cap cap = { .cap = KVM_CAP_X86_USER_SPACE_MSR }; + int nflags = sizeof(cap.args[0]) * BITS_PER_BYTE; + int rc; + int i; + + rc = kvm_check_cap(KVM_CAP_X86_USER_SPACE_MSR); + TEST_ASSERT(rc, "KVM_CAP_X86_USER_SPACE_MSR is available"); + + for (i = 0; i < nflags; i++) { + cap.args[0] = BIT_ULL(i); + test_user_exit_msr_ioctl(vm, KVM_ENABLE_CAP, &cap, + BIT_ULL(i), KVM_MSR_EXIT_REASON_VALID_MASK); + } +} + +static void run_msr_filter_flag_test(struct kvm_vm *vm) +{ + u64 deny_bits = 0; + struct kvm_msr_filter filter = { + .flags = KVM_MSR_FILTER_DEFAULT_ALLOW, + .ranges = { + { + .flags = KVM_MSR_FILTER_READ, + .nmsrs = 1, + .base = 0, + .bitmap = (uint8_t *)&deny_bits, + }, + }, + }; + int nflags; + int rc; + int i; + + rc = kvm_check_cap(KVM_CAP_X86_MSR_FILTER); + TEST_ASSERT(rc, "KVM_CAP_X86_MSR_FILTER is available"); + + nflags = sizeof(filter.flags) * BITS_PER_BYTE; + for (i = 0; i < nflags; i++) { + filter.flags = BIT_ULL(i); + test_user_exit_msr_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter, + BIT_ULL(i), KVM_MSR_FILTER_VALID_MASK); + } + + filter.flags = KVM_MSR_FILTER_DEFAULT_ALLOW; + nflags = sizeof(filter.ranges[0].flags) * BITS_PER_BYTE; + for (i = 0; i < nflags; i++) { + filter.ranges[0].flags = BIT_ULL(i); + test_user_exit_msr_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter, + BIT_ULL(i), KVM_MSR_FILTER_RANGE_VALID_MASK); + } +} + +/* Test that attempts to write to the unused bits in a flag fails. */ +KVM_ONE_VCPU_TEST(user_msr, user_exit_msr_flags, NULL) +{ + struct kvm_vm *vm = vcpu->vm; + + /* Test flags for KVM_CAP_X86_USER_SPACE_MSR. */ + run_user_space_msr_flag_test(vm); + + /* Test flags and range flags for KVM_X86_SET_MSR_FILTER. 
*/ + run_msr_filter_flag_test(vm); +} + +int main(int argc, char *argv[]) +{ + return test_harness_run(argc, argv); +} diff --git a/tools/testing/selftests/kvm/x86/vmx_apic_access_test.c b/tools/testing/selftests/kvm/x86/vmx_apic_access_test.c new file mode 100644 index 000000000000..a81a24761aac --- /dev/null +++ b/tools/testing/selftests/kvm/x86/vmx_apic_access_test.c @@ -0,0 +1,124 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * vmx_apic_access_test + * + * Copyright (C) 2020, Google LLC. + * + * This work is licensed under the terms of the GNU GPL, version 2. + * + * The first subtest simply checks to see that an L2 guest can be + * launched with a valid APIC-access address that is backed by a + * page of L1 physical memory. + * + * The second subtest sets the APIC-access address to a (valid) L1 + * physical address that is not backed by memory. KVM can't handle + * this situation, so resuming L2 should result in a KVM exit for + * internal error (emulation). This is not an architectural + * requirement. It is just a shortcoming of KVM. The internal error + * is unfortunate, but it's better than what used to happen! + */ + +#include "test_util.h" +#include "kvm_util.h" +#include "processor.h" +#include "vmx.h" + +#include +#include + +#include "kselftest.h" + +static void l2_guest_code(void) +{ + /* Exit to L1 */ + __asm__ __volatile__("vmcall"); +} + +static void l1_guest_code(struct vmx_pages *vmx_pages, unsigned long high_gpa) +{ +#define L2_GUEST_STACK_SIZE 64 + unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; + uint32_t control; + + GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages)); + GUEST_ASSERT(load_vmcs(vmx_pages)); + + /* Prepare the VMCS for L2 execution. */ + prepare_vmcs(vmx_pages, l2_guest_code, + &l2_guest_stack[L2_GUEST_STACK_SIZE]); + control = vmreadz(CPU_BASED_VM_EXEC_CONTROL); + control |= CPU_BASED_ACTIVATE_SECONDARY_CONTROLS; + vmwrite(CPU_BASED_VM_EXEC_CONTROL, control); + control = vmreadz(SECONDARY_VM_EXEC_CONTROL); + control |= SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; + vmwrite(SECONDARY_VM_EXEC_CONTROL, control); + vmwrite(APIC_ACCESS_ADDR, vmx_pages->apic_access_gpa); + + /* Try to launch L2 with the memory-backed APIC-access address. */ + GUEST_SYNC(vmreadz(APIC_ACCESS_ADDR)); + GUEST_ASSERT(!vmlaunch()); + GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL); + + vmwrite(APIC_ACCESS_ADDR, high_gpa); + + /* Try to resume L2 with the unbacked APIC-access address. 
*/ + GUEST_SYNC(vmreadz(APIC_ACCESS_ADDR)); + GUEST_ASSERT(!vmresume()); + GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL); + + GUEST_DONE(); +} + +int main(int argc, char *argv[]) +{ + unsigned long apic_access_addr = ~0ul; + vm_vaddr_t vmx_pages_gva; + unsigned long high_gpa; + struct vmx_pages *vmx; + bool done = false; + + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + + TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX)); + + vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code); + + high_gpa = (vm->max_gfn - 1) << vm->page_shift; + + vmx = vcpu_alloc_vmx(vm, &vmx_pages_gva); + prepare_virtualize_apic_accesses(vmx, vm); + vcpu_args_set(vcpu, 2, vmx_pages_gva, high_gpa); + + while (!done) { + volatile struct kvm_run *run = vcpu->run; + struct ucall uc; + + vcpu_run(vcpu); + if (apic_access_addr == high_gpa) { + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_INTERNAL_ERROR); + TEST_ASSERT(run->internal.suberror == + KVM_INTERNAL_ERROR_EMULATION, + "Got internal suberror other than KVM_INTERNAL_ERROR_EMULATION: %u", + run->internal.suberror); + break; + } + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); + + switch (get_ucall(vcpu, &uc)) { + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + /* NOT REACHED */ + case UCALL_SYNC: + apic_access_addr = uc.args[1]; + break; + case UCALL_DONE: + done = true; + break; + default: + TEST_ASSERT(false, "Unknown ucall %lu", uc.cmd); + } + } + kvm_vm_free(vm); + return 0; +} diff --git a/tools/testing/selftests/kvm/x86/vmx_close_while_nested_test.c b/tools/testing/selftests/kvm/x86/vmx_close_while_nested_test.c new file mode 100644 index 000000000000..dad988351493 --- /dev/null +++ b/tools/testing/selftests/kvm/x86/vmx_close_while_nested_test.c @@ -0,0 +1,80 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * vmx_close_while_nested + * + * Copyright (C) 2019, Red Hat, Inc. + * + * Verify that nothing bad happens if a KVM user exits with open + * file descriptors while executing a nested guest. + */ + +#include "test_util.h" +#include "kvm_util.h" +#include "processor.h" +#include "vmx.h" + +#include +#include + +#include "kselftest.h" + +enum { + PORT_L0_EXIT = 0x2000, +}; + +static void l2_guest_code(void) +{ + /* Exit to L0 */ + asm volatile("inb %%dx, %%al" + : : [port] "d" (PORT_L0_EXIT) : "rax"); +} + +static void l1_guest_code(struct vmx_pages *vmx_pages) +{ +#define L2_GUEST_STACK_SIZE 64 + unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; + + GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages)); + GUEST_ASSERT(load_vmcs(vmx_pages)); + + /* Prepare the VMCS for L2 execution. */ + prepare_vmcs(vmx_pages, l2_guest_code, + &l2_guest_stack[L2_GUEST_STACK_SIZE]); + + GUEST_ASSERT(!vmlaunch()); + GUEST_ASSERT(0); +} + +int main(int argc, char *argv[]) +{ + vm_vaddr_t vmx_pages_gva; + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + + TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX)); + + vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code); + + /* Allocate VMX pages and shared descriptors (vmx_pages). 
*/ + vcpu_alloc_vmx(vm, &vmx_pages_gva); + vcpu_args_set(vcpu, 1, vmx_pages_gva); + + for (;;) { + volatile struct kvm_run *run = vcpu->run; + struct ucall uc; + + vcpu_run(vcpu); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); + + if (run->io.port == PORT_L0_EXIT) + break; + + switch (get_ucall(vcpu, &uc)) { + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + /* NOT REACHED */ + default: + TEST_FAIL("Unknown ucall %lu", uc.cmd); + } + } +} diff --git a/tools/testing/selftests/kvm/x86/vmx_dirty_log_test.c b/tools/testing/selftests/kvm/x86/vmx_dirty_log_test.c new file mode 100644 index 000000000000..fa512d033205 --- /dev/null +++ b/tools/testing/selftests/kvm/x86/vmx_dirty_log_test.c @@ -0,0 +1,179 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * KVM dirty page logging test + * + * Copyright (C) 2018, Red Hat, Inc. + */ +#include +#include +#include +#include + +#include "test_util.h" +#include "kvm_util.h" +#include "processor.h" +#include "vmx.h" + +/* The memory slot index to track dirty pages */ +#define TEST_MEM_SLOT_INDEX 1 +#define TEST_MEM_PAGES 3 + +/* L1 guest test virtual memory offset */ +#define GUEST_TEST_MEM 0xc0000000 + +/* L2 guest test virtual memory offset */ +#define NESTED_TEST_MEM1 0xc0001000 +#define NESTED_TEST_MEM2 0xc0002000 + +static void l2_guest_code(u64 *a, u64 *b) +{ + READ_ONCE(*a); + WRITE_ONCE(*a, 1); + GUEST_SYNC(true); + GUEST_SYNC(false); + + WRITE_ONCE(*b, 1); + GUEST_SYNC(true); + WRITE_ONCE(*b, 1); + GUEST_SYNC(true); + GUEST_SYNC(false); + + /* Exit to L1 and never come back. */ + vmcall(); +} + +static void l2_guest_code_ept_enabled(void) +{ + l2_guest_code((u64 *)NESTED_TEST_MEM1, (u64 *)NESTED_TEST_MEM2); +} + +static void l2_guest_code_ept_disabled(void) +{ + /* Access the same L1 GPAs as l2_guest_code_ept_enabled() */ + l2_guest_code((u64 *)GUEST_TEST_MEM, (u64 *)GUEST_TEST_MEM); +} + +void l1_guest_code(struct vmx_pages *vmx) +{ +#define L2_GUEST_STACK_SIZE 64 + unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; + void *l2_rip; + + GUEST_ASSERT(vmx->vmcs_gpa); + GUEST_ASSERT(prepare_for_vmx_operation(vmx)); + GUEST_ASSERT(load_vmcs(vmx)); + + if (vmx->eptp_gpa) + l2_rip = l2_guest_code_ept_enabled; + else + l2_rip = l2_guest_code_ept_disabled; + + prepare_vmcs(vmx, l2_rip, &l2_guest_stack[L2_GUEST_STACK_SIZE]); + + GUEST_SYNC(false); + GUEST_ASSERT(!vmlaunch()); + GUEST_SYNC(false); + GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL); + GUEST_DONE(); +} + +static void test_vmx_dirty_log(bool enable_ept) +{ + vm_vaddr_t vmx_pages_gva = 0; + struct vmx_pages *vmx; + unsigned long *bmap; + uint64_t *host_test_mem; + + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + struct ucall uc; + bool done = false; + + pr_info("Nested EPT: %s\n", enable_ept ? "enabled" : "disabled"); + + /* Create VM */ + vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code); + vmx = vcpu_alloc_vmx(vm, &vmx_pages_gva); + vcpu_args_set(vcpu, 1, vmx_pages_gva); + + /* Add an extra memory slot for testing dirty logging */ + vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, + GUEST_TEST_MEM, + TEST_MEM_SLOT_INDEX, + TEST_MEM_PAGES, + KVM_MEM_LOG_DIRTY_PAGES); + + /* + * Add an identity map for GVA range [0xc0000000, 0xc0002000). This + * affects both L1 and L2. However... + */ + virt_map(vm, GUEST_TEST_MEM, GUEST_TEST_MEM, TEST_MEM_PAGES); + + /* + * ... pages in the L2 GPA range [0xc0001000, 0xc0003000) will map to + * 0xc0000000. + * + * Note that prepare_eptp should be called only after L1's GPA map is done, + * meaning after the last call to virt_map.
+ * + * When EPT is disabled, the L2 guest code will still access the same L1 + * GPAs as the EPT enabled case. + */ + if (enable_ept) { + prepare_eptp(vmx, vm, 0); + nested_map_memslot(vmx, vm, 0); + nested_map(vmx, vm, NESTED_TEST_MEM1, GUEST_TEST_MEM, 4096); + nested_map(vmx, vm, NESTED_TEST_MEM2, GUEST_TEST_MEM, 4096); + } + + bmap = bitmap_zalloc(TEST_MEM_PAGES); + host_test_mem = addr_gpa2hva(vm, GUEST_TEST_MEM); + + while (!done) { + memset(host_test_mem, 0xaa, TEST_MEM_PAGES * 4096); + vcpu_run(vcpu); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); + + switch (get_ucall(vcpu, &uc)) { + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + /* NOT REACHED */ + case UCALL_SYNC: + /* + * The nested guest wrote at offset 0x1000 in the memslot, but the + * dirty bitmap must be filled in according to L1 GPA, not L2. + */ + kvm_vm_get_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap); + if (uc.args[1]) { + TEST_ASSERT(test_bit(0, bmap), "Page 0 incorrectly reported clean"); + TEST_ASSERT(host_test_mem[0] == 1, "Page 0 not written by guest"); + } else { + TEST_ASSERT(!test_bit(0, bmap), "Page 0 incorrectly reported dirty"); + TEST_ASSERT(host_test_mem[0] == 0xaaaaaaaaaaaaaaaaULL, "Page 0 written by guest"); + } + + TEST_ASSERT(!test_bit(1, bmap), "Page 1 incorrectly reported dirty"); + TEST_ASSERT(host_test_mem[4096 / 8] == 0xaaaaaaaaaaaaaaaaULL, "Page 1 written by guest"); + TEST_ASSERT(!test_bit(2, bmap), "Page 2 incorrectly reported dirty"); + TEST_ASSERT(host_test_mem[8192 / 8] == 0xaaaaaaaaaaaaaaaaULL, "Page 2 written by guest"); + break; + case UCALL_DONE: + done = true; + break; + default: + TEST_FAIL("Unknown ucall %lu", uc.cmd); + } + } +} + +int main(int argc, char *argv[]) +{ + TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX)); + + test_vmx_dirty_log(/*enable_ept=*/false); + + if (kvm_cpu_has_ept()) + test_vmx_dirty_log(/*enable_ept=*/true); + + return 0; +} diff --git a/tools/testing/selftests/kvm/x86/vmx_exception_with_invalid_guest_state.c b/tools/testing/selftests/kvm/x86/vmx_exception_with_invalid_guest_state.c new file mode 100644 index 000000000000..3fd6eceab46f --- /dev/null +++ b/tools/testing/selftests/kvm/x86/vmx_exception_with_invalid_guest_state.c @@ -0,0 +1,142 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include "test_util.h" +#include "kvm_util.h" +#include "processor.h" + +#include +#include +#include +#include + +#include "kselftest.h" + +static void guest_ud_handler(struct ex_regs *regs) +{ + /* Loop on the ud2 until guest state is made invalid. */ +} + +static void guest_code(void) +{ + asm volatile("ud2"); +} + +static void __run_vcpu_with_invalid_state(struct kvm_vcpu *vcpu) +{ + struct kvm_run *run = vcpu->run; + + vcpu_run(vcpu); + + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_INTERNAL_ERROR); + TEST_ASSERT(run->emulation_failure.suberror == KVM_INTERNAL_ERROR_EMULATION, + "Expected emulation failure, got %d", + run->emulation_failure.suberror); +} + +static void run_vcpu_with_invalid_state(struct kvm_vcpu *vcpu) +{ + /* + * Always run twice to verify KVM handles the case where _KVM_ queues + * an exception with invalid state and then exits to userspace, i.e. + * that KVM doesn't explode if userspace ignores the initial error. 
+ */ + __run_vcpu_with_invalid_state(vcpu); + __run_vcpu_with_invalid_state(vcpu); +} + +static void set_timer(void) +{ + struct itimerval timer; + + timer.it_value.tv_sec = 0; + timer.it_value.tv_usec = 200; + timer.it_interval = timer.it_value; + TEST_ASSERT_EQ(setitimer(ITIMER_REAL, &timer, NULL), 0); +} + +static void set_or_clear_invalid_guest_state(struct kvm_vcpu *vcpu, bool set) +{ + static struct kvm_sregs sregs; + + if (!sregs.cr0) + vcpu_sregs_get(vcpu, &sregs); + sregs.tr.unusable = !!set; + vcpu_sregs_set(vcpu, &sregs); +} + +static void set_invalid_guest_state(struct kvm_vcpu *vcpu) +{ + set_or_clear_invalid_guest_state(vcpu, true); +} + +static void clear_invalid_guest_state(struct kvm_vcpu *vcpu) +{ + set_or_clear_invalid_guest_state(vcpu, false); +} + +static struct kvm_vcpu *get_set_sigalrm_vcpu(struct kvm_vcpu *__vcpu) +{ + static struct kvm_vcpu *vcpu = NULL; + + if (__vcpu) + vcpu = __vcpu; + return vcpu; +} + +static void sigalrm_handler(int sig) +{ + struct kvm_vcpu *vcpu = get_set_sigalrm_vcpu(NULL); + struct kvm_vcpu_events events; + + TEST_ASSERT(sig == SIGALRM, "Unexpected signal = %d", sig); + + vcpu_events_get(vcpu, &events); + + /* + * If an exception is pending, attempt KVM_RUN with invalid guest, + * otherwise rearm the timer and keep doing so until the timer fires + * between KVM queueing an exception and re-entering the guest. + */ + if (events.exception.pending) { + set_invalid_guest_state(vcpu); + run_vcpu_with_invalid_state(vcpu); + } else { + set_timer(); + } +} + +int main(int argc, char *argv[]) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + + TEST_REQUIRE(host_cpu_is_intel); + TEST_REQUIRE(!vm_is_unrestricted_guest(NULL)); + + vm = vm_create_with_one_vcpu(&vcpu, guest_code); + get_set_sigalrm_vcpu(vcpu); + + vm_install_exception_handler(vm, UD_VECTOR, guest_ud_handler); + + /* + * Stuff invalid guest state for L2 by making TR unusuable. The next + * KVM_RUN should induce a TRIPLE_FAULT in L2 as KVM doesn't support + * emulating invalid guest state for L2. + */ + set_invalid_guest_state(vcpu); + run_vcpu_with_invalid_state(vcpu); + + /* + * Verify KVM also handles the case where userspace gains control while + * an exception is pending and stuffs invalid state. Run with valid + * guest state and a timer firing every 200us, and attempt to enter the + * guest with invalid state when the handler interrupts KVM with an + * exception pending. + */ + clear_invalid_guest_state(vcpu); + TEST_ASSERT(signal(SIGALRM, sigalrm_handler) != SIG_ERR, + "Failed to register SIGALRM handler, errno = %d (%s)", + errno, strerror(errno)); + + set_timer(); + run_vcpu_with_invalid_state(vcpu); +} diff --git a/tools/testing/selftests/kvm/x86/vmx_invalid_nested_guest_state.c b/tools/testing/selftests/kvm/x86/vmx_invalid_nested_guest_state.c new file mode 100644 index 000000000000..a100ee5f0009 --- /dev/null +++ b/tools/testing/selftests/kvm/x86/vmx_invalid_nested_guest_state.c @@ -0,0 +1,103 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include "test_util.h" +#include "kvm_util.h" +#include "processor.h" +#include "vmx.h" + +#include +#include + +#include "kselftest.h" + +#define ARBITRARY_IO_PORT 0x2000 + +static struct kvm_vm *vm; + +static void l2_guest_code(void) +{ + /* + * Generate an exit to L0 userspace, i.e. main(), via I/O to an + * arbitrary port. 
+ */ + asm volatile("inb %%dx, %%al" + : : [port] "d" (ARBITRARY_IO_PORT) : "rax"); +} + +static void l1_guest_code(struct vmx_pages *vmx_pages) +{ +#define L2_GUEST_STACK_SIZE 64 + unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; + + GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages)); + GUEST_ASSERT(load_vmcs(vmx_pages)); + + /* Prepare the VMCS for L2 execution. */ + prepare_vmcs(vmx_pages, l2_guest_code, + &l2_guest_stack[L2_GUEST_STACK_SIZE]); + + /* + * L2 must be run without unrestricted guest, verify that the selftests + * library hasn't enabled it. Because KVM selftests jump directly to + * 64-bit mode, unrestricted guest support isn't required. + */ + GUEST_ASSERT(!(vmreadz(CPU_BASED_VM_EXEC_CONTROL) & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) || + !(vmreadz(SECONDARY_VM_EXEC_CONTROL) & SECONDARY_EXEC_UNRESTRICTED_GUEST)); + + GUEST_ASSERT(!vmlaunch()); + + /* L2 should triple fault after main() stuffs invalid guest state. */ + GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_TRIPLE_FAULT); + GUEST_DONE(); +} + +int main(int argc, char *argv[]) +{ + vm_vaddr_t vmx_pages_gva; + struct kvm_sregs sregs; + struct kvm_vcpu *vcpu; + struct kvm_run *run; + struct ucall uc; + + TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX)); + + vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code); + + /* Allocate VMX pages and shared descriptors (vmx_pages). */ + vcpu_alloc_vmx(vm, &vmx_pages_gva); + vcpu_args_set(vcpu, 1, vmx_pages_gva); + + vcpu_run(vcpu); + + run = vcpu->run; + + /* + * The first exit to L0 userspace should be an I/O access from L2. + * Running L1 should launch L2 without triggering an exit to userspace. + */ + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); + + TEST_ASSERT(run->io.port == ARBITRARY_IO_PORT, + "Expected IN from port %d from L2, got port %d", + ARBITRARY_IO_PORT, run->io.port); + + /* + * Stuff invalid guest state for L2 by making TR unusuable. The next + * KVM_RUN should induce a TRIPLE_FAULT in L2 as KVM doesn't support + * emulating invalid guest state for L2. + */ + memset(&sregs, 0, sizeof(sregs)); + vcpu_sregs_get(vcpu, &sregs); + sregs.tr.unusable = 1; + vcpu_sregs_set(vcpu, &sregs); + + vcpu_run(vcpu); + + switch (get_ucall(vcpu, &uc)) { + case UCALL_DONE: + break; + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + default: + TEST_FAIL("Unexpected ucall: %lu", uc.cmd); + } +} diff --git a/tools/testing/selftests/kvm/x86/vmx_msrs_test.c b/tools/testing/selftests/kvm/x86/vmx_msrs_test.c new file mode 100644 index 000000000000..90720b6205f4 --- /dev/null +++ b/tools/testing/selftests/kvm/x86/vmx_msrs_test.c @@ -0,0 +1,131 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * VMX control MSR test + * + * Copyright (C) 2022 Google LLC. + * + * Tests for KVM ownership of bits in the VMX entry/exit control MSRs. Checks + * that KVM will set owned bits where appropriate, and will not if + * KVM_X86_QUIRK_TWEAK_VMX_CTRL_MSRS is disabled. 
+ */ +#include +#include "kvm_util.h" +#include "vmx.h" + +static void vmx_fixed1_msr_test(struct kvm_vcpu *vcpu, uint32_t msr_index, + uint64_t mask) +{ + uint64_t val = vcpu_get_msr(vcpu, msr_index); + uint64_t bit; + + mask &= val; + + for_each_set_bit(bit, &mask, 64) { + vcpu_set_msr(vcpu, msr_index, val & ~BIT_ULL(bit)); + vcpu_set_msr(vcpu, msr_index, val); + } +} + +static void vmx_fixed0_msr_test(struct kvm_vcpu *vcpu, uint32_t msr_index, + uint64_t mask) +{ + uint64_t val = vcpu_get_msr(vcpu, msr_index); + uint64_t bit; + + mask = ~mask | val; + + for_each_clear_bit(bit, &mask, 64) { + vcpu_set_msr(vcpu, msr_index, val | BIT_ULL(bit)); + vcpu_set_msr(vcpu, msr_index, val); + } +} + +static void vmx_fixed0and1_msr_test(struct kvm_vcpu *vcpu, uint32_t msr_index) +{ + vmx_fixed0_msr_test(vcpu, msr_index, GENMASK_ULL(31, 0)); + vmx_fixed1_msr_test(vcpu, msr_index, GENMASK_ULL(63, 32)); +} + +static void vmx_save_restore_msrs_test(struct kvm_vcpu *vcpu) +{ + vcpu_set_msr(vcpu, MSR_IA32_VMX_VMCS_ENUM, 0); + vcpu_set_msr(vcpu, MSR_IA32_VMX_VMCS_ENUM, -1ull); + + vmx_fixed1_msr_test(vcpu, MSR_IA32_VMX_BASIC, + BIT_ULL(49) | BIT_ULL(54) | BIT_ULL(55)); + + vmx_fixed1_msr_test(vcpu, MSR_IA32_VMX_MISC, + BIT_ULL(5) | GENMASK_ULL(8, 6) | BIT_ULL(14) | + BIT_ULL(15) | BIT_ULL(28) | BIT_ULL(29) | BIT_ULL(30)); + + vmx_fixed0and1_msr_test(vcpu, MSR_IA32_VMX_PROCBASED_CTLS2); + vmx_fixed1_msr_test(vcpu, MSR_IA32_VMX_EPT_VPID_CAP, -1ull); + vmx_fixed0and1_msr_test(vcpu, MSR_IA32_VMX_TRUE_PINBASED_CTLS); + vmx_fixed0and1_msr_test(vcpu, MSR_IA32_VMX_TRUE_PROCBASED_CTLS); + vmx_fixed0and1_msr_test(vcpu, MSR_IA32_VMX_TRUE_EXIT_CTLS); + vmx_fixed0and1_msr_test(vcpu, MSR_IA32_VMX_TRUE_ENTRY_CTLS); + vmx_fixed1_msr_test(vcpu, MSR_IA32_VMX_VMFUNC, -1ull); +} + +static void __ia32_feature_control_msr_test(struct kvm_vcpu *vcpu, + uint64_t msr_bit, + struct kvm_x86_cpu_feature feature) +{ + uint64_t val; + + vcpu_clear_cpuid_feature(vcpu, feature); + + val = vcpu_get_msr(vcpu, MSR_IA32_FEAT_CTL); + vcpu_set_msr(vcpu, MSR_IA32_FEAT_CTL, val | msr_bit | FEAT_CTL_LOCKED); + vcpu_set_msr(vcpu, MSR_IA32_FEAT_CTL, (val & ~msr_bit) | FEAT_CTL_LOCKED); + vcpu_set_msr(vcpu, MSR_IA32_FEAT_CTL, val | msr_bit | FEAT_CTL_LOCKED); + vcpu_set_msr(vcpu, MSR_IA32_FEAT_CTL, (val & ~msr_bit) | FEAT_CTL_LOCKED); + vcpu_set_msr(vcpu, MSR_IA32_FEAT_CTL, val); + + if (!kvm_cpu_has(feature)) + return; + + vcpu_set_cpuid_feature(vcpu, feature); +} + +static void ia32_feature_control_msr_test(struct kvm_vcpu *vcpu) +{ + uint64_t supported_bits = FEAT_CTL_LOCKED | + FEAT_CTL_VMX_ENABLED_INSIDE_SMX | + FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX | + FEAT_CTL_SGX_LC_ENABLED | + FEAT_CTL_SGX_ENABLED | + FEAT_CTL_LMCE_ENABLED; + int bit, r; + + __ia32_feature_control_msr_test(vcpu, FEAT_CTL_VMX_ENABLED_INSIDE_SMX, X86_FEATURE_SMX); + __ia32_feature_control_msr_test(vcpu, FEAT_CTL_VMX_ENABLED_INSIDE_SMX, X86_FEATURE_VMX); + __ia32_feature_control_msr_test(vcpu, FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX, X86_FEATURE_VMX); + __ia32_feature_control_msr_test(vcpu, FEAT_CTL_SGX_LC_ENABLED, X86_FEATURE_SGX_LC); + __ia32_feature_control_msr_test(vcpu, FEAT_CTL_SGX_LC_ENABLED, X86_FEATURE_SGX); + __ia32_feature_control_msr_test(vcpu, FEAT_CTL_SGX_ENABLED, X86_FEATURE_SGX); + __ia32_feature_control_msr_test(vcpu, FEAT_CTL_LMCE_ENABLED, X86_FEATURE_MCE); + + for_each_clear_bit(bit, &supported_bits, 64) { + r = _vcpu_set_msr(vcpu, MSR_IA32_FEAT_CTL, BIT(bit)); + TEST_ASSERT(r == 0, + "Setting reserved bit %d in IA32_FEATURE_CONTROL should fail", bit); + } +} + +int 
main(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + + TEST_REQUIRE(kvm_has_cap(KVM_CAP_DISABLE_QUIRKS2)); + TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX)); + + /* No need to actually do KVM_RUN, thus no guest code. */ + vm = vm_create_with_one_vcpu(&vcpu, NULL); + + vmx_save_restore_msrs_test(vcpu); + ia32_feature_control_msr_test(vcpu); + + kvm_vm_free(vm); +} diff --git a/tools/testing/selftests/kvm/x86/vmx_nested_tsc_scaling_test.c b/tools/testing/selftests/kvm/x86/vmx_nested_tsc_scaling_test.c new file mode 100644 index 000000000000..1759fa5cb3f2 --- /dev/null +++ b/tools/testing/selftests/kvm/x86/vmx_nested_tsc_scaling_test.c @@ -0,0 +1,206 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * vmx_nested_tsc_scaling_test + * + * Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * This test case verifies that nested TSC scaling behaves as expected when + * both L1 and L2 are scaled using different ratios. For this test we scale + * L1 down and scale L2 up. + */ + +#include + +#include "kvm_util.h" +#include "vmx.h" +#include "kselftest.h" + +/* L2 is scaled up (from L1's perspective) by this factor */ +#define L2_SCALE_FACTOR 4ULL + +#define TSC_OFFSET_L2 ((uint64_t) -33125236320908) +#define TSC_MULTIPLIER_L2 (L2_SCALE_FACTOR << 48) + +#define L2_GUEST_STACK_SIZE 64 + +enum { USLEEP, UCHECK_L1, UCHECK_L2 }; +#define GUEST_SLEEP(sec) ucall(UCALL_SYNC, 2, USLEEP, sec) +#define GUEST_CHECK(level, freq) ucall(UCALL_SYNC, 2, level, freq) + + +/* + * This function checks whether the "actual" TSC frequency of a guest matches + * its expected frequency. In order to account for delays in taking the TSC + * measurements, a difference of 1% between the actual and the expected value + * is tolerated. + */ +static void compare_tsc_freq(uint64_t actual, uint64_t expected) +{ + uint64_t tolerance, thresh_low, thresh_high; + + tolerance = expected / 100; + thresh_low = expected - tolerance; + thresh_high = expected + tolerance; + + TEST_ASSERT(thresh_low < actual, + "TSC freq is expected to be between %"PRIu64" and %"PRIu64 + " but it actually is %"PRIu64, + thresh_low, thresh_high, actual); + TEST_ASSERT(thresh_high > actual, + "TSC freq is expected to be between %"PRIu64" and %"PRIu64 + " but it actually is %"PRIu64, + thresh_low, thresh_high, actual); +} + +static void check_tsc_freq(int level) +{ + uint64_t tsc_start, tsc_end, tsc_freq; + + /* + * Reading the TSC twice with about a second's difference should give + * us an approximation of the TSC frequency from the guest's + * perspective. Now, this won't be completely accurate, but it should + * be good enough for the purposes of this test. 
+ */ + tsc_start = rdmsr(MSR_IA32_TSC); + GUEST_SLEEP(1); + tsc_end = rdmsr(MSR_IA32_TSC); + + tsc_freq = tsc_end - tsc_start; + + GUEST_CHECK(level, tsc_freq); +} + +static void l2_guest_code(void) +{ + check_tsc_freq(UCHECK_L2); + + /* exit to L1 */ + __asm__ __volatile__("vmcall"); +} + +static void l1_guest_code(struct vmx_pages *vmx_pages) +{ + unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; + uint32_t control; + + /* check that L1's frequency looks alright before launching L2 */ + check_tsc_freq(UCHECK_L1); + + GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages)); + GUEST_ASSERT(load_vmcs(vmx_pages)); + + /* prepare the VMCS for L2 execution */ + prepare_vmcs(vmx_pages, l2_guest_code, &l2_guest_stack[L2_GUEST_STACK_SIZE]); + + /* enable TSC offsetting and TSC scaling for L2 */ + control = vmreadz(CPU_BASED_VM_EXEC_CONTROL); + control |= CPU_BASED_USE_MSR_BITMAPS | CPU_BASED_USE_TSC_OFFSETTING; + vmwrite(CPU_BASED_VM_EXEC_CONTROL, control); + + control = vmreadz(SECONDARY_VM_EXEC_CONTROL); + control |= SECONDARY_EXEC_TSC_SCALING; + vmwrite(SECONDARY_VM_EXEC_CONTROL, control); + + vmwrite(TSC_OFFSET, TSC_OFFSET_L2); + vmwrite(TSC_MULTIPLIER, TSC_MULTIPLIER_L2); + vmwrite(TSC_MULTIPLIER_HIGH, TSC_MULTIPLIER_L2 >> 32); + + /* launch L2 */ + GUEST_ASSERT(!vmlaunch()); + GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL); + + /* check that L1's frequency still looks good */ + check_tsc_freq(UCHECK_L1); + + GUEST_DONE(); +} + +int main(int argc, char *argv[]) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + vm_vaddr_t vmx_pages_gva; + + uint64_t tsc_start, tsc_end; + uint64_t tsc_khz; + uint64_t l1_scale_factor; + uint64_t l0_tsc_freq = 0; + uint64_t l1_tsc_freq = 0; + uint64_t l2_tsc_freq = 0; + + TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX)); + TEST_REQUIRE(kvm_has_cap(KVM_CAP_TSC_CONTROL)); + TEST_REQUIRE(sys_clocksource_is_based_on_tsc()); + + /* + * We set L1's scale factor to be a random number from 2 to 10. + * Ideally we would do the same for L2's factor but that one is + * referenced by both main() and l1_guest_code() and using a global + * variable does not work. 
+ */ + srand(time(NULL)); + l1_scale_factor = (rand() % 9) + 2; + printf("L1's scale down factor is: %"PRIu64"\n", l1_scale_factor); + printf("L2's scale up factor is: %llu\n", L2_SCALE_FACTOR); + + tsc_start = rdtsc(); + sleep(1); + tsc_end = rdtsc(); + + l0_tsc_freq = tsc_end - tsc_start; + printf("real TSC frequency is around: %"PRIu64"\n", l0_tsc_freq); + + vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code); + vcpu_alloc_vmx(vm, &vmx_pages_gva); + vcpu_args_set(vcpu, 1, vmx_pages_gva); + + tsc_khz = __vcpu_ioctl(vcpu, KVM_GET_TSC_KHZ, NULL); + TEST_ASSERT(tsc_khz != -1, "vcpu ioctl KVM_GET_TSC_KHZ failed"); + + /* scale down L1's TSC frequency */ + vcpu_ioctl(vcpu, KVM_SET_TSC_KHZ, (void *) (tsc_khz / l1_scale_factor)); + + for (;;) { + struct ucall uc; + + vcpu_run(vcpu); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); + + switch (get_ucall(vcpu, &uc)) { + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + case UCALL_SYNC: + switch (uc.args[0]) { + case USLEEP: + sleep(uc.args[1]); + break; + case UCHECK_L1: + l1_tsc_freq = uc.args[1]; + printf("L1's TSC frequency is around: %"PRIu64 + "\n", l1_tsc_freq); + + compare_tsc_freq(l1_tsc_freq, + l0_tsc_freq / l1_scale_factor); + break; + case UCHECK_L2: + l2_tsc_freq = uc.args[1]; + printf("L2's TSC frequency is around: %"PRIu64 + "\n", l2_tsc_freq); + + compare_tsc_freq(l2_tsc_freq, + l1_tsc_freq * L2_SCALE_FACTOR); + break; + } + break; + case UCALL_DONE: + goto done; + default: + TEST_FAIL("Unknown ucall %lu", uc.cmd); + } + } + +done: + kvm_vm_free(vm); + return 0; +} diff --git a/tools/testing/selftests/kvm/x86/vmx_pmu_caps_test.c b/tools/testing/selftests/kvm/x86/vmx_pmu_caps_test.c new file mode 100644 index 000000000000..a1f5ff45d518 --- /dev/null +++ b/tools/testing/selftests/kvm/x86/vmx_pmu_caps_test.c @@ -0,0 +1,247 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Test for VMX-pmu perf capability msr + * + * Copyright (C) 2021 Intel Corporation + * + * Test to check the effect of various CPUID settings on + * MSR_IA32_PERF_CAPABILITIES MSR, and check that what + * we write with KVM_SET_MSR is _not_ modified by the guest + * and check it can be retrieved with KVM_GET_MSR, also test + * the invalid LBR formats are rejected. + */ +#include + +#include + +#include "kvm_test_harness.h" +#include "kvm_util.h" +#include "vmx.h" + +static union perf_capabilities { + struct { + u64 lbr_format:6; + u64 pebs_trap:1; + u64 pebs_arch_reg:1; + u64 pebs_format:4; + u64 smm_freeze:1; + u64 full_width_write:1; + u64 pebs_baseline:1; + u64 perf_metrics:1; + u64 pebs_output_pt_available:1; + u64 anythread_deprecated:1; + }; + u64 capabilities; +} host_cap; + +/* + * The LBR format and most PEBS features are immutable, all other features are + * fungible (if supported by the host and KVM). 
+ */ +static const union perf_capabilities immutable_caps = { + .lbr_format = -1, + .pebs_trap = 1, + .pebs_arch_reg = 1, + .pebs_format = -1, + .pebs_baseline = 1, +}; + +static const union perf_capabilities format_caps = { + .lbr_format = -1, + .pebs_format = -1, +}; + +static void guest_test_perf_capabilities_gp(uint64_t val) +{ + uint8_t vector = wrmsr_safe(MSR_IA32_PERF_CAPABILITIES, val); + + __GUEST_ASSERT(vector == GP_VECTOR, + "Expected #GP for value '0x%lx', got vector '0x%x'", + val, vector); +} + +static void guest_code(uint64_t current_val) +{ + int i; + + guest_test_perf_capabilities_gp(current_val); + guest_test_perf_capabilities_gp(0); + + for (i = 0; i < 64; i++) + guest_test_perf_capabilities_gp(current_val ^ BIT_ULL(i)); + + GUEST_DONE(); +} + +KVM_ONE_VCPU_TEST_SUITE(vmx_pmu_caps); + +/* + * Verify that guest WRMSRs to PERF_CAPABILITIES #GP regardless of the value + * written, that the guest always sees the userspace controlled value, and that + * PERF_CAPABILITIES is immutable after KVM_RUN. + */ +KVM_ONE_VCPU_TEST(vmx_pmu_caps, guest_wrmsr_perf_capabilities, guest_code) +{ + struct ucall uc; + int r, i; + + vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, host_cap.capabilities); + + vcpu_args_set(vcpu, 1, host_cap.capabilities); + vcpu_run(vcpu); + + switch (get_ucall(vcpu, &uc)) { + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + break; + case UCALL_DONE: + break; + default: + TEST_FAIL("Unexpected ucall: %lu", uc.cmd); + } + + TEST_ASSERT_EQ(vcpu_get_msr(vcpu, MSR_IA32_PERF_CAPABILITIES), + host_cap.capabilities); + + vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, host_cap.capabilities); + + r = _vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, 0); + TEST_ASSERT(!r, "Post-KVM_RUN write '0' didn't fail"); + + for (i = 0; i < 64; i++) { + r = _vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, + host_cap.capabilities ^ BIT_ULL(i)); + TEST_ASSERT(!r, "Post-KVM_RUN write '0x%llx'didn't fail", + host_cap.capabilities ^ BIT_ULL(i)); + } +} + +/* + * Verify KVM allows writing PERF_CAPABILITIES with all KVM-supported features + * enabled, as well as '0' (to disable all features). + */ +KVM_ONE_VCPU_TEST(vmx_pmu_caps, basic_perf_capabilities, guest_code) +{ + vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, 0); + vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, host_cap.capabilities); +} + +KVM_ONE_VCPU_TEST(vmx_pmu_caps, fungible_perf_capabilities, guest_code) +{ + const uint64_t fungible_caps = host_cap.capabilities & ~immutable_caps.capabilities; + int bit; + + for_each_set_bit(bit, &fungible_caps, 64) { + vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, BIT_ULL(bit)); + vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, + host_cap.capabilities & ~BIT_ULL(bit)); + } + vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, host_cap.capabilities); +} + +/* + * Verify KVM rejects attempts to set unsupported and/or immutable features in + * PERF_CAPABILITIES. Note, LBR format and PEBS format need to be validated + * separately as they are multi-bit values, e.g. toggling or setting a single + * bit can generate a false positive without dedicated safeguards. 
+ */ +KVM_ONE_VCPU_TEST(vmx_pmu_caps, immutable_perf_capabilities, guest_code) +{ + const uint64_t reserved_caps = (~host_cap.capabilities | + immutable_caps.capabilities) & + ~format_caps.capabilities; + union perf_capabilities val = host_cap; + int r, bit; + + for_each_set_bit(bit, &reserved_caps, 64) { + r = _vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, + host_cap.capabilities ^ BIT_ULL(bit)); + TEST_ASSERT(!r, "%s immutable feature 0x%llx (bit %d) didn't fail", + host_cap.capabilities & BIT_ULL(bit) ? "Setting" : "Clearing", + BIT_ULL(bit), bit); + } + + /* + * KVM only supports the host's native LBR format, as well as '0' (to + * disable LBR support). Verify KVM rejects all other LBR formats. + */ + for (val.lbr_format = 1; val.lbr_format; val.lbr_format++) { + if (val.lbr_format == host_cap.lbr_format) + continue; + + r = _vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, val.capabilities); + TEST_ASSERT(!r, "Bad LBR FMT = 0x%x didn't fail, host = 0x%x", + val.lbr_format, host_cap.lbr_format); + } + + /* Ditto for the PEBS format. */ + for (val.pebs_format = 1; val.pebs_format; val.pebs_format++) { + if (val.pebs_format == host_cap.pebs_format) + continue; + + r = _vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, val.capabilities); + TEST_ASSERT(!r, "Bad PEBS FMT = 0x%x didn't fail, host = 0x%x", + val.pebs_format, host_cap.pebs_format); + } +} + +/* + * Test that LBR MSRs are writable when LBRs are enabled, and then verify that + * disabling the vPMU via CPUID also disables LBR support. Set bits 2:0 of + * LBR_TOS as those bits are writable across all uarch implementations (arch + * LBRs will need to poke a different MSR). + */ +KVM_ONE_VCPU_TEST(vmx_pmu_caps, lbr_perf_capabilities, guest_code) +{ + int r; + + if (!host_cap.lbr_format) + return; + + vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, host_cap.capabilities); + vcpu_set_msr(vcpu, MSR_LBR_TOS, 7); + + vcpu_clear_cpuid_entry(vcpu, X86_PROPERTY_PMU_VERSION.function); + + r = _vcpu_set_msr(vcpu, MSR_LBR_TOS, 7); + TEST_ASSERT(!r, "Writing LBR_TOS should fail after disabling vPMU"); +} + +KVM_ONE_VCPU_TEST(vmx_pmu_caps, perf_capabilities_unsupported, guest_code) +{ + uint64_t val; + int i, r; + + vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, host_cap.capabilities); + val = vcpu_get_msr(vcpu, MSR_IA32_PERF_CAPABILITIES); + TEST_ASSERT_EQ(val, host_cap.capabilities); + + vcpu_clear_cpuid_feature(vcpu, X86_FEATURE_PDCM); + + val = vcpu_get_msr(vcpu, MSR_IA32_PERF_CAPABILITIES); + TEST_ASSERT_EQ(val, 0); + + vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, 0); + + for (i = 0; i < 64; i++) { + r = _vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, BIT_ULL(i)); + TEST_ASSERT(!r, "Setting PERF_CAPABILITIES bit %d (= 0x%llx) should fail without PDCM", + i, BIT_ULL(i)); + } +} + +int main(int argc, char *argv[]) +{ + TEST_REQUIRE(kvm_is_pmu_enabled()); + TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_PDCM)); + + TEST_REQUIRE(kvm_cpu_has_p(X86_PROPERTY_PMU_VERSION)); + TEST_REQUIRE(kvm_cpu_property(X86_PROPERTY_PMU_VERSION) > 0); + + host_cap.capabilities = kvm_get_feature_msr(MSR_IA32_PERF_CAPABILITIES); + + TEST_ASSERT(host_cap.full_width_write, + "Full-width writes should always be supported"); + + return test_harness_run(argc, argv); +} diff --git a/tools/testing/selftests/kvm/x86/vmx_preemption_timer_test.c b/tools/testing/selftests/kvm/x86/vmx_preemption_timer_test.c new file mode 100644 index 000000000000..00dd2ac07a61 --- /dev/null +++ b/tools/testing/selftests/kvm/x86/vmx_preemption_timer_test.c @@ -0,0 +1,245 @@ +// SPDX-License-Identifier: 
GPL-2.0-only +/* + * VMX-preemption timer test + * + * Copyright (C) 2020, Google, LLC. + * + * Test to ensure the VM-Enter after migration doesn't + * incorrectly restart the timer with the full timer + * value instead of the partially decayed timer value + * + */ +#include +#include +#include +#include +#include + +#include "test_util.h" + +#include "kvm_util.h" +#include "processor.h" +#include "vmx.h" + +#define PREEMPTION_TIMER_VALUE 100000000ull +#define PREEMPTION_TIMER_VALUE_THRESHOLD1 80000000ull + +u32 vmx_pt_rate; +bool l2_save_restore_done; +static u64 l2_vmx_pt_start; +volatile u64 l2_vmx_pt_finish; + +union vmx_basic basic; +union vmx_ctrl_msr ctrl_pin_rev; +union vmx_ctrl_msr ctrl_exit_rev; + +void l2_guest_code(void) +{ + u64 vmx_pt_delta; + + vmcall(); + l2_vmx_pt_start = (rdtsc() >> vmx_pt_rate) << vmx_pt_rate; + + /* + * Wait until the 1st threshold has passed + */ + do { + l2_vmx_pt_finish = rdtsc(); + vmx_pt_delta = (l2_vmx_pt_finish - l2_vmx_pt_start) >> + vmx_pt_rate; + } while (vmx_pt_delta < PREEMPTION_TIMER_VALUE_THRESHOLD1); + + /* + * Force L2 through Save and Restore cycle + */ + GUEST_SYNC(1); + + l2_save_restore_done = 1; + + /* + * Now wait for the preemption timer to fire and + * exit to L1 + */ + while ((l2_vmx_pt_finish = rdtsc())) + ; +} + +void l1_guest_code(struct vmx_pages *vmx_pages) +{ +#define L2_GUEST_STACK_SIZE 64 + unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; + u64 l1_vmx_pt_start; + u64 l1_vmx_pt_finish; + u64 l1_tsc_deadline, l2_tsc_deadline; + + GUEST_ASSERT(vmx_pages->vmcs_gpa); + GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages)); + GUEST_ASSERT(load_vmcs(vmx_pages)); + GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa); + + prepare_vmcs(vmx_pages, l2_guest_code, + &l2_guest_stack[L2_GUEST_STACK_SIZE]); + + /* + * Check for Preemption timer support + */ + basic.val = rdmsr(MSR_IA32_VMX_BASIC); + ctrl_pin_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_PINBASED_CTLS + : MSR_IA32_VMX_PINBASED_CTLS); + ctrl_exit_rev.val = rdmsr(basic.ctrl ?
MSR_IA32_VMX_TRUE_EXIT_CTLS + : MSR_IA32_VMX_EXIT_CTLS); + + if (!(ctrl_pin_rev.clr & PIN_BASED_VMX_PREEMPTION_TIMER) || + !(ctrl_exit_rev.clr & VM_EXIT_SAVE_VMX_PREEMPTION_TIMER)) + return; + + GUEST_ASSERT(!vmlaunch()); + GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL); + vmwrite(GUEST_RIP, vmreadz(GUEST_RIP) + vmreadz(VM_EXIT_INSTRUCTION_LEN)); + + /* + * Turn on PIN control and resume the guest + */ + GUEST_ASSERT(!vmwrite(PIN_BASED_VM_EXEC_CONTROL, + vmreadz(PIN_BASED_VM_EXEC_CONTROL) | + PIN_BASED_VMX_PREEMPTION_TIMER)); + + GUEST_ASSERT(!vmwrite(VMX_PREEMPTION_TIMER_VALUE, + PREEMPTION_TIMER_VALUE)); + + vmx_pt_rate = rdmsr(MSR_IA32_VMX_MISC) & 0x1F; + + l2_save_restore_done = 0; + + l1_vmx_pt_start = (rdtsc() >> vmx_pt_rate) << vmx_pt_rate; + + GUEST_ASSERT(!vmresume()); + + l1_vmx_pt_finish = rdtsc(); + + /* + * Ensure exit from L2 happens after L2 goes through + * save and restore + */ + GUEST_ASSERT(l2_save_restore_done); + + /* + * Ensure the exit from L2 is due to preemption timer expiry + */ + GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_PREEMPTION_TIMER); + + l1_tsc_deadline = l1_vmx_pt_start + + (PREEMPTION_TIMER_VALUE << vmx_pt_rate); + + l2_tsc_deadline = l2_vmx_pt_start + + (PREEMPTION_TIMER_VALUE << vmx_pt_rate); + + /* + * Sync with the host and pass the l1|l2 pt_expiry_finish times and + * tsc deadlines so that host can verify they are as expected + */ + GUEST_SYNC_ARGS(2, l1_vmx_pt_finish, l1_tsc_deadline, + l2_vmx_pt_finish, l2_tsc_deadline); +} + +void guest_code(struct vmx_pages *vmx_pages) +{ + if (vmx_pages) + l1_guest_code(vmx_pages); + + GUEST_DONE(); +} + +int main(int argc, char *argv[]) +{ + vm_vaddr_t vmx_pages_gva = 0; + + struct kvm_regs regs1, regs2; + struct kvm_vm *vm; + struct kvm_vcpu *vcpu; + struct kvm_x86_state *state; + struct ucall uc; + int stage; + + /* + * AMD currently does not implement any VMX features, so for now we + * just early out. + */ + TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX)); + + TEST_REQUIRE(kvm_has_cap(KVM_CAP_NESTED_STATE)); + + /* Create VM */ + vm = vm_create_with_one_vcpu(&vcpu, guest_code); + + vcpu_regs_get(vcpu, ®s1); + + vcpu_alloc_vmx(vm, &vmx_pages_gva); + vcpu_args_set(vcpu, 1, vmx_pages_gva); + + for (stage = 1;; stage++) { + vcpu_run(vcpu); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); + + switch (get_ucall(vcpu, &uc)) { + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + /* NOT REACHED */ + case UCALL_SYNC: + break; + case UCALL_DONE: + goto done; + default: + TEST_FAIL("Unknown ucall %lu", uc.cmd); + } + + /* UCALL_SYNC is handled here. */ + TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") && + uc.args[1] == stage, "Stage %d: Unexpected register values vmexit, got %lx", + stage, (ulong)uc.args[1]); + /* + * If this stage 2 then we should verify the vmx pt expiry + * is as expected. + * From L1's perspective verify Preemption timer hasn't + * expired too early. + * From L2's perspective verify Preemption timer hasn't + * expired too late. 
+ */ + if (stage == 2) { + + pr_info("Stage %d: L1 PT expiry TSC (%lu) , L1 TSC deadline (%lu)\n", + stage, uc.args[2], uc.args[3]); + + pr_info("Stage %d: L2 PT expiry TSC (%lu) , L2 TSC deadline (%lu)\n", + stage, uc.args[4], uc.args[5]); + + TEST_ASSERT(uc.args[2] >= uc.args[3], + "Stage %d: L1 PT expiry TSC (%lu) < L1 TSC deadline (%lu)", + stage, uc.args[2], uc.args[3]); + + TEST_ASSERT(uc.args[4] < uc.args[5], + "Stage %d: L2 PT expiry TSC (%lu) > L2 TSC deadline (%lu)", + stage, uc.args[4], uc.args[5]); + } + + state = vcpu_save_state(vcpu); + memset(®s1, 0, sizeof(regs1)); + vcpu_regs_get(vcpu, ®s1); + + kvm_vm_release(vm); + + /* Restore state in a new VM. */ + vcpu = vm_recreate_with_one_vcpu(vm); + vcpu_load_state(vcpu, state); + kvm_x86_state_cleanup(state); + + memset(®s2, 0, sizeof(regs2)); + vcpu_regs_get(vcpu, ®s2); + TEST_ASSERT(!memcmp(®s1, ®s2, sizeof(regs2)), + "Unexpected register values after vcpu_load_state; rdi: %lx rsi: %lx", + (ulong) regs2.rdi, (ulong) regs2.rsi); + } + +done: + kvm_vm_free(vm); +} diff --git a/tools/testing/selftests/kvm/x86/vmx_set_nested_state_test.c b/tools/testing/selftests/kvm/x86/vmx_set_nested_state_test.c new file mode 100644 index 000000000000..67a62a5a8895 --- /dev/null +++ b/tools/testing/selftests/kvm/x86/vmx_set_nested_state_test.c @@ -0,0 +1,304 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * vmx_set_nested_state_test + * + * Copyright (C) 2019, Google LLC. + * + * This test verifies the integrity of calling the ioctl KVM_SET_NESTED_STATE. + */ + +#include "test_util.h" +#include "kvm_util.h" +#include "processor.h" +#include "vmx.h" + +#include +#include +#include +#include +#include + +/* + * Mirror of VMCS12_REVISION in arch/x86/kvm/vmx/vmcs12.h. If that value + * changes this should be updated. + */ +#define VMCS12_REVISION 0x11e57ed0 + +bool have_evmcs; + +void test_nested_state(struct kvm_vcpu *vcpu, struct kvm_nested_state *state) +{ + vcpu_nested_state_set(vcpu, state); +} + +void test_nested_state_expect_errno(struct kvm_vcpu *vcpu, + struct kvm_nested_state *state, + int expected_errno) +{ + int rv; + + rv = __vcpu_nested_state_set(vcpu, state); + TEST_ASSERT(rv == -1 && errno == expected_errno, + "Expected %s (%d) from vcpu_nested_state_set but got rv: %i errno: %s (%d)", + strerror(expected_errno), expected_errno, rv, strerror(errno), + errno); +} + +void test_nested_state_expect_einval(struct kvm_vcpu *vcpu, + struct kvm_nested_state *state) +{ + test_nested_state_expect_errno(vcpu, state, EINVAL); +} + +void test_nested_state_expect_efault(struct kvm_vcpu *vcpu, + struct kvm_nested_state *state) +{ + test_nested_state_expect_errno(vcpu, state, EFAULT); +} + +void set_revision_id_for_vmcs12(struct kvm_nested_state *state, + u32 vmcs12_revision) +{ + /* Set revision_id in vmcs12 to vmcs12_revision. 
*/ + memcpy(&state->data, &vmcs12_revision, sizeof(u32)); +} + +void set_default_state(struct kvm_nested_state *state) +{ + memset(state, 0, sizeof(*state)); + state->flags = KVM_STATE_NESTED_RUN_PENDING | + KVM_STATE_NESTED_GUEST_MODE; + state->format = 0; + state->size = sizeof(*state); +} + +void set_default_vmx_state(struct kvm_nested_state *state, int size) +{ + memset(state, 0, size); + if (have_evmcs) + state->flags = KVM_STATE_NESTED_EVMCS; + state->format = 0; + state->size = size; + state->hdr.vmx.vmxon_pa = 0x1000; + state->hdr.vmx.vmcs12_pa = 0x2000; + state->hdr.vmx.smm.flags = 0; + set_revision_id_for_vmcs12(state, VMCS12_REVISION); +} + +void test_vmx_nested_state(struct kvm_vcpu *vcpu) +{ + /* Add a page for VMCS12. */ + const int state_sz = sizeof(struct kvm_nested_state) + getpagesize(); + struct kvm_nested_state *state = + (struct kvm_nested_state *)malloc(state_sz); + + /* The format must be set to 0. 0 for VMX, 1 for SVM. */ + set_default_vmx_state(state, state_sz); + state->format = 1; + test_nested_state_expect_einval(vcpu, state); + + /* + * We cannot virtualize anything if the guest does not have VMX + * enabled. + */ + set_default_vmx_state(state, state_sz); + test_nested_state_expect_einval(vcpu, state); + + /* + * We cannot virtualize anything if the guest does not have VMX + * enabled. We expect KVM_SET_NESTED_STATE to return 0 if vmxon_pa + * is set to -1ull, but the flags must be zero. + */ + set_default_vmx_state(state, state_sz); + state->hdr.vmx.vmxon_pa = -1ull; + test_nested_state_expect_einval(vcpu, state); + + state->hdr.vmx.vmcs12_pa = -1ull; + state->flags = KVM_STATE_NESTED_EVMCS; + test_nested_state_expect_einval(vcpu, state); + + state->flags = 0; + test_nested_state(vcpu, state); + + /* Enable VMX in the guest CPUID. */ + vcpu_set_cpuid_feature(vcpu, X86_FEATURE_VMX); + + /* + * Setting vmxon_pa == -1ull and vmcs_pa == -1ull exits early without + * setting the nested state. When the eVMCS flag is not set, the + * expected return value is '0'. + */ + set_default_vmx_state(state, state_sz); + state->flags = 0; + state->hdr.vmx.vmxon_pa = -1ull; + state->hdr.vmx.vmcs12_pa = -1ull; + test_nested_state(vcpu, state); + + /* + * When eVMCS is supported, the eVMCS flag can only be set if the + * enlightened VMCS capability has been enabled. + */ + if (have_evmcs) { + state->flags = KVM_STATE_NESTED_EVMCS; + test_nested_state_expect_einval(vcpu, state); + vcpu_enable_evmcs(vcpu); + test_nested_state(vcpu, state); + } + + /* It is invalid to have vmxon_pa == -1ull and SMM flags non-zero. */ + state->hdr.vmx.smm.flags = 1; + test_nested_state_expect_einval(vcpu, state); + + /* Invalid flags are rejected. */ + set_default_vmx_state(state, state_sz); + state->hdr.vmx.flags = ~0; + test_nested_state_expect_einval(vcpu, state); + + /* It is invalid to have vmxon_pa == -1ull and vmcs_pa != -1ull. */ + set_default_vmx_state(state, state_sz); + state->hdr.vmx.vmxon_pa = -1ull; + state->flags = 0; + test_nested_state_expect_einval(vcpu, state); + + /* It is invalid to have vmxon_pa set to a non-page aligned address. */ + set_default_vmx_state(state, state_sz); + state->hdr.vmx.vmxon_pa = 1; + test_nested_state_expect_einval(vcpu, state); + + /* + * It is invalid to have KVM_STATE_NESTED_SMM_GUEST_MODE and + * KVM_STATE_NESTED_GUEST_MODE set together. 
+ */ + set_default_vmx_state(state, state_sz); + state->flags = KVM_STATE_NESTED_GUEST_MODE | + KVM_STATE_NESTED_RUN_PENDING; + state->hdr.vmx.smm.flags = KVM_STATE_NESTED_SMM_GUEST_MODE; + test_nested_state_expect_einval(vcpu, state); + + /* + * It is invalid to have any of the SMM flags set besides: + * KVM_STATE_NESTED_SMM_GUEST_MODE + * KVM_STATE_NESTED_SMM_VMXON + */ + set_default_vmx_state(state, state_sz); + state->hdr.vmx.smm.flags = ~(KVM_STATE_NESTED_SMM_GUEST_MODE | + KVM_STATE_NESTED_SMM_VMXON); + test_nested_state_expect_einval(vcpu, state); + + /* Outside SMM, SMM flags must be zero. */ + set_default_vmx_state(state, state_sz); + state->flags = 0; + state->hdr.vmx.smm.flags = KVM_STATE_NESTED_SMM_GUEST_MODE; + test_nested_state_expect_einval(vcpu, state); + + /* + * Size must be large enough to fit kvm_nested_state and vmcs12 + * if VMCS12 physical address is set + */ + set_default_vmx_state(state, state_sz); + state->size = sizeof(*state); + state->flags = 0; + test_nested_state_expect_einval(vcpu, state); + + set_default_vmx_state(state, state_sz); + state->size = sizeof(*state); + state->flags = 0; + state->hdr.vmx.vmcs12_pa = -1; + test_nested_state(vcpu, state); + + /* + * KVM_SET_NESTED_STATE succeeds with invalid VMCS + * contents but L2 not running. + */ + set_default_vmx_state(state, state_sz); + state->flags = 0; + test_nested_state(vcpu, state); + + /* Invalid flags are rejected, even if no VMCS loaded. */ + set_default_vmx_state(state, state_sz); + state->size = sizeof(*state); + state->flags = 0; + state->hdr.vmx.vmcs12_pa = -1; + state->hdr.vmx.flags = ~0; + test_nested_state_expect_einval(vcpu, state); + + /* vmxon_pa cannot be the same address as vmcs_pa. */ + set_default_vmx_state(state, state_sz); + state->hdr.vmx.vmxon_pa = 0; + state->hdr.vmx.vmcs12_pa = 0; + test_nested_state_expect_einval(vcpu, state); + + /* + * Test that if we leave nesting the state reflects that when we get + * it again. + */ + set_default_vmx_state(state, state_sz); + state->hdr.vmx.vmxon_pa = -1ull; + state->hdr.vmx.vmcs12_pa = -1ull; + state->flags = 0; + test_nested_state(vcpu, state); + vcpu_nested_state_get(vcpu, state); + TEST_ASSERT(state->size >= sizeof(*state) && state->size <= state_sz, + "Size must be between %ld and %d. The size returned was %d.", + sizeof(*state), state_sz, state->size); + TEST_ASSERT(state->hdr.vmx.vmxon_pa == -1ull, "vmxon_pa must be -1ull."); + TEST_ASSERT(state->hdr.vmx.vmcs12_pa == -1ull, "vmcs_pa must be -1ull."); + + free(state); +} + +int main(int argc, char *argv[]) +{ + struct kvm_vm *vm; + struct kvm_nested_state state; + struct kvm_vcpu *vcpu; + + have_evmcs = kvm_check_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS); + + TEST_REQUIRE(kvm_has_cap(KVM_CAP_NESTED_STATE)); + + /* + * AMD currently does not implement set_nested_state, so for now we + * just early out. + */ + TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX)); + + vm = vm_create_with_one_vcpu(&vcpu, NULL); + + /* + * First run tests with VMX disabled to check error handling. + */ + vcpu_clear_cpuid_feature(vcpu, X86_FEATURE_VMX); + + /* Passing a NULL kvm_nested_state causes a EFAULT. */ + test_nested_state_expect_efault(vcpu, NULL); + + /* 'size' cannot be smaller than sizeof(kvm_nested_state). */ + set_default_state(&state); + state.size = 0; + test_nested_state_expect_einval(vcpu, &state); + + /* + * Setting the flags 0xf fails the flags check. 
The only flags that + * can be used are: + * KVM_STATE_NESTED_GUEST_MODE + * KVM_STATE_NESTED_RUN_PENDING + * KVM_STATE_NESTED_EVMCS + */ + set_default_state(&state); + state.flags = 0xf; + test_nested_state_expect_einval(vcpu, &state); + + /* + * If KVM_STATE_NESTED_RUN_PENDING is set then + * KVM_STATE_NESTED_GUEST_MODE has to be set as well. + */ + set_default_state(&state); + state.flags = KVM_STATE_NESTED_RUN_PENDING; + test_nested_state_expect_einval(vcpu, &state); + + test_vmx_nested_state(vcpu); + + kvm_vm_free(vm); + return 0; +} diff --git a/tools/testing/selftests/kvm/x86/vmx_tsc_adjust_test.c b/tools/testing/selftests/kvm/x86/vmx_tsc_adjust_test.c new file mode 100644 index 000000000000..2ceb5c78c442 --- /dev/null +++ b/tools/testing/selftests/kvm/x86/vmx_tsc_adjust_test.c @@ -0,0 +1,156 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * vmx_tsc_adjust_test + * + * Copyright (C) 2018, Google LLC. + * + * IA32_TSC_ADJUST test + * + * According to the SDM, "if an execution of WRMSR to the + * IA32_TIME_STAMP_COUNTER MSR adds (or subtracts) value X from the TSC, + * the logical processor also adds (or subtracts) value X from the + * IA32_TSC_ADJUST MSR. + * + * Note that when L1 doesn't intercept writes to IA32_TSC, a + * WRMSR(IA32_TSC) from L2 sets L1's TSC value, not L2's perceived TSC + * value. + * + * This test verifies that this unusual case is handled correctly. + */ + +#include "test_util.h" +#include "kvm_util.h" +#include "processor.h" +#include "vmx.h" + +#include +#include + +#include "kselftest.h" + +#ifndef MSR_IA32_TSC_ADJUST +#define MSR_IA32_TSC_ADJUST 0x3b +#endif + +#define TSC_ADJUST_VALUE (1ll << 32) +#define TSC_OFFSET_VALUE -(1ll << 48) + +enum { + PORT_ABORT = 0x1000, + PORT_REPORT, + PORT_DONE, +}; + +enum { + VMXON_PAGE = 0, + VMCS_PAGE, + MSR_BITMAP_PAGE, + + NUM_VMX_PAGES, +}; + +/* The virtual machine object. */ +static struct kvm_vm *vm; + +static void check_ia32_tsc_adjust(int64_t max) +{ + int64_t adjust; + + adjust = rdmsr(MSR_IA32_TSC_ADJUST); + GUEST_SYNC(adjust); + GUEST_ASSERT(adjust <= max); +} + +static void l2_guest_code(void) +{ + uint64_t l1_tsc = rdtsc() - TSC_OFFSET_VALUE; + + wrmsr(MSR_IA32_TSC, l1_tsc - TSC_ADJUST_VALUE); + check_ia32_tsc_adjust(-2 * TSC_ADJUST_VALUE); + + /* Exit to L1 */ + __asm__ __volatile__("vmcall"); +} + +static void l1_guest_code(struct vmx_pages *vmx_pages) +{ +#define L2_GUEST_STACK_SIZE 64 + unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; + uint32_t control; + uintptr_t save_cr3; + + GUEST_ASSERT(rdtsc() < TSC_ADJUST_VALUE); + wrmsr(MSR_IA32_TSC, rdtsc() - TSC_ADJUST_VALUE); + check_ia32_tsc_adjust(-1 * TSC_ADJUST_VALUE); + + GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages)); + GUEST_ASSERT(load_vmcs(vmx_pages)); + + /* Prepare the VMCS for L2 execution. */ + prepare_vmcs(vmx_pages, l2_guest_code, + &l2_guest_stack[L2_GUEST_STACK_SIZE]); + control = vmreadz(CPU_BASED_VM_EXEC_CONTROL); + control |= CPU_BASED_USE_MSR_BITMAPS | CPU_BASED_USE_TSC_OFFSETTING; + vmwrite(CPU_BASED_VM_EXEC_CONTROL, control); + vmwrite(TSC_OFFSET, TSC_OFFSET_VALUE); + + /* Jump into L2. First, test failure to load guest CR3. 
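+	 * Stuffing an invalid (-1ull) GUEST_CR3 makes vmlaunch() fail VM-entry
+	 * with EXIT_REASON_INVALID_STATE, after which IA32_TSC_ADJUST is
+	 * re-checked and the original CR3 is restored.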
*/ + save_cr3 = vmreadz(GUEST_CR3); + vmwrite(GUEST_CR3, -1ull); + GUEST_ASSERT(!vmlaunch()); + GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == + (EXIT_REASON_FAILED_VMENTRY | EXIT_REASON_INVALID_STATE)); + check_ia32_tsc_adjust(-1 * TSC_ADJUST_VALUE); + vmwrite(GUEST_CR3, save_cr3); + + GUEST_ASSERT(!vmlaunch()); + GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL); + + check_ia32_tsc_adjust(-2 * TSC_ADJUST_VALUE); + + GUEST_DONE(); +} + +static void report(int64_t val) +{ + pr_info("IA32_TSC_ADJUST is %ld (%lld * TSC_ADJUST_VALUE + %lld).\n", + val, val / TSC_ADJUST_VALUE, val % TSC_ADJUST_VALUE); +} + +int main(int argc, char *argv[]) +{ + vm_vaddr_t vmx_pages_gva; + struct kvm_vcpu *vcpu; + + TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX)); + + vm = vm_create_with_one_vcpu(&vcpu, (void *) l1_guest_code); + + /* Allocate VMX pages and shared descriptors (vmx_pages). */ + vcpu_alloc_vmx(vm, &vmx_pages_gva); + vcpu_args_set(vcpu, 1, vmx_pages_gva); + + for (;;) { + struct ucall uc; + + vcpu_run(vcpu); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); + + switch (get_ucall(vcpu, &uc)) { + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + /* NOT REACHED */ + case UCALL_SYNC: + report(uc.args[1]); + break; + case UCALL_DONE: + goto done; + default: + TEST_FAIL("Unknown ucall %lu", uc.cmd); + } + } + +done: + kvm_vm_free(vm); + return 0; +} diff --git a/tools/testing/selftests/kvm/x86/xapic_ipi_test.c b/tools/testing/selftests/kvm/x86/xapic_ipi_test.c new file mode 100644 index 000000000000..a76078a08ff8 --- /dev/null +++ b/tools/testing/selftests/kvm/x86/xapic_ipi_test.c @@ -0,0 +1,487 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * xapic_ipi_test + * + * Copyright (C) 2020, Google LLC. + * + * This work is licensed under the terms of the GNU GPL, version 2. + * + * Test that when the APIC is in xAPIC mode, a vCPU can send an IPI to wake + * another vCPU that is halted when KVM's backing page for the APIC access + * address has been moved by mm. + * + * The test starts two vCPUs: one that sends IPIs and one that continually + * executes HLT. The sender checks that the halter has woken from the HLT and + * has reentered HLT before sending the next IPI. While the vCPUs are running, + * the host continually calls migrate_pages to move all of the process' pages + * amongst the available numa nodes on the machine. + * + * Migration is a command line option. When used on non-numa machines will + * exit with error. Test is still usefull on non-numa for testing IPIs. + */ +#include +#include +#include +#include +#include + +#include "kvm_util.h" +#include "numaif.h" +#include "processor.h" +#include "test_util.h" +#include "vmx.h" + +/* Default running time for the test */ +#define DEFAULT_RUN_SECS 3 + +/* Default delay between migrate_pages calls (microseconds) */ +#define DEFAULT_DELAY_USECS 500000 + +/* + * Vector for IPI from sender vCPU to halting vCPU. + * Value is arbitrary and was chosen for the alternating bit pattern. Any + * value should work. + */ +#define IPI_VECTOR 0xa5 + +/* + * Incremented in the IPI handler. 
Provides evidence to the sender that the IPI + * arrived at the destination + */ +static volatile uint64_t ipis_rcvd; + +/* Data struct shared between host main thread and vCPUs */ +struct test_data_page { + uint32_t halter_apic_id; + volatile uint64_t hlt_count; + volatile uint64_t wake_count; + uint64_t ipis_sent; + uint64_t migrations_attempted; + uint64_t migrations_completed; + uint32_t icr; + uint32_t icr2; + uint32_t halter_tpr; + uint32_t halter_ppr; + + /* + * Record local version register as a cross-check that APIC access + * worked. Value should match what KVM reports (APIC_VERSION in + * arch/x86/kvm/lapic.c). If test is failing, check that values match + * to determine whether APIC access exits are working. + */ + uint32_t halter_lvr; +}; + +struct thread_params { + struct test_data_page *data; + struct kvm_vcpu *vcpu; + uint64_t *pipis_rcvd; /* host address of ipis_rcvd global */ +}; + +void verify_apic_base_addr(void) +{ + uint64_t msr = rdmsr(MSR_IA32_APICBASE); + uint64_t base = GET_APIC_BASE(msr); + + GUEST_ASSERT(base == APIC_DEFAULT_GPA); +} + +static void halter_guest_code(struct test_data_page *data) +{ + verify_apic_base_addr(); + xapic_enable(); + + data->halter_apic_id = GET_APIC_ID_FIELD(xapic_read_reg(APIC_ID)); + data->halter_lvr = xapic_read_reg(APIC_LVR); + + /* + * Loop forever HLTing and recording halts & wakes. Disable interrupts + * each time around to minimize window between signaling the pending + * halt to the sender vCPU and executing the halt. No need to disable on + * first run as this vCPU executes first and the host waits for it to + * signal going into first halt before starting the sender vCPU. Record + * TPR and PPR for diagnostic purposes in case the test fails. + */ + for (;;) { + data->halter_tpr = xapic_read_reg(APIC_TASKPRI); + data->halter_ppr = xapic_read_reg(APIC_PROCPRI); + data->hlt_count++; + asm volatile("sti; hlt; cli"); + data->wake_count++; + } +} + +/* + * Runs on halter vCPU when IPI arrives. Write an arbitrary non-zero value to + * enable diagnosing errant writes to the APIC access address backing page in + * case of test failure. + */ +static void guest_ipi_handler(struct ex_regs *regs) +{ + ipis_rcvd++; + xapic_write_reg(APIC_EOI, 77); +} + +static void sender_guest_code(struct test_data_page *data) +{ + uint64_t last_wake_count; + uint64_t last_hlt_count; + uint64_t last_ipis_rcvd_count; + uint32_t icr_val; + uint32_t icr2_val; + uint64_t tsc_start; + + verify_apic_base_addr(); + xapic_enable(); + + /* + * Init interrupt command register for sending IPIs + * + * Delivery mode=fixed, per SDM: + * "Delivers the interrupt specified in the vector field to the target + * processor." + * + * Destination mode=physical i.e. specify target by its local APIC + * ID. This vCPU assumes that the halter vCPU has already started and + * set data->halter_apic_id. + */ + icr_val = (APIC_DEST_PHYSICAL | APIC_DM_FIXED | IPI_VECTOR); + icr2_val = SET_APIC_DEST_FIELD(data->halter_apic_id); + data->icr = icr_val; + data->icr2 = icr2_val; + + last_wake_count = data->wake_count; + last_hlt_count = data->hlt_count; + last_ipis_rcvd_count = ipis_rcvd; + for (;;) { + /* + * Send IPI to halter vCPU. + * First IPI can be sent unconditionally because halter vCPU + * starts earlier. + */ + xapic_write_reg(APIC_ICR2, icr2_val); + xapic_write_reg(APIC_ICR, icr_val); + data->ipis_sent++; + + /* + * Wait up to ~1 sec for halter to indicate that it has: + * 1. Received the IPI + * 2. Woken up from the halt + * 3. 
Gone back into halt + * Current CPUs typically run at 2.x Ghz which is ~2 + * billion ticks per second. + */ + tsc_start = rdtsc(); + while (rdtsc() - tsc_start < 2000000000) { + if ((ipis_rcvd != last_ipis_rcvd_count) && + (data->wake_count != last_wake_count) && + (data->hlt_count != last_hlt_count)) + break; + } + + GUEST_ASSERT((ipis_rcvd != last_ipis_rcvd_count) && + (data->wake_count != last_wake_count) && + (data->hlt_count != last_hlt_count)); + + last_wake_count = data->wake_count; + last_hlt_count = data->hlt_count; + last_ipis_rcvd_count = ipis_rcvd; + } +} + +static void *vcpu_thread(void *arg) +{ + struct thread_params *params = (struct thread_params *)arg; + struct kvm_vcpu *vcpu = params->vcpu; + struct ucall uc; + int old; + int r; + + r = pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, &old); + TEST_ASSERT(r == 0, + "pthread_setcanceltype failed on vcpu_id=%u with errno=%d", + vcpu->id, r); + + fprintf(stderr, "vCPU thread running vCPU %u\n", vcpu->id); + vcpu_run(vcpu); + + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); + + if (get_ucall(vcpu, &uc) == UCALL_ABORT) { + TEST_ASSERT(false, + "vCPU %u exited with error: %s.\n" + "Sending vCPU sent %lu IPIs to halting vCPU\n" + "Halting vCPU halted %lu times, woke %lu times, received %lu IPIs.\n" + "Halter TPR=%#x PPR=%#x LVR=%#x\n" + "Migrations attempted: %lu\n" + "Migrations completed: %lu", + vcpu->id, (const char *)uc.args[0], + params->data->ipis_sent, params->data->hlt_count, + params->data->wake_count, + *params->pipis_rcvd, params->data->halter_tpr, + params->data->halter_ppr, params->data->halter_lvr, + params->data->migrations_attempted, + params->data->migrations_completed); + } + + return NULL; +} + +static void cancel_join_vcpu_thread(pthread_t thread, struct kvm_vcpu *vcpu) +{ + void *retval; + int r; + + r = pthread_cancel(thread); + TEST_ASSERT(r == 0, + "pthread_cancel on vcpu_id=%d failed with errno=%d", + vcpu->id, r); + + r = pthread_join(thread, &retval); + TEST_ASSERT(r == 0, + "pthread_join on vcpu_id=%d failed with errno=%d", + vcpu->id, r); + TEST_ASSERT(retval == PTHREAD_CANCELED, + "expected retval=%p, got %p", PTHREAD_CANCELED, + retval); +} + +void do_migrations(struct test_data_page *data, int run_secs, int delay_usecs, + uint64_t *pipis_rcvd) +{ + long pages_not_moved; + unsigned long nodemask = 0; + unsigned long nodemasks[sizeof(nodemask) * 8]; + int nodes = 0; + time_t start_time, last_update, now; + time_t interval_secs = 1; + int i, r; + int from, to; + unsigned long bit; + uint64_t hlt_count; + uint64_t wake_count; + uint64_t ipis_sent; + + fprintf(stderr, "Calling migrate_pages every %d microseconds\n", + delay_usecs); + + /* Get set of first 64 numa nodes available */ + r = get_mempolicy(NULL, &nodemask, sizeof(nodemask) * 8, + 0, MPOL_F_MEMS_ALLOWED); + TEST_ASSERT(r == 0, "get_mempolicy failed errno=%d", errno); + + fprintf(stderr, "Numa nodes found amongst first %lu possible nodes " + "(each 1-bit indicates node is present): %#lx\n", + sizeof(nodemask) * 8, nodemask); + + /* Init array of masks containing a single-bit in each, one for each + * available node. migrate_pages called below requires specifying nodes + * as bit masks. + */ + for (i = 0, bit = 1; i < sizeof(nodemask) * 8; i++, bit <<= 1) { + if (nodemask & bit) { + nodemasks[nodes] = nodemask & bit; + nodes++; + } + } + + TEST_ASSERT(nodes > 1, + "Did not find at least 2 numa nodes. 
Can't do migration"); + + fprintf(stderr, "Migrating amongst %d nodes found\n", nodes); + + from = 0; + to = 1; + start_time = time(NULL); + last_update = start_time; + + ipis_sent = data->ipis_sent; + hlt_count = data->hlt_count; + wake_count = data->wake_count; + + while ((int)(time(NULL) - start_time) < run_secs) { + data->migrations_attempted++; + + /* + * migrate_pages with PID=0 will migrate all pages of this + * process between the nodes specified as bitmasks. The page + * backing the APIC access address belongs to this process + * because it is allocated by KVM in the context of the + * KVM_CREATE_VCPU ioctl. If that assumption ever changes this + * test may break or give a false positive signal. + */ + pages_not_moved = migrate_pages(0, sizeof(nodemasks[from]), + &nodemasks[from], + &nodemasks[to]); + if (pages_not_moved < 0) + fprintf(stderr, + "migrate_pages failed, errno=%d\n", errno); + else if (pages_not_moved > 0) + fprintf(stderr, + "migrate_pages could not move %ld pages\n", + pages_not_moved); + else + data->migrations_completed++; + + from = to; + to++; + if (to == nodes) + to = 0; + + now = time(NULL); + if (((now - start_time) % interval_secs == 0) && + (now != last_update)) { + last_update = now; + fprintf(stderr, + "%lu seconds: Migrations attempted=%lu completed=%lu, " + "IPIs sent=%lu received=%lu, HLTs=%lu wakes=%lu\n", + now - start_time, data->migrations_attempted, + data->migrations_completed, + data->ipis_sent, *pipis_rcvd, + data->hlt_count, data->wake_count); + + TEST_ASSERT(ipis_sent != data->ipis_sent && + hlt_count != data->hlt_count && + wake_count != data->wake_count, + "IPI, HLT and wake count have not increased " + "in the last %lu seconds. " + "HLTer is likely hung.", interval_secs); + + ipis_sent = data->ipis_sent; + hlt_count = data->hlt_count; + wake_count = data->wake_count; + } + usleep(delay_usecs); + } +} + +void get_cmdline_args(int argc, char *argv[], int *run_secs, + bool *migrate, int *delay_usecs) +{ + for (;;) { + int opt = getopt(argc, argv, "s:d:m"); + + if (opt == -1) + break; + switch (opt) { + case 's': + *run_secs = parse_size(optarg); + break; + case 'm': + *migrate = true; + break; + case 'd': + *delay_usecs = parse_size(optarg); + break; + default: + TEST_ASSERT(false, + "Usage: -s . Default is %d seconds.\n" + "-m adds calls to migrate_pages while vCPUs are running." + " Default is no migrations.\n" + "-d - delay between migrate_pages() calls." 
+ " Default is %d microseconds.", + DEFAULT_RUN_SECS, DEFAULT_DELAY_USECS); + } + } +} + +int main(int argc, char *argv[]) +{ + int r; + int wait_secs; + const int max_halter_wait = 10; + int run_secs = 0; + int delay_usecs = 0; + struct test_data_page *data; + vm_vaddr_t test_data_page_vaddr; + bool migrate = false; + pthread_t threads[2]; + struct thread_params params[2]; + struct kvm_vm *vm; + uint64_t *pipis_rcvd; + + get_cmdline_args(argc, argv, &run_secs, &migrate, &delay_usecs); + if (run_secs <= 0) + run_secs = DEFAULT_RUN_SECS; + if (delay_usecs <= 0) + delay_usecs = DEFAULT_DELAY_USECS; + + vm = vm_create_with_one_vcpu(¶ms[0].vcpu, halter_guest_code); + + vm_install_exception_handler(vm, IPI_VECTOR, guest_ipi_handler); + + virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA); + + params[1].vcpu = vm_vcpu_add(vm, 1, sender_guest_code); + + test_data_page_vaddr = vm_vaddr_alloc_page(vm); + data = addr_gva2hva(vm, test_data_page_vaddr); + memset(data, 0, sizeof(*data)); + params[0].data = data; + params[1].data = data; + + vcpu_args_set(params[0].vcpu, 1, test_data_page_vaddr); + vcpu_args_set(params[1].vcpu, 1, test_data_page_vaddr); + + pipis_rcvd = (uint64_t *)addr_gva2hva(vm, (uint64_t)&ipis_rcvd); + params[0].pipis_rcvd = pipis_rcvd; + params[1].pipis_rcvd = pipis_rcvd; + + /* Start halter vCPU thread and wait for it to execute first HLT. */ + r = pthread_create(&threads[0], NULL, vcpu_thread, ¶ms[0]); + TEST_ASSERT(r == 0, + "pthread_create halter failed errno=%d", errno); + fprintf(stderr, "Halter vCPU thread started\n"); + + wait_secs = 0; + while ((wait_secs < max_halter_wait) && !data->hlt_count) { + sleep(1); + wait_secs++; + } + + TEST_ASSERT(data->hlt_count, + "Halter vCPU did not execute first HLT within %d seconds", + max_halter_wait); + + fprintf(stderr, + "Halter vCPU thread reported its APIC ID: %u after %d seconds.\n", + data->halter_apic_id, wait_secs); + + r = pthread_create(&threads[1], NULL, vcpu_thread, ¶ms[1]); + TEST_ASSERT(r == 0, "pthread_create sender failed errno=%d", errno); + + fprintf(stderr, + "IPI sender vCPU thread started. Letting vCPUs run for %d seconds.\n", + run_secs); + + if (!migrate) + sleep(run_secs); + else + do_migrations(data, run_secs, delay_usecs, pipis_rcvd); + + /* + * Cancel threads and wait for them to stop. 
+ */ + cancel_join_vcpu_thread(threads[0], params[0].vcpu); + cancel_join_vcpu_thread(threads[1], params[1].vcpu); + + fprintf(stderr, + "Test successful after running for %d seconds.\n" + "Sending vCPU sent %lu IPIs to halting vCPU\n" + "Halting vCPU halted %lu times, woke %lu times, received %lu IPIs.\n" + "Halter APIC ID=%#x\n" + "Sender ICR value=%#x ICR2 value=%#x\n" + "Halter TPR=%#x PPR=%#x LVR=%#x\n" + "Migrations attempted: %lu\n" + "Migrations completed: %lu\n", + run_secs, data->ipis_sent, + data->hlt_count, data->wake_count, *pipis_rcvd, + data->halter_apic_id, + data->icr, data->icr2, + data->halter_tpr, data->halter_ppr, data->halter_lvr, + data->migrations_attempted, data->migrations_completed); + + kvm_vm_free(vm); + + return 0; +} diff --git a/tools/testing/selftests/kvm/x86/xapic_state_test.c b/tools/testing/selftests/kvm/x86/xapic_state_test.c new file mode 100644 index 000000000000..88bcca188799 --- /dev/null +++ b/tools/testing/selftests/kvm/x86/xapic_state_test.c @@ -0,0 +1,262 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include +#include +#include +#include +#include + +#include "apic.h" +#include "kvm_util.h" +#include "processor.h" +#include "test_util.h" + +struct xapic_vcpu { + struct kvm_vcpu *vcpu; + bool is_x2apic; + bool has_xavic_errata; +}; + +static void xapic_guest_code(void) +{ + asm volatile("cli"); + + xapic_enable(); + + while (1) { + uint64_t val = (u64)xapic_read_reg(APIC_IRR) | + (u64)xapic_read_reg(APIC_IRR + 0x10) << 32; + + xapic_write_reg(APIC_ICR2, val >> 32); + xapic_write_reg(APIC_ICR, val); + GUEST_SYNC(val); + } +} + +#define X2APIC_RSVD_BITS_MASK (GENMASK_ULL(31, 20) | \ + GENMASK_ULL(17, 16) | \ + GENMASK_ULL(13, 13)) + +static void x2apic_guest_code(void) +{ + asm volatile("cli"); + + x2apic_enable(); + + do { + uint64_t val = x2apic_read_reg(APIC_IRR) | + x2apic_read_reg(APIC_IRR + 0x10) << 32; + + if (val & X2APIC_RSVD_BITS_MASK) { + x2apic_write_reg_fault(APIC_ICR, val); + } else { + x2apic_write_reg(APIC_ICR, val); + GUEST_ASSERT_EQ(x2apic_read_reg(APIC_ICR), val); + } + GUEST_SYNC(val); + } while (1); +} + +static void ____test_icr(struct xapic_vcpu *x, uint64_t val) +{ + struct kvm_vcpu *vcpu = x->vcpu; + struct kvm_lapic_state xapic; + struct ucall uc; + uint64_t icr; + + /* + * Tell the guest what ICR value to write. Use the IRR to pass info, + * all bits are valid and should not be modified by KVM (ignoring the + * fact that vectors 0-15 are technically illegal). + */ + vcpu_ioctl(vcpu, KVM_GET_LAPIC, &xapic); + *((u32 *)&xapic.regs[APIC_IRR]) = val; + *((u32 *)&xapic.regs[APIC_IRR + 0x10]) = val >> 32; + vcpu_ioctl(vcpu, KVM_SET_LAPIC, &xapic); + + vcpu_run(vcpu); + TEST_ASSERT_EQ(get_ucall(vcpu, &uc), UCALL_SYNC); + TEST_ASSERT_EQ(uc.args[1], val); + + vcpu_ioctl(vcpu, KVM_GET_LAPIC, &xapic); + icr = (u64)(*((u32 *)&xapic.regs[APIC_ICR])) | + (u64)(*((u32 *)&xapic.regs[APIC_ICR2])) << 32; + if (!x->is_x2apic) { + if (!x->has_xavic_errata) + val &= (-1u | (0xffull << (32 + 24))); + } else if (val & X2APIC_RSVD_BITS_MASK) { + return; + } + + if (x->has_xavic_errata) + TEST_ASSERT_EQ(icr & ~APIC_ICR_BUSY, val & ~APIC_ICR_BUSY); + else + TEST_ASSERT_EQ(icr, val & ~APIC_ICR_BUSY); +} + +static void __test_icr(struct xapic_vcpu *x, uint64_t val) +{ + /* + * The BUSY bit is reserved on both AMD and Intel, but only AMD treats + * it is as _must_ be zero. Intel simply ignores the bit. Don't test + * the BUSY bit for x2APIC, as there is no single correct behavior. 
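+	 * Hence the value below is tried with APIC_ICR_BUSY set only for
+	 * xAPIC, and then with the bit cleared for both APIC modes.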
+ */ + if (!x->is_x2apic) + ____test_icr(x, val | APIC_ICR_BUSY); + + ____test_icr(x, val & ~(u64)APIC_ICR_BUSY); +} + +static void test_icr(struct xapic_vcpu *x) +{ + struct kvm_vcpu *vcpu = x->vcpu; + uint64_t icr, i, j; + + icr = APIC_DEST_SELF | APIC_INT_ASSERT | APIC_DM_FIXED; + for (i = 0; i <= 0xff; i++) + __test_icr(x, icr | i); + + icr = APIC_INT_ASSERT | APIC_DM_FIXED; + for (i = 0; i <= 0xff; i++) + __test_icr(x, icr | i); + + /* + * Send all flavors of IPIs to non-existent vCPUs. TODO: use number of + * vCPUs, not vcpu.id + 1. Arbitrarily use vector 0xff. + */ + icr = APIC_INT_ASSERT | 0xff; + for (i = 0; i < 0xff; i++) { + if (i == vcpu->id) + continue; + for (j = 0; j < 8; j++) + __test_icr(x, i << (32 + 24) | icr | (j << 8)); + } + + /* And again with a shorthand destination for all types of IPIs. */ + icr = APIC_DEST_ALLBUT | APIC_INT_ASSERT; + for (i = 0; i < 8; i++) + __test_icr(x, icr | (i << 8)); + + /* And a few garbage value, just make sure it's an IRQ (blocked). */ + __test_icr(x, 0xa5a5a5a5a5a5a5a5 & ~APIC_DM_FIXED_MASK); + __test_icr(x, 0x5a5a5a5a5a5a5a5a & ~APIC_DM_FIXED_MASK); + __test_icr(x, -1ull & ~APIC_DM_FIXED_MASK); +} + +static void __test_apic_id(struct kvm_vcpu *vcpu, uint64_t apic_base) +{ + uint32_t apic_id, expected; + struct kvm_lapic_state xapic; + + vcpu_set_msr(vcpu, MSR_IA32_APICBASE, apic_base); + + vcpu_ioctl(vcpu, KVM_GET_LAPIC, &xapic); + + expected = apic_base & X2APIC_ENABLE ? vcpu->id : vcpu->id << 24; + apic_id = *((u32 *)&xapic.regs[APIC_ID]); + + TEST_ASSERT(apic_id == expected, + "APIC_ID not set back to %s format; wanted = %x, got = %x", + (apic_base & X2APIC_ENABLE) ? "x2APIC" : "xAPIC", + expected, apic_id); +} + +/* + * Verify that KVM switches the APIC_ID between xAPIC and x2APIC when userspace + * stuffs MSR_IA32_APICBASE. Setting the APIC_ID when x2APIC is enabled and + * when the APIC transitions for DISABLED to ENABLED is architectural behavior + * (on Intel), whereas the x2APIC => xAPIC transition behavior is KVM ABI since + * attempted to transition from x2APIC to xAPIC without disabling the APIC is + * architecturally disallowed. + */ +static void test_apic_id(void) +{ + const uint32_t NR_VCPUS = 3; + struct kvm_vcpu *vcpus[NR_VCPUS]; + uint64_t apic_base; + struct kvm_vm *vm; + int i; + + vm = vm_create_with_vcpus(NR_VCPUS, NULL, vcpus); + vm_enable_cap(vm, KVM_CAP_X2APIC_API, KVM_X2APIC_API_USE_32BIT_IDS); + + for (i = 0; i < NR_VCPUS; i++) { + apic_base = vcpu_get_msr(vcpus[i], MSR_IA32_APICBASE); + + TEST_ASSERT(apic_base & MSR_IA32_APICBASE_ENABLE, + "APIC not in ENABLED state at vCPU RESET"); + TEST_ASSERT(!(apic_base & X2APIC_ENABLE), + "APIC not in xAPIC mode at vCPU RESET"); + + __test_apic_id(vcpus[i], apic_base); + __test_apic_id(vcpus[i], apic_base | X2APIC_ENABLE); + __test_apic_id(vcpus[i], apic_base); + } + + kvm_vm_free(vm); +} + +static void test_x2apic_id(void) +{ + struct kvm_lapic_state lapic = {}; + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + int i; + + vm = vm_create_with_one_vcpu(&vcpu, NULL); + vcpu_set_msr(vcpu, MSR_IA32_APICBASE, MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE); + + /* + * Try stuffing a modified x2APIC ID, KVM should ignore the value and + * always return the vCPU's default/readonly x2APIC ID. 
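+	 * Each iteration below stuffs a bogus ID (i << 24) via KVM_SET_LAPIC
+	 * and asserts that the ID read back is still vcpu->id << 24.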
+ */ + for (i = 0; i <= 0xff; i++) { + *(u32 *)(lapic.regs + APIC_ID) = i << 24; + *(u32 *)(lapic.regs + APIC_SPIV) = APIC_SPIV_APIC_ENABLED; + vcpu_ioctl(vcpu, KVM_SET_LAPIC, &lapic); + + vcpu_ioctl(vcpu, KVM_GET_LAPIC, &lapic); + TEST_ASSERT(*((u32 *)&lapic.regs[APIC_ID]) == vcpu->id << 24, + "x2APIC ID should be fully readonly"); + } + + kvm_vm_free(vm); +} + +int main(int argc, char *argv[]) +{ + struct xapic_vcpu x = { + .vcpu = NULL, + .is_x2apic = true, + }; + struct kvm_vm *vm; + + vm = vm_create_with_one_vcpu(&x.vcpu, x2apic_guest_code); + test_icr(&x); + kvm_vm_free(vm); + + /* + * Use a second VM for the xAPIC test so that x2APIC can be hidden from + * the guest in order to test AVIC. KVM disallows changing CPUID after + * KVM_RUN and AVIC is disabled if _any_ vCPU is allowed to use x2APIC. + */ + vm = vm_create_with_one_vcpu(&x.vcpu, xapic_guest_code); + x.is_x2apic = false; + + /* + * AMD's AVIC implementation is buggy (fails to clear the ICR BUSY bit), + * and also diverges from KVM with respect to ICR2[23:0] (KVM and Intel + * drops writes, AMD does not). Account for the errata when checking + * that KVM reads back what was written. + */ + x.has_xavic_errata = host_cpu_is_amd && + get_kvm_amd_param_bool("avic"); + + vcpu_clear_cpuid_feature(x.vcpu, X86_FEATURE_X2APIC); + + virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA); + test_icr(&x); + kvm_vm_free(vm); + + test_apic_id(); + test_x2apic_id(); +} diff --git a/tools/testing/selftests/kvm/x86/xcr0_cpuid_test.c b/tools/testing/selftests/kvm/x86/xcr0_cpuid_test.c new file mode 100644 index 000000000000..c8a5c5e51661 --- /dev/null +++ b/tools/testing/selftests/kvm/x86/xcr0_cpuid_test.c @@ -0,0 +1,139 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * XCR0 cpuid test + * + * Copyright (C) 2022, Google LLC. + */ +#include +#include +#include +#include +#include + +#include "test_util.h" + +#include "kvm_util.h" +#include "processor.h" + +/* + * Assert that architectural dependency rules are satisfied, e.g. that AVX is + * supported if and only if SSE is supported. + */ +#define ASSERT_XFEATURE_DEPENDENCIES(supported_xcr0, xfeatures, dependencies) \ +do { \ + uint64_t __supported = (supported_xcr0) & ((xfeatures) | (dependencies)); \ + \ + __GUEST_ASSERT((__supported & (xfeatures)) != (xfeatures) || \ + __supported == ((xfeatures) | (dependencies)), \ + "supported = 0x%lx, xfeatures = 0x%llx, dependencies = 0x%llx", \ + __supported, (xfeatures), (dependencies)); \ +} while (0) + +/* + * Assert that KVM reports a sane, usable as-is XCR0. Architecturally, a CPU + * isn't strictly required to _support_ all XFeatures related to a feature, but + * at the same time XSETBV will #GP if bundled XFeatures aren't enabled and + * disabled coherently. E.g. a CPU can technically enumerate supported for + * XTILE_CFG but not XTILE_DATA, but attempting to enable XTILE_CFG without + * XTILE_DATA will #GP. 
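+ * E.g. ASSERT_ALL_OR_NONE_XFEATURE(xcr0, XFEATURE_MASK_XTILE) passes only if
+ * XCR0 advertises both XTILE_CFG and XTILE_DATA, or neither of them.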
+ */ +#define ASSERT_ALL_OR_NONE_XFEATURE(supported_xcr0, xfeatures) \ +do { \ + uint64_t __supported = (supported_xcr0) & (xfeatures); \ + \ + __GUEST_ASSERT(!__supported || __supported == (xfeatures), \ + "supported = 0x%lx, xfeatures = 0x%llx", \ + __supported, (xfeatures)); \ +} while (0) + +static void guest_code(void) +{ + uint64_t initial_xcr0; + uint64_t supported_xcr0; + int i, vector; + + set_cr4(get_cr4() | X86_CR4_OSXSAVE); + + initial_xcr0 = xgetbv(0); + supported_xcr0 = this_cpu_supported_xcr0(); + + GUEST_ASSERT(initial_xcr0 == supported_xcr0); + + /* Check AVX */ + ASSERT_XFEATURE_DEPENDENCIES(supported_xcr0, + XFEATURE_MASK_YMM, + XFEATURE_MASK_SSE); + + /* Check MPX */ + ASSERT_ALL_OR_NONE_XFEATURE(supported_xcr0, + XFEATURE_MASK_BNDREGS | XFEATURE_MASK_BNDCSR); + + /* Check AVX-512 */ + ASSERT_XFEATURE_DEPENDENCIES(supported_xcr0, + XFEATURE_MASK_AVX512, + XFEATURE_MASK_SSE | XFEATURE_MASK_YMM); + ASSERT_ALL_OR_NONE_XFEATURE(supported_xcr0, + XFEATURE_MASK_AVX512); + + /* Check AMX */ + ASSERT_ALL_OR_NONE_XFEATURE(supported_xcr0, + XFEATURE_MASK_XTILE); + + vector = xsetbv_safe(0, XFEATURE_MASK_FP); + __GUEST_ASSERT(!vector, + "Expected success on XSETBV(FP), got vector '0x%x'", + vector); + + vector = xsetbv_safe(0, supported_xcr0); + __GUEST_ASSERT(!vector, + "Expected success on XSETBV(0x%lx), got vector '0x%x'", + supported_xcr0, vector); + + for (i = 0; i < 64; i++) { + if (supported_xcr0 & BIT_ULL(i)) + continue; + + vector = xsetbv_safe(0, supported_xcr0 | BIT_ULL(i)); + __GUEST_ASSERT(vector == GP_VECTOR, + "Expected #GP on XSETBV(0x%llx), supported XCR0 = %lx, got vector '0x%x'", + BIT_ULL(i), supported_xcr0, vector); + } + + GUEST_DONE(); +} + +int main(int argc, char *argv[]) +{ + struct kvm_vcpu *vcpu; + struct kvm_run *run; + struct kvm_vm *vm; + struct ucall uc; + + TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XSAVE)); + + vm = vm_create_with_one_vcpu(&vcpu, guest_code); + run = vcpu->run; + + while (1) { + vcpu_run(vcpu); + + TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, + "Unexpected exit reason: %u (%s),", + run->exit_reason, + exit_reason_str(run->exit_reason)); + + switch (get_ucall(vcpu, &uc)) { + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + break; + case UCALL_DONE: + goto done; + default: + TEST_FAIL("Unknown ucall %lu", uc.cmd); + } + } + +done: + kvm_vm_free(vm); + return 0; +} diff --git a/tools/testing/selftests/kvm/x86/xen_shinfo_test.c b/tools/testing/selftests/kvm/x86/xen_shinfo_test.c new file mode 100644 index 000000000000..a59b3c799bb2 --- /dev/null +++ b/tools/testing/selftests/kvm/x86/xen_shinfo_test.c @@ -0,0 +1,1161 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright © 2021 Amazon.com, Inc. or its affiliates. 
+ */ + +#include "test_util.h" +#include "kvm_util.h" +#include "processor.h" + +#include +#include +#include +#include +#include + +#include + +#define SHINFO_REGION_GVA 0xc0000000ULL +#define SHINFO_REGION_GPA 0xc0000000ULL +#define SHINFO_REGION_SLOT 10 + +#define DUMMY_REGION_GPA (SHINFO_REGION_GPA + (3 * PAGE_SIZE)) +#define DUMMY_REGION_SLOT 11 + +#define DUMMY_REGION_GPA_2 (SHINFO_REGION_GPA + (4 * PAGE_SIZE)) +#define DUMMY_REGION_SLOT_2 12 + +#define SHINFO_ADDR (SHINFO_REGION_GPA) +#define VCPU_INFO_ADDR (SHINFO_REGION_GPA + 0x40) +#define PVTIME_ADDR (SHINFO_REGION_GPA + PAGE_SIZE) +#define RUNSTATE_ADDR (SHINFO_REGION_GPA + PAGE_SIZE + PAGE_SIZE - 15) + +#define SHINFO_VADDR (SHINFO_REGION_GVA) +#define VCPU_INFO_VADDR (SHINFO_REGION_GVA + 0x40) +#define RUNSTATE_VADDR (SHINFO_REGION_GVA + PAGE_SIZE + PAGE_SIZE - 15) + +#define EVTCHN_VECTOR 0x10 + +#define EVTCHN_TEST1 15 +#define EVTCHN_TEST2 66 +#define EVTCHN_TIMER 13 + +enum { + TEST_INJECT_VECTOR = 0, + TEST_RUNSTATE_runnable, + TEST_RUNSTATE_blocked, + TEST_RUNSTATE_offline, + TEST_RUNSTATE_ADJUST, + TEST_RUNSTATE_DATA, + TEST_STEAL_TIME, + TEST_EVTCHN_MASKED, + TEST_EVTCHN_UNMASKED, + TEST_EVTCHN_SLOWPATH, + TEST_EVTCHN_SEND_IOCTL, + TEST_EVTCHN_HCALL, + TEST_EVTCHN_HCALL_SLOWPATH, + TEST_EVTCHN_HCALL_EVENTFD, + TEST_TIMER_SETUP, + TEST_TIMER_WAIT, + TEST_TIMER_RESTORE, + TEST_POLL_READY, + TEST_POLL_TIMEOUT, + TEST_POLL_MASKED, + TEST_POLL_WAKE, + SET_VCPU_INFO, + TEST_TIMER_PAST, + TEST_LOCKING_SEND_RACE, + TEST_LOCKING_POLL_RACE, + TEST_LOCKING_POLL_TIMEOUT, + TEST_DONE, + + TEST_GUEST_SAW_IRQ, +}; + +#define XEN_HYPERCALL_MSR 0x40000000 + +#define MIN_STEAL_TIME 50000 + +#define SHINFO_RACE_TIMEOUT 2 /* seconds */ + +#define __HYPERVISOR_set_timer_op 15 +#define __HYPERVISOR_sched_op 29 +#define __HYPERVISOR_event_channel_op 32 + +#define SCHEDOP_poll 3 + +#define EVTCHNOP_send 4 + +#define EVTCHNSTAT_interdomain 2 + +struct evtchn_send { + u32 port; +}; + +struct sched_poll { + u32 *ports; + unsigned int nr_ports; + u64 timeout; +}; + +struct pvclock_vcpu_time_info { + u32 version; + u32 pad0; + u64 tsc_timestamp; + u64 system_time; + u32 tsc_to_system_mul; + s8 tsc_shift; + u8 flags; + u8 pad[2]; +} __attribute__((__packed__)); /* 32 bytes */ + +struct pvclock_wall_clock { + u32 version; + u32 sec; + u32 nsec; +} __attribute__((__packed__)); + +struct vcpu_runstate_info { + uint32_t state; + uint64_t state_entry_time; + uint64_t time[5]; /* Extra field for overrun check */ +}; + +struct compat_vcpu_runstate_info { + uint32_t state; + uint64_t state_entry_time; + uint64_t time[5]; +} __attribute__((__packed__)); + +struct arch_vcpu_info { + unsigned long cr2; + unsigned long pad; /* sizeof(vcpu_info_t) == 64 */ +}; + +struct vcpu_info { + uint8_t evtchn_upcall_pending; + uint8_t evtchn_upcall_mask; + unsigned long evtchn_pending_sel; + struct arch_vcpu_info arch; + struct pvclock_vcpu_time_info time; +}; /* 64 bytes (x86) */ + +struct shared_info { + struct vcpu_info vcpu_info[32]; + unsigned long evtchn_pending[64]; + unsigned long evtchn_mask[64]; + struct pvclock_wall_clock wc; + uint32_t wc_sec_hi; + /* arch_shared_info here */ +}; + +#define RUNSTATE_running 0 +#define RUNSTATE_runnable 1 +#define RUNSTATE_blocked 2 +#define RUNSTATE_offline 3 + +static const char *runstate_names[] = { + "running", + "runnable", + "blocked", + "offline" +}; + +struct { + struct kvm_irq_routing info; + struct kvm_irq_routing_entry entries[2]; +} irq_routes; + +static volatile bool guest_saw_irq; + +static void 
evtchn_handler(struct ex_regs *regs) +{ + struct vcpu_info *vi = (void *)VCPU_INFO_VADDR; + + vcpu_arch_put_guest(vi->evtchn_upcall_pending, 0); + vcpu_arch_put_guest(vi->evtchn_pending_sel, 0); + guest_saw_irq = true; + + GUEST_SYNC(TEST_GUEST_SAW_IRQ); +} + +static void guest_wait_for_irq(void) +{ + while (!guest_saw_irq) + __asm__ __volatile__ ("rep nop" : : : "memory"); + guest_saw_irq = false; +} + +static void guest_code(void) +{ + struct vcpu_runstate_info *rs = (void *)RUNSTATE_VADDR; + int i; + + __asm__ __volatile__( + "sti\n" + "nop\n" + ); + + /* Trigger an interrupt injection */ + GUEST_SYNC(TEST_INJECT_VECTOR); + + guest_wait_for_irq(); + + /* Test having the host set runstates manually */ + GUEST_SYNC(TEST_RUNSTATE_runnable); + GUEST_ASSERT(rs->time[RUNSTATE_runnable] != 0); + GUEST_ASSERT(rs->state == 0); + + GUEST_SYNC(TEST_RUNSTATE_blocked); + GUEST_ASSERT(rs->time[RUNSTATE_blocked] != 0); + GUEST_ASSERT(rs->state == 0); + + GUEST_SYNC(TEST_RUNSTATE_offline); + GUEST_ASSERT(rs->time[RUNSTATE_offline] != 0); + GUEST_ASSERT(rs->state == 0); + + /* Test runstate time adjust */ + GUEST_SYNC(TEST_RUNSTATE_ADJUST); + GUEST_ASSERT(rs->time[RUNSTATE_blocked] == 0x5a); + GUEST_ASSERT(rs->time[RUNSTATE_offline] == 0x6b6b); + + /* Test runstate time set */ + GUEST_SYNC(TEST_RUNSTATE_DATA); + GUEST_ASSERT(rs->state_entry_time >= 0x8000); + GUEST_ASSERT(rs->time[RUNSTATE_runnable] == 0); + GUEST_ASSERT(rs->time[RUNSTATE_blocked] == 0x6b6b); + GUEST_ASSERT(rs->time[RUNSTATE_offline] == 0x5a); + + /* sched_yield() should result in some 'runnable' time */ + GUEST_SYNC(TEST_STEAL_TIME); + GUEST_ASSERT(rs->time[RUNSTATE_runnable] >= MIN_STEAL_TIME); + + /* Attempt to deliver a *masked* interrupt */ + GUEST_SYNC(TEST_EVTCHN_MASKED); + + /* Wait until we see the bit set */ + struct shared_info *si = (void *)SHINFO_VADDR; + while (!si->evtchn_pending[0]) + __asm__ __volatile__ ("rep nop" : : : "memory"); + + /* Now deliver an *unmasked* interrupt */ + GUEST_SYNC(TEST_EVTCHN_UNMASKED); + + guest_wait_for_irq(); + + /* Change memslots and deliver an interrupt */ + GUEST_SYNC(TEST_EVTCHN_SLOWPATH); + + guest_wait_for_irq(); + + /* Deliver event channel with KVM_XEN_HVM_EVTCHN_SEND */ + GUEST_SYNC(TEST_EVTCHN_SEND_IOCTL); + + guest_wait_for_irq(); + + GUEST_SYNC(TEST_EVTCHN_HCALL); + + /* Our turn. Deliver event channel (to ourselves) with + * EVTCHNOP_send hypercall. */ + struct evtchn_send s = { .port = 127 }; + xen_hypercall(__HYPERVISOR_event_channel_op, EVTCHNOP_send, &s); + + guest_wait_for_irq(); + + GUEST_SYNC(TEST_EVTCHN_HCALL_SLOWPATH); + + /* + * Same again, but this time the host has messed with memslots so it + * should take the slow path in kvm_xen_set_evtchn(). + */ + xen_hypercall(__HYPERVISOR_event_channel_op, EVTCHNOP_send, &s); + + guest_wait_for_irq(); + + GUEST_SYNC(TEST_EVTCHN_HCALL_EVENTFD); + + /* Deliver "outbound" event channel to an eventfd which + * happens to be one of our own irqfds. */ + s.port = 197; + xen_hypercall(__HYPERVISOR_event_channel_op, EVTCHNOP_send, &s); + + guest_wait_for_irq(); + + GUEST_SYNC(TEST_TIMER_SETUP); + + /* Set a timer 100ms in the future. */ + xen_hypercall(__HYPERVISOR_set_timer_op, + rs->state_entry_time + 100000000, NULL); + + GUEST_SYNC(TEST_TIMER_WAIT); + + /* Now wait for the timer */ + guest_wait_for_irq(); + + GUEST_SYNC(TEST_TIMER_RESTORE); + + /* The host has 'restored' the timer. Just wait for it. 
*/ + guest_wait_for_irq(); + + GUEST_SYNC(TEST_POLL_READY); + + /* Poll for an event channel port which is already set */ + u32 ports[1] = { EVTCHN_TIMER }; + struct sched_poll p = { + .ports = ports, + .nr_ports = 1, + .timeout = 0, + }; + + xen_hypercall(__HYPERVISOR_sched_op, SCHEDOP_poll, &p); + + GUEST_SYNC(TEST_POLL_TIMEOUT); + + /* Poll for an unset port and wait for the timeout. */ + p.timeout = 100000000; + xen_hypercall(__HYPERVISOR_sched_op, SCHEDOP_poll, &p); + + GUEST_SYNC(TEST_POLL_MASKED); + + /* A timer will wake the masked port we're waiting on, while we poll */ + p.timeout = 0; + xen_hypercall(__HYPERVISOR_sched_op, SCHEDOP_poll, &p); + + GUEST_SYNC(TEST_POLL_WAKE); + + /* Set the vcpu_info to point at exactly the place it already is to + * make sure the attribute is functional. */ + GUEST_SYNC(SET_VCPU_INFO); + + /* A timer wake an *unmasked* port which should wake us with an + * actual interrupt, while we're polling on a different port. */ + ports[0]++; + p.timeout = 0; + xen_hypercall(__HYPERVISOR_sched_op, SCHEDOP_poll, &p); + + guest_wait_for_irq(); + + GUEST_SYNC(TEST_TIMER_PAST); + + /* Timer should have fired already */ + guest_wait_for_irq(); + + GUEST_SYNC(TEST_LOCKING_SEND_RACE); + /* Racing host ioctls */ + + guest_wait_for_irq(); + + GUEST_SYNC(TEST_LOCKING_POLL_RACE); + /* Racing vmcall against host ioctl */ + + ports[0] = 0; + + p = (struct sched_poll) { + .ports = ports, + .nr_ports = 1, + .timeout = 0 + }; + +wait_for_timer: + /* + * Poll for a timer wake event while the worker thread is mucking with + * the shared info. KVM XEN drops timer IRQs if the shared info is + * invalid when the timer expires. Arbitrarily poll 100 times before + * giving up and asking the VMM to re-arm the timer. 100 polls should + * consume enough time to beat on KVM without taking too long if the + * timer IRQ is dropped due to an invalid event channel. + */ + for (i = 0; i < 100 && !guest_saw_irq; i++) + __xen_hypercall(__HYPERVISOR_sched_op, SCHEDOP_poll, &p); + + /* + * Re-send the timer IRQ if it was (likely) dropped due to the timer + * expiring while the event channel was invalid. 
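+	 * The host reacts to TEST_LOCKING_POLL_TIMEOUT by re-arming the timer
+	 * when it has already expired without the IRQ being delivered, and the
+	 * guest then polls again.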
+ */ + if (!guest_saw_irq) { + GUEST_SYNC(TEST_LOCKING_POLL_TIMEOUT); + goto wait_for_timer; + } + guest_saw_irq = false; + + GUEST_SYNC(TEST_DONE); +} + +static struct shared_info *shinfo; +static struct vcpu_info *vinfo; +static struct kvm_vcpu *vcpu; + +static void handle_alrm(int sig) +{ + if (vinfo) + printf("evtchn_upcall_pending 0x%x\n", vinfo->evtchn_upcall_pending); + vcpu_dump(stdout, vcpu, 0); + TEST_FAIL("IRQ delivery timed out"); +} + +static void *juggle_shinfo_state(void *arg) +{ + struct kvm_vm *vm = (struct kvm_vm *)arg; + + struct kvm_xen_hvm_attr cache_activate_gfn = { + .type = KVM_XEN_ATTR_TYPE_SHARED_INFO, + .u.shared_info.gfn = SHINFO_REGION_GPA / PAGE_SIZE + }; + + struct kvm_xen_hvm_attr cache_deactivate_gfn = { + .type = KVM_XEN_ATTR_TYPE_SHARED_INFO, + .u.shared_info.gfn = KVM_XEN_INVALID_GFN + }; + + struct kvm_xen_hvm_attr cache_activate_hva = { + .type = KVM_XEN_ATTR_TYPE_SHARED_INFO_HVA, + .u.shared_info.hva = (unsigned long)shinfo + }; + + struct kvm_xen_hvm_attr cache_deactivate_hva = { + .type = KVM_XEN_ATTR_TYPE_SHARED_INFO, + .u.shared_info.hva = 0 + }; + + int xen_caps = kvm_check_cap(KVM_CAP_XEN_HVM); + + for (;;) { + __vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &cache_activate_gfn); + pthread_testcancel(); + __vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &cache_deactivate_gfn); + + if (xen_caps & KVM_XEN_HVM_CONFIG_SHARED_INFO_HVA) { + __vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &cache_activate_hva); + pthread_testcancel(); + __vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &cache_deactivate_hva); + } + } + + return NULL; +} + +int main(int argc, char *argv[]) +{ + struct kvm_xen_hvm_attr evt_reset; + struct kvm_vm *vm; + pthread_t thread; + bool verbose; + int ret; + + verbose = argc > 1 && (!strncmp(argv[1], "-v", 3) || + !strncmp(argv[1], "--verbose", 10)); + + int xen_caps = kvm_check_cap(KVM_CAP_XEN_HVM); + TEST_REQUIRE(xen_caps & KVM_XEN_HVM_CONFIG_SHARED_INFO); + + bool do_runstate_tests = !!(xen_caps & KVM_XEN_HVM_CONFIG_RUNSTATE); + bool do_runstate_flag = !!(xen_caps & KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG); + bool do_eventfd_tests = !!(xen_caps & KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL); + bool do_evtchn_tests = do_eventfd_tests && !!(xen_caps & KVM_XEN_HVM_CONFIG_EVTCHN_SEND); + bool has_shinfo_hva = !!(xen_caps & KVM_XEN_HVM_CONFIG_SHARED_INFO_HVA); + + vm = vm_create_with_one_vcpu(&vcpu, guest_code); + + /* Map a region for the shared_info page */ + vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, + SHINFO_REGION_GPA, SHINFO_REGION_SLOT, 3, 0); + virt_map(vm, SHINFO_REGION_GVA, SHINFO_REGION_GPA, 3); + + shinfo = addr_gpa2hva(vm, SHINFO_VADDR); + + int zero_fd = open("/dev/zero", O_RDONLY); + TEST_ASSERT(zero_fd != -1, "Failed to open /dev/zero"); + + struct kvm_xen_hvm_config hvmc = { + .flags = KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL, + .msr = XEN_HYPERCALL_MSR, + }; + + /* Let the kernel know that we *will* use it for sending all + * event channels, which lets it intercept SCHEDOP_poll */ + if (do_evtchn_tests) + hvmc.flags |= KVM_XEN_HVM_CONFIG_EVTCHN_SEND; + + vm_ioctl(vm, KVM_XEN_HVM_CONFIG, &hvmc); + + struct kvm_xen_hvm_attr lm = { + .type = KVM_XEN_ATTR_TYPE_LONG_MODE, + .u.long_mode = 1, + }; + vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &lm); + + if (do_runstate_flag) { + struct kvm_xen_hvm_attr ruf = { + .type = KVM_XEN_ATTR_TYPE_RUNSTATE_UPDATE_FLAG, + .u.runstate_update_flag = 1, + }; + vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &ruf); + + ruf.u.runstate_update_flag = 0; + vm_ioctl(vm, KVM_XEN_HVM_GET_ATTR, &ruf); + TEST_ASSERT(ruf.u.runstate_update_flag == 1, + "Failed to read 
back RUNSTATE_UPDATE_FLAG attr"); + } + + struct kvm_xen_hvm_attr ha = {}; + + if (has_shinfo_hva) { + ha.type = KVM_XEN_ATTR_TYPE_SHARED_INFO_HVA; + ha.u.shared_info.hva = (unsigned long)shinfo; + } else { + ha.type = KVM_XEN_ATTR_TYPE_SHARED_INFO; + ha.u.shared_info.gfn = SHINFO_ADDR / PAGE_SIZE; + } + + vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &ha); + + /* + * Test what happens when the HVA of the shinfo page is remapped after + * the kernel has a reference to it. But make sure we copy the clock + * info over since that's only set at setup time, and we test it later. + */ + struct pvclock_wall_clock wc_copy = shinfo->wc; + void *m = mmap(shinfo, PAGE_SIZE, PROT_READ|PROT_WRITE, MAP_FIXED|MAP_PRIVATE, zero_fd, 0); + TEST_ASSERT(m == shinfo, "Failed to map /dev/zero over shared info"); + shinfo->wc = wc_copy; + + struct kvm_xen_vcpu_attr vi = { + .type = KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO, + .u.gpa = VCPU_INFO_ADDR, + }; + vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &vi); + + struct kvm_xen_vcpu_attr pvclock = { + .type = KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO, + .u.gpa = PVTIME_ADDR, + }; + vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &pvclock); + + struct kvm_xen_hvm_attr vec = { + .type = KVM_XEN_ATTR_TYPE_UPCALL_VECTOR, + .u.vector = EVTCHN_VECTOR, + }; + vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &vec); + + vm_install_exception_handler(vm, EVTCHN_VECTOR, evtchn_handler); + + if (do_runstate_tests) { + struct kvm_xen_vcpu_attr st = { + .type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR, + .u.gpa = RUNSTATE_ADDR, + }; + vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &st); + } + + int irq_fd[2] = { -1, -1 }; + + if (do_eventfd_tests) { + irq_fd[0] = eventfd(0, 0); + irq_fd[1] = eventfd(0, 0); + + /* Unexpected, but not a KVM failure */ + if (irq_fd[0] == -1 || irq_fd[1] == -1) + do_evtchn_tests = do_eventfd_tests = false; + } + + if (do_eventfd_tests) { + irq_routes.info.nr = 2; + + irq_routes.entries[0].gsi = 32; + irq_routes.entries[0].type = KVM_IRQ_ROUTING_XEN_EVTCHN; + irq_routes.entries[0].u.xen_evtchn.port = EVTCHN_TEST1; + irq_routes.entries[0].u.xen_evtchn.vcpu = vcpu->id; + irq_routes.entries[0].u.xen_evtchn.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL; + + irq_routes.entries[1].gsi = 33; + irq_routes.entries[1].type = KVM_IRQ_ROUTING_XEN_EVTCHN; + irq_routes.entries[1].u.xen_evtchn.port = EVTCHN_TEST2; + irq_routes.entries[1].u.xen_evtchn.vcpu = vcpu->id; + irq_routes.entries[1].u.xen_evtchn.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL; + + vm_ioctl(vm, KVM_SET_GSI_ROUTING, &irq_routes.info); + + struct kvm_irqfd ifd = { }; + + ifd.fd = irq_fd[0]; + ifd.gsi = 32; + vm_ioctl(vm, KVM_IRQFD, &ifd); + + ifd.fd = irq_fd[1]; + ifd.gsi = 33; + vm_ioctl(vm, KVM_IRQFD, &ifd); + + struct sigaction sa = { }; + sa.sa_handler = handle_alrm; + sigaction(SIGALRM, &sa, NULL); + } + + struct kvm_xen_vcpu_attr tmr = { + .type = KVM_XEN_VCPU_ATTR_TYPE_TIMER, + .u.timer.port = EVTCHN_TIMER, + .u.timer.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL, + .u.timer.expires_ns = 0 + }; + + if (do_evtchn_tests) { + struct kvm_xen_hvm_attr inj = { + .type = KVM_XEN_ATTR_TYPE_EVTCHN, + .u.evtchn.send_port = 127, + .u.evtchn.type = EVTCHNSTAT_interdomain, + .u.evtchn.flags = 0, + .u.evtchn.deliver.port.port = EVTCHN_TEST1, + .u.evtchn.deliver.port.vcpu = vcpu->id + 1, + .u.evtchn.deliver.port.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL, + }; + vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &inj); + + /* Test migration to a different vCPU */ + inj.u.evtchn.flags = KVM_XEN_EVTCHN_UPDATE; + inj.u.evtchn.deliver.port.vcpu = vcpu->id; + 
vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &inj); + + inj.u.evtchn.send_port = 197; + inj.u.evtchn.deliver.eventfd.port = 0; + inj.u.evtchn.deliver.eventfd.fd = irq_fd[1]; + inj.u.evtchn.flags = 0; + vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &inj); + + vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr); + } + vinfo = addr_gpa2hva(vm, VCPU_INFO_VADDR); + vinfo->evtchn_upcall_pending = 0; + + struct vcpu_runstate_info *rs = addr_gpa2hva(vm, RUNSTATE_ADDR); + rs->state = 0x5a; + + bool evtchn_irq_expected = false; + + for (;;) { + struct ucall uc; + + vcpu_run(vcpu); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); + + switch (get_ucall(vcpu, &uc)) { + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + /* NOT REACHED */ + case UCALL_SYNC: { + struct kvm_xen_vcpu_attr rst; + long rundelay; + + if (do_runstate_tests) + TEST_ASSERT(rs->state_entry_time == rs->time[0] + + rs->time[1] + rs->time[2] + rs->time[3], + "runstate times don't add up"); + + switch (uc.args[1]) { + case TEST_INJECT_VECTOR: + if (verbose) + printf("Delivering evtchn upcall\n"); + evtchn_irq_expected = true; + vinfo->evtchn_upcall_pending = 1; + break; + + case TEST_RUNSTATE_runnable...TEST_RUNSTATE_offline: + TEST_ASSERT(!evtchn_irq_expected, "Event channel IRQ not seen"); + if (!do_runstate_tests) + goto done; + if (verbose) + printf("Testing runstate %s\n", runstate_names[uc.args[1]]); + rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT; + rst.u.runstate.state = uc.args[1] + RUNSTATE_runnable - + TEST_RUNSTATE_runnable; + vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &rst); + break; + + case TEST_RUNSTATE_ADJUST: + if (verbose) + printf("Testing RUNSTATE_ADJUST\n"); + rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST; + memset(&rst.u, 0, sizeof(rst.u)); + rst.u.runstate.state = (uint64_t)-1; + rst.u.runstate.time_blocked = + 0x5a - rs->time[RUNSTATE_blocked]; + rst.u.runstate.time_offline = + 0x6b6b - rs->time[RUNSTATE_offline]; + rst.u.runstate.time_runnable = -rst.u.runstate.time_blocked - + rst.u.runstate.time_offline; + vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &rst); + break; + + case TEST_RUNSTATE_DATA: + if (verbose) + printf("Testing RUNSTATE_DATA\n"); + rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA; + memset(&rst.u, 0, sizeof(rst.u)); + rst.u.runstate.state = RUNSTATE_running; + rst.u.runstate.state_entry_time = 0x6b6b + 0x5a; + rst.u.runstate.time_blocked = 0x6b6b; + rst.u.runstate.time_offline = 0x5a; + vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &rst); + break; + + case TEST_STEAL_TIME: + if (verbose) + printf("Testing steal time\n"); + /* Yield until scheduler delay exceeds target */ + rundelay = get_run_delay() + MIN_STEAL_TIME; + do { + sched_yield(); + } while (get_run_delay() < rundelay); + break; + + case TEST_EVTCHN_MASKED: + if (!do_eventfd_tests) + goto done; + if (verbose) + printf("Testing masked event channel\n"); + shinfo->evtchn_mask[0] = 1UL << EVTCHN_TEST1; + eventfd_write(irq_fd[0], 1UL); + alarm(1); + break; + + case TEST_EVTCHN_UNMASKED: + if (verbose) + printf("Testing unmasked event channel\n"); + /* Unmask that, but deliver the other one */ + shinfo->evtchn_pending[0] = 0; + shinfo->evtchn_mask[0] = 0; + eventfd_write(irq_fd[1], 1UL); + evtchn_irq_expected = true; + alarm(1); + break; + + case TEST_EVTCHN_SLOWPATH: + TEST_ASSERT(!evtchn_irq_expected, + "Expected event channel IRQ but it didn't happen"); + shinfo->evtchn_pending[1] = 0; + if (verbose) + printf("Testing event channel after memslot change\n"); + vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, + DUMMY_REGION_GPA, DUMMY_REGION_SLOT, 1, 0); + 
eventfd_write(irq_fd[0], 1UL); + evtchn_irq_expected = true; + alarm(1); + break; + + case TEST_EVTCHN_SEND_IOCTL: + TEST_ASSERT(!evtchn_irq_expected, + "Expected event channel IRQ but it didn't happen"); + if (!do_evtchn_tests) + goto done; + + shinfo->evtchn_pending[0] = 0; + if (verbose) + printf("Testing injection with KVM_XEN_HVM_EVTCHN_SEND\n"); + + struct kvm_irq_routing_xen_evtchn e; + e.port = EVTCHN_TEST2; + e.vcpu = vcpu->id; + e.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL; + + vm_ioctl(vm, KVM_XEN_HVM_EVTCHN_SEND, &e); + evtchn_irq_expected = true; + alarm(1); + break; + + case TEST_EVTCHN_HCALL: + TEST_ASSERT(!evtchn_irq_expected, + "Expected event channel IRQ but it didn't happen"); + shinfo->evtchn_pending[1] = 0; + + if (verbose) + printf("Testing guest EVTCHNOP_send direct to evtchn\n"); + evtchn_irq_expected = true; + alarm(1); + break; + + case TEST_EVTCHN_HCALL_SLOWPATH: + TEST_ASSERT(!evtchn_irq_expected, + "Expected event channel IRQ but it didn't happen"); + shinfo->evtchn_pending[0] = 0; + + if (verbose) + printf("Testing guest EVTCHNOP_send direct to evtchn after memslot change\n"); + vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, + DUMMY_REGION_GPA_2, DUMMY_REGION_SLOT_2, 1, 0); + evtchn_irq_expected = true; + alarm(1); + break; + + case TEST_EVTCHN_HCALL_EVENTFD: + TEST_ASSERT(!evtchn_irq_expected, + "Expected event channel IRQ but it didn't happen"); + shinfo->evtchn_pending[0] = 0; + + if (verbose) + printf("Testing guest EVTCHNOP_send to eventfd\n"); + evtchn_irq_expected = true; + alarm(1); + break; + + case TEST_TIMER_SETUP: + TEST_ASSERT(!evtchn_irq_expected, + "Expected event channel IRQ but it didn't happen"); + shinfo->evtchn_pending[1] = 0; + + if (verbose) + printf("Testing guest oneshot timer\n"); + break; + + case TEST_TIMER_WAIT: + memset(&tmr, 0, sizeof(tmr)); + tmr.type = KVM_XEN_VCPU_ATTR_TYPE_TIMER; + vcpu_ioctl(vcpu, KVM_XEN_VCPU_GET_ATTR, &tmr); + TEST_ASSERT(tmr.u.timer.port == EVTCHN_TIMER, + "Timer port not returned"); + TEST_ASSERT(tmr.u.timer.priority == KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL, + "Timer priority not returned"); + TEST_ASSERT(tmr.u.timer.expires_ns > rs->state_entry_time, + "Timer expiry not returned"); + evtchn_irq_expected = true; + alarm(1); + break; + + case TEST_TIMER_RESTORE: + TEST_ASSERT(!evtchn_irq_expected, + "Expected event channel IRQ but it didn't happen"); + shinfo->evtchn_pending[0] = 0; + + if (verbose) + printf("Testing restored oneshot timer\n"); + + tmr.u.timer.expires_ns = rs->state_entry_time + 100000000; + vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr); + evtchn_irq_expected = true; + alarm(1); + break; + + case TEST_POLL_READY: + TEST_ASSERT(!evtchn_irq_expected, + "Expected event channel IRQ but it didn't happen"); + + if (verbose) + printf("Testing SCHEDOP_poll with already pending event\n"); + shinfo->evtchn_pending[0] = shinfo->evtchn_mask[0] = 1UL << EVTCHN_TIMER; + alarm(1); + break; + + case TEST_POLL_TIMEOUT: + if (verbose) + printf("Testing SCHEDOP_poll timeout\n"); + shinfo->evtchn_pending[0] = 0; + alarm(1); + break; + + case TEST_POLL_MASKED: + if (verbose) + printf("Testing SCHEDOP_poll wake on masked event\n"); + + tmr.u.timer.expires_ns = rs->state_entry_time + 100000000; + vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr); + alarm(1); + break; + + case TEST_POLL_WAKE: + shinfo->evtchn_pending[0] = shinfo->evtchn_mask[0] = 0; + if (verbose) + printf("Testing SCHEDOP_poll wake on unmasked event\n"); + + evtchn_irq_expected = true; + tmr.u.timer.expires_ns = rs->state_entry_time + 
100000000; + vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr); + + /* Read it back and check the pending time is reported correctly */ + tmr.u.timer.expires_ns = 0; + vcpu_ioctl(vcpu, KVM_XEN_VCPU_GET_ATTR, &tmr); + TEST_ASSERT(tmr.u.timer.expires_ns == rs->state_entry_time + 100000000, + "Timer not reported pending"); + alarm(1); + break; + + case SET_VCPU_INFO: + if (has_shinfo_hva) { + struct kvm_xen_vcpu_attr vih = { + .type = KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO_HVA, + .u.hva = (unsigned long)vinfo + }; + vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &vih); + } + break; + + case TEST_TIMER_PAST: + TEST_ASSERT(!evtchn_irq_expected, + "Expected event channel IRQ but it didn't happen"); + /* Read timer and check it is no longer pending */ + vcpu_ioctl(vcpu, KVM_XEN_VCPU_GET_ATTR, &tmr); + TEST_ASSERT(!tmr.u.timer.expires_ns, "Timer still reported pending"); + + shinfo->evtchn_pending[0] = 0; + if (verbose) + printf("Testing timer in the past\n"); + + evtchn_irq_expected = true; + tmr.u.timer.expires_ns = rs->state_entry_time - 100000000ULL; + vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr); + alarm(1); + break; + + case TEST_LOCKING_SEND_RACE: + TEST_ASSERT(!evtchn_irq_expected, + "Expected event channel IRQ but it didn't happen"); + alarm(0); + + if (verbose) + printf("Testing shinfo lock corruption (KVM_XEN_HVM_EVTCHN_SEND)\n"); + + ret = pthread_create(&thread, NULL, &juggle_shinfo_state, (void *)vm); + TEST_ASSERT(ret == 0, "pthread_create() failed: %s", strerror(ret)); + + struct kvm_irq_routing_xen_evtchn uxe = { + .port = 1, + .vcpu = vcpu->id, + .priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL + }; + + evtchn_irq_expected = true; + for (time_t t = time(NULL) + SHINFO_RACE_TIMEOUT; time(NULL) < t;) + __vm_ioctl(vm, KVM_XEN_HVM_EVTCHN_SEND, &uxe); + break; + + case TEST_LOCKING_POLL_RACE: + TEST_ASSERT(!evtchn_irq_expected, + "Expected event channel IRQ but it didn't happen"); + + if (verbose) + printf("Testing shinfo lock corruption (SCHEDOP_poll)\n"); + + shinfo->evtchn_pending[0] = 1; + + evtchn_irq_expected = true; + tmr.u.timer.expires_ns = rs->state_entry_time + + SHINFO_RACE_TIMEOUT * 1000000000ULL; + vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr); + break; + + case TEST_LOCKING_POLL_TIMEOUT: + /* + * Optional and possibly repeated sync point. + * Injecting the timer IRQ may fail if the + * shinfo is invalid when the timer expires. + * If the timer has expired but the IRQ hasn't + * been delivered, rearm the timer and retry. + */ + vcpu_ioctl(vcpu, KVM_XEN_VCPU_GET_ATTR, &tmr); + + /* Resume the guest if the timer is still pending. */ + if (tmr.u.timer.expires_ns) + break; + + /* All done if the IRQ was delivered. 
*/ + if (!evtchn_irq_expected) + break; + + tmr.u.timer.expires_ns = rs->state_entry_time + + SHINFO_RACE_TIMEOUT * 1000000000ULL; + vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr); + break; + case TEST_DONE: + TEST_ASSERT(!evtchn_irq_expected, + "Expected event channel IRQ but it didn't happen"); + + ret = pthread_cancel(thread); + TEST_ASSERT(ret == 0, "pthread_cancel() failed: %s", strerror(ret)); + + ret = pthread_join(thread, 0); + TEST_ASSERT(ret == 0, "pthread_join() failed: %s", strerror(ret)); + goto done; + + case TEST_GUEST_SAW_IRQ: + TEST_ASSERT(evtchn_irq_expected, "Unexpected event channel IRQ"); + evtchn_irq_expected = false; + break; + } + break; + } + case UCALL_DONE: + goto done; + default: + TEST_FAIL("Unknown ucall 0x%lx.", uc.cmd); + } + } + + done: + evt_reset.type = KVM_XEN_ATTR_TYPE_EVTCHN; + evt_reset.u.evtchn.flags = KVM_XEN_EVTCHN_RESET; + vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &evt_reset); + + alarm(0); + + /* + * Just a *really* basic check that things are being put in the + * right place. The actual calculations are much the same for + * Xen as they are for the KVM variants, so no need to check. + */ + struct pvclock_wall_clock *wc; + struct pvclock_vcpu_time_info *ti, *ti2; + struct kvm_clock_data kcdata; + long long delta; + + wc = addr_gpa2hva(vm, SHINFO_REGION_GPA + 0xc00); + ti = addr_gpa2hva(vm, SHINFO_REGION_GPA + 0x40 + 0x20); + ti2 = addr_gpa2hva(vm, PVTIME_ADDR); + + if (verbose) { + printf("Wall clock (v %d) %d.%09d\n", wc->version, wc->sec, wc->nsec); + printf("Time info 1: v %u tsc %" PRIu64 " time %" PRIu64 " mul %u shift %u flags %x\n", + ti->version, ti->tsc_timestamp, ti->system_time, ti->tsc_to_system_mul, + ti->tsc_shift, ti->flags); + printf("Time info 2: v %u tsc %" PRIu64 " time %" PRIu64 " mul %u shift %u flags %x\n", + ti2->version, ti2->tsc_timestamp, ti2->system_time, ti2->tsc_to_system_mul, + ti2->tsc_shift, ti2->flags); + } + + TEST_ASSERT(wc->version && !(wc->version & 1), + "Bad wallclock version %x", wc->version); + + vm_ioctl(vm, KVM_GET_CLOCK, &kcdata); + + if (kcdata.flags & KVM_CLOCK_REALTIME) { + if (verbose) { + printf("KVM_GET_CLOCK clock: %lld.%09lld\n", + kcdata.clock / NSEC_PER_SEC, kcdata.clock % NSEC_PER_SEC); + printf("KVM_GET_CLOCK realtime: %lld.%09lld\n", + kcdata.realtime / NSEC_PER_SEC, kcdata.realtime % NSEC_PER_SEC); + } + + delta = (wc->sec * NSEC_PER_SEC + wc->nsec) - (kcdata.realtime - kcdata.clock); + + /* + * KVM_GET_CLOCK gives CLOCK_REALTIME which jumps on leap seconds updates but + * unfortunately KVM doesn't currently offer a CLOCK_TAI alternative. Accept 1s + * delta as testing clock accuracy is not the goal here. The test just needs to + * check that the value in shinfo is somewhat sane. + */ + TEST_ASSERT(llabs(delta) < NSEC_PER_SEC, + "Guest's epoch from shinfo %d.%09d differs from KVM_GET_CLOCK %lld.%lld", + wc->sec, wc->nsec, (kcdata.realtime - kcdata.clock) / NSEC_PER_SEC, + (kcdata.realtime - kcdata.clock) % NSEC_PER_SEC); + } else { + pr_info("Missing KVM_CLOCK_REALTIME, skipping shinfo epoch sanity check\n"); + } + + TEST_ASSERT(ti->version && !(ti->version & 1), + "Bad time_info version %x", ti->version); + TEST_ASSERT(ti2->version && !(ti2->version & 1), + "Bad time_info version %x", ti->version); + + if (do_runstate_tests) { + /* + * Fetch runstate and check sanity. Strictly speaking in the + * general case we might not expect the numbers to be identical + * but in this case we know we aren't running the vCPU any more. 
+ */ + struct kvm_xen_vcpu_attr rst = { + .type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA, + }; + vcpu_ioctl(vcpu, KVM_XEN_VCPU_GET_ATTR, &rst); + + if (verbose) { + printf("Runstate: %s(%d), entry %" PRIu64 " ns\n", + rs->state <= RUNSTATE_offline ? runstate_names[rs->state] : "unknown", + rs->state, rs->state_entry_time); + for (int i = RUNSTATE_running; i <= RUNSTATE_offline; i++) { + printf("State %s: %" PRIu64 " ns\n", + runstate_names[i], rs->time[i]); + } + } + + /* + * Exercise runstate info at all points across the page boundary, in + * 32-bit and 64-bit mode. In particular, test the case where it is + * configured in 32-bit mode and then switched to 64-bit mode while + * active, which takes it onto the second page. + */ + unsigned long runstate_addr; + struct compat_vcpu_runstate_info *crs; + for (runstate_addr = SHINFO_REGION_GPA + PAGE_SIZE + PAGE_SIZE - sizeof(*rs) - 4; + runstate_addr < SHINFO_REGION_GPA + PAGE_SIZE + PAGE_SIZE + 4; runstate_addr++) { + + rs = addr_gpa2hva(vm, runstate_addr); + crs = (void *)rs; + + memset(rs, 0xa5, sizeof(*rs)); + + /* Set to compatibility mode */ + lm.u.long_mode = 0; + vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &lm); + + /* Set runstate to new address (kernel will write it) */ + struct kvm_xen_vcpu_attr st = { + .type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR, + .u.gpa = runstate_addr, + }; + vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &st); + + if (verbose) + printf("Compatibility runstate at %08lx\n", runstate_addr); + + TEST_ASSERT(crs->state == rst.u.runstate.state, "Runstate mismatch"); + TEST_ASSERT(crs->state_entry_time == rst.u.runstate.state_entry_time, + "State entry time mismatch"); + TEST_ASSERT(crs->time[RUNSTATE_running] == rst.u.runstate.time_running, + "Running time mismatch"); + TEST_ASSERT(crs->time[RUNSTATE_runnable] == rst.u.runstate.time_runnable, + "Runnable time mismatch"); + TEST_ASSERT(crs->time[RUNSTATE_blocked] == rst.u.runstate.time_blocked, + "Blocked time mismatch"); + TEST_ASSERT(crs->time[RUNSTATE_offline] == rst.u.runstate.time_offline, + "Offline time mismatch"); + TEST_ASSERT(crs->time[RUNSTATE_offline + 1] == 0xa5a5a5a5a5a5a5a5ULL, + "Structure overrun"); + TEST_ASSERT(crs->state_entry_time == crs->time[0] + + crs->time[1] + crs->time[2] + crs->time[3], + "runstate times don't add up"); + + + /* Now switch to 64-bit mode */ + lm.u.long_mode = 1; + vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &lm); + + memset(rs, 0xa5, sizeof(*rs)); + + /* Don't change the address, just trigger a write */ + struct kvm_xen_vcpu_attr adj = { + .type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST, + .u.runstate.state = (uint64_t)-1 + }; + vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &adj); + + if (verbose) + printf("64-bit runstate at %08lx\n", runstate_addr); + + TEST_ASSERT(rs->state == rst.u.runstate.state, "Runstate mismatch"); + TEST_ASSERT(rs->state_entry_time == rst.u.runstate.state_entry_time, + "State entry time mismatch"); + TEST_ASSERT(rs->time[RUNSTATE_running] == rst.u.runstate.time_running, + "Running time mismatch"); + TEST_ASSERT(rs->time[RUNSTATE_runnable] == rst.u.runstate.time_runnable, + "Runnable time mismatch"); + TEST_ASSERT(rs->time[RUNSTATE_blocked] == rst.u.runstate.time_blocked, + "Blocked time mismatch"); + TEST_ASSERT(rs->time[RUNSTATE_offline] == rst.u.runstate.time_offline, + "Offline time mismatch"); + TEST_ASSERT(rs->time[RUNSTATE_offline + 1] == 0xa5a5a5a5a5a5a5a5ULL, + "Structure overrun"); + + TEST_ASSERT(rs->state_entry_time == rs->time[0] + + rs->time[1] + rs->time[2] + rs->time[3], + "runstate times don't add up"); + } + } + 
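/*
 * A minimal standalone sketch, assuming the usual Xen ABI layouts, of why the
 * loop above walks the runstate area across the page boundary: the structure
 * is 44 bytes for a 32-bit (compat) guest but 48 bytes for a 64-bit guest, so
 * flipping long_mode while the area is live can push its tail onto the second
 * page. The struct names below are illustrative only, not the test's own.
 */
#include <stdint.h>
#include <stdio.h>

struct xen_runstate_64 {		/* 64-bit guest: natural alignment */
	uint32_t state;
	uint64_t state_entry_time;
	uint64_t time[4];
};

struct xen_runstate_compat {		/* 32-bit guest: 4-byte alignment */
	uint32_t state;
	uint64_t state_entry_time;
	uint64_t time[4];
} __attribute__((__packed__, __aligned__(4)));

int main(void)
{
	/* Prints "compat: 44 bytes, 64-bit: 48 bytes" on x86-64. */
	printf("compat: %zu bytes, 64-bit: %zu bytes\n",
	       sizeof(struct xen_runstate_compat),
	       sizeof(struct xen_runstate_64));
	return 0;
}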
+ kvm_vm_free(vm); + return 0; +} diff --git a/tools/testing/selftests/kvm/x86/xen_vmcall_test.c b/tools/testing/selftests/kvm/x86/xen_vmcall_test.c new file mode 100644 index 000000000000..2585087cdf5c --- /dev/null +++ b/tools/testing/selftests/kvm/x86/xen_vmcall_test.c @@ -0,0 +1,143 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * xen_vmcall_test + * + * Copyright © 2020 Amazon.com, Inc. or its affiliates. + * + * Userspace hypercall testing + */ + +#include "test_util.h" +#include "kvm_util.h" +#include "processor.h" +#include "hyperv.h" + +#define HCALL_REGION_GPA 0xc0000000ULL +#define HCALL_REGION_SLOT 10 + +#define INPUTVALUE 17 +#define ARGVALUE(x) (0xdeadbeef5a5a0000UL + x) +#define RETVALUE 0xcafef00dfbfbffffUL + +#define XEN_HYPERCALL_MSR 0x40000200 +#define HV_GUEST_OS_ID_MSR 0x40000000 +#define HV_HYPERCALL_MSR 0x40000001 + +#define HVCALL_SIGNAL_EVENT 0x005d +#define HV_STATUS_INVALID_ALIGNMENT 4 + +static void guest_code(void) +{ + unsigned long rax = INPUTVALUE; + unsigned long rdi = ARGVALUE(1); + unsigned long rsi = ARGVALUE(2); + unsigned long rdx = ARGVALUE(3); + unsigned long rcx; + register unsigned long r10 __asm__("r10") = ARGVALUE(4); + register unsigned long r8 __asm__("r8") = ARGVALUE(5); + register unsigned long r9 __asm__("r9") = ARGVALUE(6); + + /* First a direct invocation of 'vmcall' */ + __asm__ __volatile__("vmcall" : + "=a"(rax) : + "a"(rax), "D"(rdi), "S"(rsi), "d"(rdx), + "r"(r10), "r"(r8), "r"(r9)); + GUEST_ASSERT(rax == RETVALUE); + + /* Fill in the Xen hypercall page */ + __asm__ __volatile__("wrmsr" : : "c" (XEN_HYPERCALL_MSR), + "a" (HCALL_REGION_GPA & 0xffffffff), + "d" (HCALL_REGION_GPA >> 32)); + + /* Set Hyper-V Guest OS ID */ + __asm__ __volatile__("wrmsr" : : "c" (HV_GUEST_OS_ID_MSR), + "a" (0x5a), "d" (0)); + + /* Hyper-V hypercall page */ + u64 msrval = HCALL_REGION_GPA + PAGE_SIZE + 1; + __asm__ __volatile__("wrmsr" : : "c" (HV_HYPERCALL_MSR), + "a" (msrval & 0xffffffff), + "d" (msrval >> 32)); + + /* Invoke a Xen hypercall */ + __asm__ __volatile__("call *%1" : "=a"(rax) : + "r"(HCALL_REGION_GPA + INPUTVALUE * 32), + "a"(rax), "D"(rdi), "S"(rsi), "d"(rdx), + "r"(r10), "r"(r8), "r"(r9)); + GUEST_ASSERT(rax == RETVALUE); + + /* Invoke a Hyper-V hypercall */ + rax = 0; + rcx = HVCALL_SIGNAL_EVENT; /* code */ + rdx = 0x5a5a5a5a; /* ingpa (badly aligned) */ + __asm__ __volatile__("call *%1" : "=a"(rax) : + "r"(HCALL_REGION_GPA + PAGE_SIZE), + "a"(rax), "c"(rcx), "d"(rdx), + "r"(r8)); + GUEST_ASSERT(rax == HV_STATUS_INVALID_ALIGNMENT); + + GUEST_DONE(); +} + +int main(int argc, char *argv[]) +{ + unsigned int xen_caps; + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + + xen_caps = kvm_check_cap(KVM_CAP_XEN_HVM); + TEST_REQUIRE(xen_caps & KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL); + + vm = vm_create_with_one_vcpu(&vcpu, guest_code); + vcpu_set_hv_cpuid(vcpu); + + struct kvm_xen_hvm_config hvmc = { + .flags = KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL, + .msr = XEN_HYPERCALL_MSR, + }; + vm_ioctl(vm, KVM_XEN_HVM_CONFIG, &hvmc); + + /* Map a region for the hypercall pages */ + vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, + HCALL_REGION_GPA, HCALL_REGION_SLOT, 2, 0); + virt_map(vm, HCALL_REGION_GPA, HCALL_REGION_GPA, 2); + + for (;;) { + volatile struct kvm_run *run = vcpu->run; + struct ucall uc; + + vcpu_run(vcpu); + + if (run->exit_reason == KVM_EXIT_XEN) { + TEST_ASSERT_EQ(run->xen.type, KVM_EXIT_XEN_HCALL); + TEST_ASSERT_EQ(run->xen.u.hcall.cpl, 0); + TEST_ASSERT_EQ(run->xen.u.hcall.longmode, 1); + TEST_ASSERT_EQ(run->xen.u.hcall.input, 
INPUTVALUE); + TEST_ASSERT_EQ(run->xen.u.hcall.params[0], ARGVALUE(1)); + TEST_ASSERT_EQ(run->xen.u.hcall.params[1], ARGVALUE(2)); + TEST_ASSERT_EQ(run->xen.u.hcall.params[2], ARGVALUE(3)); + TEST_ASSERT_EQ(run->xen.u.hcall.params[3], ARGVALUE(4)); + TEST_ASSERT_EQ(run->xen.u.hcall.params[4], ARGVALUE(5)); + TEST_ASSERT_EQ(run->xen.u.hcall.params[5], ARGVALUE(6)); + run->xen.u.hcall.result = RETVALUE; + continue; + } + + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); + + switch (get_ucall(vcpu, &uc)) { + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + /* NOT REACHED */ + case UCALL_SYNC: + break; + case UCALL_DONE: + goto done; + default: + TEST_FAIL("Unknown ucall 0x%lx.", uc.cmd); + } + } +done: + kvm_vm_free(vm); + return 0; +} diff --git a/tools/testing/selftests/kvm/x86/xss_msr_test.c b/tools/testing/selftests/kvm/x86/xss_msr_test.c new file mode 100644 index 000000000000..f331a4e9bae3 --- /dev/null +++ b/tools/testing/selftests/kvm/x86/xss_msr_test.c @@ -0,0 +1,54 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2019, Google LLC. + * + * Tests for the IA32_XSS MSR. + */ +#include + +#include "test_util.h" +#include "kvm_util.h" +#include "vmx.h" + +#define MSR_BITS 64 + +int main(int argc, char *argv[]) +{ + bool xss_in_msr_list; + struct kvm_vm *vm; + struct kvm_vcpu *vcpu; + uint64_t xss_val; + int i, r; + + /* Create VM */ + vm = vm_create_with_one_vcpu(&vcpu, NULL); + + TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XSAVES)); + + xss_val = vcpu_get_msr(vcpu, MSR_IA32_XSS); + TEST_ASSERT(xss_val == 0, + "MSR_IA32_XSS should be initialized to zero"); + + vcpu_set_msr(vcpu, MSR_IA32_XSS, xss_val); + + /* + * At present, KVM only supports a guest IA32_XSS value of 0. Verify + * that trying to set the guest IA32_XSS to an unsupported value fails. + * Also, in the future when a non-zero value succeeds check that + * IA32_XSS is in the list of MSRs to save/restore. + */ + xss_in_msr_list = kvm_msr_is_in_save_restore_list(MSR_IA32_XSS); + for (i = 0; i < MSR_BITS; ++i) { + r = _vcpu_set_msr(vcpu, MSR_IA32_XSS, 1ull << i); + + /* + * Setting a list of MSRs returns the entry that "faulted", or + * the last entry +1 if all MSRs were successfully written. + */ + TEST_ASSERT(!r || r == 1, KVM_IOCTL_ERROR(KVM_SET_MSRS, r)); + TEST_ASSERT(r != 1 || xss_in_msr_list, + "IA32_XSS was able to be set, but was not in save/restore list"); + } + + kvm_vm_free(vm); +} diff --git a/tools/testing/selftests/kvm/x86_64/amx_test.c b/tools/testing/selftests/kvm/x86_64/amx_test.c deleted file mode 100644 index f4ce5a185a7d..000000000000 --- a/tools/testing/selftests/kvm/x86_64/amx_test.c +++ /dev/null @@ -1,315 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * amx tests - * - * Copyright (C) 2021, Intel, Inc. - * - * Tests for amx #NM exception and save/restore. 
- */ -#include -#include -#include -#include -#include -#include - -#include "test_util.h" - -#include "kvm_util.h" -#include "processor.h" -#include "vmx.h" - -#ifndef __x86_64__ -# error This test is 64-bit only -#endif - -#define NUM_TILES 8 -#define TILE_SIZE 1024 -#define XSAVE_SIZE ((NUM_TILES * TILE_SIZE) + PAGE_SIZE) - -/* Tile configuration associated: */ -#define PALETTE_TABLE_INDEX 1 -#define MAX_TILES 16 -#define RESERVED_BYTES 14 - -#define XSAVE_HDR_OFFSET 512 - -struct tile_config { - u8 palette_id; - u8 start_row; - u8 reserved[RESERVED_BYTES]; - u16 colsb[MAX_TILES]; - u8 rows[MAX_TILES]; -}; - -struct tile_data { - u8 data[NUM_TILES * TILE_SIZE]; -}; - -struct xtile_info { - u16 bytes_per_tile; - u16 bytes_per_row; - u16 max_names; - u16 max_rows; - u32 xsave_offset; - u32 xsave_size; -}; - -static struct xtile_info xtile; - -static inline void __ldtilecfg(void *cfg) -{ - asm volatile(".byte 0xc4,0xe2,0x78,0x49,0x00" - : : "a"(cfg)); -} - -static inline void __tileloadd(void *tile) -{ - asm volatile(".byte 0xc4,0xe2,0x7b,0x4b,0x04,0x10" - : : "a"(tile), "d"(0)); -} - -static inline void __tilerelease(void) -{ - asm volatile(".byte 0xc4, 0xe2, 0x78, 0x49, 0xc0" ::); -} - -static inline void __xsavec(struct xstate *xstate, uint64_t rfbm) -{ - uint32_t rfbm_lo = rfbm; - uint32_t rfbm_hi = rfbm >> 32; - - asm volatile("xsavec (%%rdi)" - : : "D" (xstate), "a" (rfbm_lo), "d" (rfbm_hi) - : "memory"); -} - -static void check_xtile_info(void) -{ - GUEST_ASSERT((xgetbv(0) & XFEATURE_MASK_XTILE) == XFEATURE_MASK_XTILE); - - GUEST_ASSERT(this_cpu_has_p(X86_PROPERTY_XSTATE_MAX_SIZE_XCR0)); - GUEST_ASSERT(this_cpu_property(X86_PROPERTY_XSTATE_MAX_SIZE_XCR0) <= XSAVE_SIZE); - - xtile.xsave_offset = this_cpu_property(X86_PROPERTY_XSTATE_TILE_OFFSET); - GUEST_ASSERT(xtile.xsave_offset == 2816); - xtile.xsave_size = this_cpu_property(X86_PROPERTY_XSTATE_TILE_SIZE); - GUEST_ASSERT(xtile.xsave_size == 8192); - GUEST_ASSERT(sizeof(struct tile_data) >= xtile.xsave_size); - - GUEST_ASSERT(this_cpu_has_p(X86_PROPERTY_AMX_MAX_PALETTE_TABLES)); - GUEST_ASSERT(this_cpu_property(X86_PROPERTY_AMX_MAX_PALETTE_TABLES) >= - PALETTE_TABLE_INDEX); - - GUEST_ASSERT(this_cpu_has_p(X86_PROPERTY_AMX_NR_TILE_REGS)); - xtile.max_names = this_cpu_property(X86_PROPERTY_AMX_NR_TILE_REGS); - GUEST_ASSERT(xtile.max_names == 8); - xtile.bytes_per_tile = this_cpu_property(X86_PROPERTY_AMX_BYTES_PER_TILE); - GUEST_ASSERT(xtile.bytes_per_tile == 1024); - xtile.bytes_per_row = this_cpu_property(X86_PROPERTY_AMX_BYTES_PER_ROW); - GUEST_ASSERT(xtile.bytes_per_row == 64); - xtile.max_rows = this_cpu_property(X86_PROPERTY_AMX_MAX_ROWS); - GUEST_ASSERT(xtile.max_rows == 16); -} - -static void set_tilecfg(struct tile_config *cfg) -{ - int i; - - /* Only palette id 1 */ - cfg->palette_id = 1; - for (i = 0; i < xtile.max_names; i++) { - cfg->colsb[i] = xtile.bytes_per_row; - cfg->rows[i] = xtile.max_rows; - } -} - -static void __attribute__((__flatten__)) guest_code(struct tile_config *amx_cfg, - struct tile_data *tiledata, - struct xstate *xstate) -{ - GUEST_ASSERT(this_cpu_has(X86_FEATURE_XSAVE) && - this_cpu_has(X86_FEATURE_OSXSAVE)); - check_xtile_info(); - GUEST_SYNC(1); - - /* xfd=0, enable amx */ - wrmsr(MSR_IA32_XFD, 0); - GUEST_SYNC(2); - GUEST_ASSERT(rdmsr(MSR_IA32_XFD) == 0); - set_tilecfg(amx_cfg); - __ldtilecfg(amx_cfg); - GUEST_SYNC(3); - /* Check save/restore when trap to userspace */ - __tileloadd(tiledata); - GUEST_SYNC(4); - __tilerelease(); - GUEST_SYNC(5); - /* - * After XSAVEC, XTILEDATA is cleared in the 
xstate_bv but is set in - * the xcomp_bv. - */ - xstate->header.xstate_bv = XFEATURE_MASK_XTILE_DATA; - __xsavec(xstate, XFEATURE_MASK_XTILE_DATA); - GUEST_ASSERT(!(xstate->header.xstate_bv & XFEATURE_MASK_XTILE_DATA)); - GUEST_ASSERT(xstate->header.xcomp_bv & XFEATURE_MASK_XTILE_DATA); - - /* xfd=0x40000, disable amx tiledata */ - wrmsr(MSR_IA32_XFD, XFEATURE_MASK_XTILE_DATA); - - /* - * XTILEDATA is cleared in xstate_bv but set in xcomp_bv, this property - * remains the same even when amx tiledata is disabled by IA32_XFD. - */ - xstate->header.xstate_bv = XFEATURE_MASK_XTILE_DATA; - __xsavec(xstate, XFEATURE_MASK_XTILE_DATA); - GUEST_ASSERT(!(xstate->header.xstate_bv & XFEATURE_MASK_XTILE_DATA)); - GUEST_ASSERT((xstate->header.xcomp_bv & XFEATURE_MASK_XTILE_DATA)); - - GUEST_SYNC(6); - GUEST_ASSERT(rdmsr(MSR_IA32_XFD) == XFEATURE_MASK_XTILE_DATA); - set_tilecfg(amx_cfg); - __ldtilecfg(amx_cfg); - /* Trigger #NM exception */ - __tileloadd(tiledata); - GUEST_SYNC(10); - - GUEST_DONE(); -} - -void guest_nm_handler(struct ex_regs *regs) -{ - /* Check if #NM is triggered by XFEATURE_MASK_XTILE_DATA */ - GUEST_SYNC(7); - GUEST_ASSERT(!(get_cr0() & X86_CR0_TS)); - GUEST_ASSERT(rdmsr(MSR_IA32_XFD_ERR) == XFEATURE_MASK_XTILE_DATA); - GUEST_ASSERT(rdmsr(MSR_IA32_XFD) == XFEATURE_MASK_XTILE_DATA); - GUEST_SYNC(8); - GUEST_ASSERT(rdmsr(MSR_IA32_XFD_ERR) == XFEATURE_MASK_XTILE_DATA); - GUEST_ASSERT(rdmsr(MSR_IA32_XFD) == XFEATURE_MASK_XTILE_DATA); - /* Clear xfd_err */ - wrmsr(MSR_IA32_XFD_ERR, 0); - /* xfd=0, enable amx */ - wrmsr(MSR_IA32_XFD, 0); - GUEST_SYNC(9); -} - -int main(int argc, char *argv[]) -{ - struct kvm_regs regs1, regs2; - struct kvm_vcpu *vcpu; - struct kvm_vm *vm; - struct kvm_x86_state *state; - int xsave_restore_size; - vm_vaddr_t amx_cfg, tiledata, xstate; - struct ucall uc; - u32 amx_offset; - int ret; - - /* - * Note, all off-by-default features must be enabled before anything - * caches KVM_GET_SUPPORTED_CPUID, e.g. before using kvm_cpu_has(). 
- */ - vm_xsave_require_permission(XFEATURE_MASK_XTILE_DATA); - - TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XFD)); - TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XSAVE)); - TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_AMX_TILE)); - TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XTILECFG)); - TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XTILEDATA)); - TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XTILEDATA_XFD)); - - /* Create VM */ - vm = vm_create_with_one_vcpu(&vcpu, guest_code); - - TEST_ASSERT(kvm_cpu_has_p(X86_PROPERTY_XSTATE_MAX_SIZE), - "KVM should enumerate max XSAVE size when XSAVE is supported"); - xsave_restore_size = kvm_cpu_property(X86_PROPERTY_XSTATE_MAX_SIZE); - - vcpu_regs_get(vcpu, ®s1); - - /* Register #NM handler */ - vm_install_exception_handler(vm, NM_VECTOR, guest_nm_handler); - - /* amx cfg for guest_code */ - amx_cfg = vm_vaddr_alloc_page(vm); - memset(addr_gva2hva(vm, amx_cfg), 0x0, getpagesize()); - - /* amx tiledata for guest_code */ - tiledata = vm_vaddr_alloc_pages(vm, 2); - memset(addr_gva2hva(vm, tiledata), rand() | 1, 2 * getpagesize()); - - /* XSAVE state for guest_code */ - xstate = vm_vaddr_alloc_pages(vm, DIV_ROUND_UP(XSAVE_SIZE, PAGE_SIZE)); - memset(addr_gva2hva(vm, xstate), 0, PAGE_SIZE * DIV_ROUND_UP(XSAVE_SIZE, PAGE_SIZE)); - vcpu_args_set(vcpu, 3, amx_cfg, tiledata, xstate); - - for (;;) { - vcpu_run(vcpu); - TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); - - switch (get_ucall(vcpu, &uc)) { - case UCALL_ABORT: - REPORT_GUEST_ASSERT(uc); - /* NOT REACHED */ - case UCALL_SYNC: - switch (uc.args[1]) { - case 1: - case 2: - case 3: - case 5: - case 6: - case 7: - case 8: - fprintf(stderr, "GUEST_SYNC(%ld)\n", uc.args[1]); - break; - case 4: - case 10: - fprintf(stderr, - "GUEST_SYNC(%ld), check save/restore status\n", uc.args[1]); - - /* Compacted mode, get amx offset by xsave area - * size subtract 8K amx size. - */ - amx_offset = xsave_restore_size - NUM_TILES*TILE_SIZE; - state = vcpu_save_state(vcpu); - void *amx_start = (void *)state->xsave + amx_offset; - void *tiles_data = (void *)addr_gva2hva(vm, tiledata); - /* Only check TMM0 register, 1 tile */ - ret = memcmp(amx_start, tiles_data, TILE_SIZE); - TEST_ASSERT(ret == 0, "memcmp failed, ret=%d", ret); - kvm_x86_state_cleanup(state); - break; - case 9: - fprintf(stderr, - "GUEST_SYNC(%ld), #NM exception and enable amx\n", uc.args[1]); - break; - } - break; - case UCALL_DONE: - fprintf(stderr, "UCALL_DONE\n"); - goto done; - default: - TEST_FAIL("Unknown ucall %lu", uc.cmd); - } - - state = vcpu_save_state(vcpu); - memset(®s1, 0, sizeof(regs1)); - vcpu_regs_get(vcpu, ®s1); - - kvm_vm_release(vm); - - /* Restore state in a new VM. */ - vcpu = vm_recreate_with_one_vcpu(vm); - vcpu_load_state(vcpu, state); - kvm_x86_state_cleanup(state); - - memset(®s2, 0, sizeof(regs2)); - vcpu_regs_get(vcpu, ®s2); - TEST_ASSERT(!memcmp(®s1, ®s2, sizeof(regs2)), - "Unexpected register values after vcpu_load_state; rdi: %lx rsi: %lx", - (ulong) regs2.rdi, (ulong) regs2.rsi); - } -done: - kvm_vm_free(vm); -} diff --git a/tools/testing/selftests/kvm/x86_64/apic_bus_clock_test.c b/tools/testing/selftests/kvm/x86_64/apic_bus_clock_test.c deleted file mode 100644 index f8916bb34405..000000000000 --- a/tools/testing/selftests/kvm/x86_64/apic_bus_clock_test.c +++ /dev/null @@ -1,194 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Copyright (c) 2024 Intel Corporation - * - * Verify KVM correctly emulates the APIC bus frequency when the VMM configures - * the frequency via KVM_CAP_X86_APIC_BUS_CYCLES_NS. 
Start the APIC timer by - * programming TMICT (timer initial count) to the largest value possible (so - * that the timer will not expire during the test). Then, after an arbitrary - * amount of time has elapsed, verify TMCCT (timer current count) is within 1% - * of the expected value based on the time elapsed, the APIC bus frequency, and - * the programmed TDCR (timer divide configuration register). - */ - -#include "apic.h" -#include "test_util.h" - -/* - * Possible TDCR values with matching divide count. Used to modify APIC - * timer frequency. - */ -static const struct { - const uint32_t tdcr; - const uint32_t divide_count; -} tdcrs[] = { - {0x0, 2}, - {0x1, 4}, - {0x2, 8}, - {0x3, 16}, - {0x8, 32}, - {0x9, 64}, - {0xa, 128}, - {0xb, 1}, -}; - -static bool is_x2apic; - -static void apic_enable(void) -{ - if (is_x2apic) - x2apic_enable(); - else - xapic_enable(); -} - -static uint32_t apic_read_reg(unsigned int reg) -{ - return is_x2apic ? x2apic_read_reg(reg) : xapic_read_reg(reg); -} - -static void apic_write_reg(unsigned int reg, uint32_t val) -{ - if (is_x2apic) - x2apic_write_reg(reg, val); - else - xapic_write_reg(reg, val); -} - -static void apic_guest_code(uint64_t apic_hz, uint64_t delay_ms) -{ - uint64_t tsc_hz = guest_tsc_khz * 1000; - const uint32_t tmict = ~0u; - uint64_t tsc0, tsc1, freq; - uint32_t tmcct; - int i; - - apic_enable(); - - /* - * Setup one-shot timer. The vector does not matter because the - * interrupt should not fire. - */ - apic_write_reg(APIC_LVTT, APIC_LVT_TIMER_ONESHOT | APIC_LVT_MASKED); - - for (i = 0; i < ARRAY_SIZE(tdcrs); i++) { - apic_write_reg(APIC_TDCR, tdcrs[i].tdcr); - apic_write_reg(APIC_TMICT, tmict); - - tsc0 = rdtsc(); - udelay(delay_ms * 1000); - tmcct = apic_read_reg(APIC_TMCCT); - tsc1 = rdtsc(); - - /* - * Stop the timer _after_ reading the current, final count, as - * writing the initial counter also modifies the current count. - */ - apic_write_reg(APIC_TMICT, 0); - - freq = (tmict - tmcct) * tdcrs[i].divide_count * tsc_hz / (tsc1 - tsc0); - /* Check if measured frequency is within 5% of configured frequency. 
*/ - __GUEST_ASSERT(freq < apic_hz * 105 / 100 && freq > apic_hz * 95 / 100, - "Frequency = %lu (wanted %lu - %lu), bus = %lu, div = %u, tsc = %lu", - freq, apic_hz * 95 / 100, apic_hz * 105 / 100, - apic_hz, tdcrs[i].divide_count, tsc_hz); - } - - GUEST_DONE(); -} - -static void test_apic_bus_clock(struct kvm_vcpu *vcpu) -{ - bool done = false; - struct ucall uc; - - while (!done) { - vcpu_run(vcpu); - - TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); - - switch (get_ucall(vcpu, &uc)) { - case UCALL_DONE: - done = true; - break; - case UCALL_ABORT: - REPORT_GUEST_ASSERT(uc); - break; - default: - TEST_FAIL("Unknown ucall %lu", uc.cmd); - break; - } - } -} - -static void run_apic_bus_clock_test(uint64_t apic_hz, uint64_t delay_ms, - bool x2apic) -{ - struct kvm_vcpu *vcpu; - struct kvm_vm *vm; - int ret; - - is_x2apic = x2apic; - - vm = vm_create(1); - - sync_global_to_guest(vm, is_x2apic); - - vm_enable_cap(vm, KVM_CAP_X86_APIC_BUS_CYCLES_NS, - NSEC_PER_SEC / apic_hz); - - vcpu = vm_vcpu_add(vm, 0, apic_guest_code); - vcpu_args_set(vcpu, 2, apic_hz, delay_ms); - - ret = __vm_enable_cap(vm, KVM_CAP_X86_APIC_BUS_CYCLES_NS, - NSEC_PER_SEC / apic_hz); - TEST_ASSERT(ret < 0 && errno == EINVAL, - "Setting of APIC bus frequency after vCPU is created should fail."); - - if (!is_x2apic) - virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA); - - test_apic_bus_clock(vcpu); - kvm_vm_free(vm); -} - -static void help(char *name) -{ - puts(""); - printf("usage: %s [-h] [-d delay] [-f APIC bus freq]\n", name); - puts(""); - printf("-d: Delay (in msec) guest uses to measure APIC bus frequency.\n"); - printf("-f: The APIC bus frequency (in MHz) to be configured for the guest.\n"); - puts(""); -} - -int main(int argc, char *argv[]) -{ - /* - * Arbitrarilty default to 25MHz for the APIC bus frequency, which is - * different enough from the default 1GHz to be interesting. - */ - uint64_t apic_hz = 25 * 1000 * 1000; - uint64_t delay_ms = 100; - int opt; - - TEST_REQUIRE(kvm_has_cap(KVM_CAP_X86_APIC_BUS_CYCLES_NS)); - - while ((opt = getopt(argc, argv, "d:f:h")) != -1) { - switch (opt) { - case 'f': - apic_hz = atoi_positive("APIC bus frequency", optarg) * 1000 * 1000; - break; - case 'd': - delay_ms = atoi_positive("Delay in milliseconds", optarg); - break; - case 'h': - default: - help(argv[0]); - exit(KSFT_SKIP); - } - } - - run_apic_bus_clock_test(apic_hz, delay_ms, false); - run_apic_bus_clock_test(apic_hz, delay_ms, true); -} diff --git a/tools/testing/selftests/kvm/x86_64/cpuid_test.c b/tools/testing/selftests/kvm/x86_64/cpuid_test.c deleted file mode 100644 index 7b3fda6842bc..000000000000 --- a/tools/testing/selftests/kvm/x86_64/cpuid_test.c +++ /dev/null @@ -1,225 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Copyright (C) 2021, Red Hat Inc. 
- * - * Generic tests for KVM CPUID set/get ioctls - */ -#include -#include -#include - -#include "test_util.h" -#include "kvm_util.h" -#include "processor.h" - -struct cpuid_mask { - union { - struct { - u32 eax; - u32 ebx; - u32 ecx; - u32 edx; - }; - u32 regs[4]; - }; -}; - -static void test_guest_cpuids(struct kvm_cpuid2 *guest_cpuid) -{ - int i; - u32 eax, ebx, ecx, edx; - - for (i = 0; i < guest_cpuid->nent; i++) { - __cpuid(guest_cpuid->entries[i].function, - guest_cpuid->entries[i].index, - &eax, &ebx, &ecx, &edx); - - GUEST_ASSERT_EQ(eax, guest_cpuid->entries[i].eax); - GUEST_ASSERT_EQ(ebx, guest_cpuid->entries[i].ebx); - GUEST_ASSERT_EQ(ecx, guest_cpuid->entries[i].ecx); - GUEST_ASSERT_EQ(edx, guest_cpuid->entries[i].edx); - } - -} - -static void guest_main(struct kvm_cpuid2 *guest_cpuid) -{ - GUEST_SYNC(1); - - test_guest_cpuids(guest_cpuid); - - GUEST_SYNC(2); - - GUEST_ASSERT_EQ(this_cpu_property(X86_PROPERTY_MAX_KVM_LEAF), 0x40000001); - - GUEST_DONE(); -} - -static struct cpuid_mask get_const_cpuid_mask(const struct kvm_cpuid_entry2 *entry) -{ - struct cpuid_mask mask; - - memset(&mask, 0xff, sizeof(mask)); - - switch (entry->function) { - case 0x1: - mask.regs[X86_FEATURE_OSXSAVE.reg] &= ~BIT(X86_FEATURE_OSXSAVE.bit); - break; - case 0x7: - mask.regs[X86_FEATURE_OSPKE.reg] &= ~BIT(X86_FEATURE_OSPKE.bit); - break; - case 0xd: - /* - * CPUID.0xD.{0,1}.EBX enumerate XSAVE size based on the current - * XCR0 and IA32_XSS MSR values. - */ - if (entry->index < 2) - mask.ebx = 0; - break; - } - return mask; -} - -static void compare_cpuids(const struct kvm_cpuid2 *cpuid1, - const struct kvm_cpuid2 *cpuid2) -{ - const struct kvm_cpuid_entry2 *e1, *e2; - int i; - - TEST_ASSERT(cpuid1->nent == cpuid2->nent, - "CPUID nent mismatch: %d vs. %d", cpuid1->nent, cpuid2->nent); - - for (i = 0; i < cpuid1->nent; i++) { - struct cpuid_mask mask; - - e1 = &cpuid1->entries[i]; - e2 = &cpuid2->entries[i]; - - TEST_ASSERT(e1->function == e2->function && - e1->index == e2->index && e1->flags == e2->flags, - "CPUID entries[%d] mismtach: 0x%x.%d.%x vs. 0x%x.%d.%x", - i, e1->function, e1->index, e1->flags, - e2->function, e2->index, e2->flags); - - /* Mask off dynamic bits, e.g. OSXSAVE, when comparing entries. 
*/ - mask = get_const_cpuid_mask(e1); - - TEST_ASSERT((e1->eax & mask.eax) == (e2->eax & mask.eax) && - (e1->ebx & mask.ebx) == (e2->ebx & mask.ebx) && - (e1->ecx & mask.ecx) == (e2->ecx & mask.ecx) && - (e1->edx & mask.edx) == (e2->edx & mask.edx), - "CPUID 0x%x.%x differ: 0x%x:0x%x:0x%x:0x%x vs 0x%x:0x%x:0x%x:0x%x", - e1->function, e1->index, - e1->eax & mask.eax, e1->ebx & mask.ebx, - e1->ecx & mask.ecx, e1->edx & mask.edx, - e2->eax & mask.eax, e2->ebx & mask.ebx, - e2->ecx & mask.ecx, e2->edx & mask.edx); - } -} - -static void run_vcpu(struct kvm_vcpu *vcpu, int stage) -{ - struct ucall uc; - - vcpu_run(vcpu); - - switch (get_ucall(vcpu, &uc)) { - case UCALL_SYNC: - TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") && - uc.args[1] == stage + 1, - "Stage %d: Unexpected register values vmexit, got %lx", - stage + 1, (ulong)uc.args[1]); - return; - case UCALL_DONE: - return; - case UCALL_ABORT: - REPORT_GUEST_ASSERT(uc); - default: - TEST_ASSERT(false, "Unexpected exit: %s", - exit_reason_str(vcpu->run->exit_reason)); - } -} - -struct kvm_cpuid2 *vcpu_alloc_cpuid(struct kvm_vm *vm, vm_vaddr_t *p_gva, struct kvm_cpuid2 *cpuid) -{ - int size = sizeof(*cpuid) + cpuid->nent * sizeof(cpuid->entries[0]); - vm_vaddr_t gva = vm_vaddr_alloc(vm, size, KVM_UTIL_MIN_VADDR); - struct kvm_cpuid2 *guest_cpuids = addr_gva2hva(vm, gva); - - memcpy(guest_cpuids, cpuid, size); - - *p_gva = gva; - return guest_cpuids; -} - -static void set_cpuid_after_run(struct kvm_vcpu *vcpu) -{ - struct kvm_cpuid_entry2 *ent; - int rc; - u32 eax, ebx, x; - - /* Setting unmodified CPUID is allowed */ - rc = __vcpu_set_cpuid(vcpu); - TEST_ASSERT(!rc, "Setting unmodified CPUID after KVM_RUN failed: %d", rc); - - /* Changing CPU features is forbidden */ - ent = vcpu_get_cpuid_entry(vcpu, 0x7); - ebx = ent->ebx; - ent->ebx--; - rc = __vcpu_set_cpuid(vcpu); - TEST_ASSERT(rc, "Changing CPU features should fail"); - ent->ebx = ebx; - - /* Changing MAXPHYADDR is forbidden */ - ent = vcpu_get_cpuid_entry(vcpu, 0x80000008); - eax = ent->eax; - x = eax & 0xff; - ent->eax = (eax & ~0xffu) | (x - 1); - rc = __vcpu_set_cpuid(vcpu); - TEST_ASSERT(rc, "Changing MAXPHYADDR should fail"); - ent->eax = eax; -} - -static void test_get_cpuid2(struct kvm_vcpu *vcpu) -{ - struct kvm_cpuid2 *cpuid = allocate_kvm_cpuid2(vcpu->cpuid->nent + 1); - int i, r; - - vcpu_ioctl(vcpu, KVM_GET_CPUID2, cpuid); - TEST_ASSERT(cpuid->nent == vcpu->cpuid->nent, - "KVM didn't update nent on success, wanted %u, got %u", - vcpu->cpuid->nent, cpuid->nent); - - for (i = 0; i < vcpu->cpuid->nent; i++) { - cpuid->nent = i; - r = __vcpu_ioctl(vcpu, KVM_GET_CPUID2, cpuid); - TEST_ASSERT(r && errno == E2BIG, KVM_IOCTL_ERROR(KVM_GET_CPUID2, r)); - TEST_ASSERT(cpuid->nent == i, "KVM modified nent on failure"); - } - free(cpuid); -} - -int main(void) -{ - struct kvm_vcpu *vcpu; - vm_vaddr_t cpuid_gva; - struct kvm_vm *vm; - int stage; - - vm = vm_create_with_one_vcpu(&vcpu, guest_main); - - compare_cpuids(kvm_get_supported_cpuid(), vcpu->cpuid); - - vcpu_alloc_cpuid(vm, &cpuid_gva, vcpu->cpuid); - - vcpu_args_set(vcpu, 1, cpuid_gva); - - for (stage = 0; stage < 3; stage++) - run_vcpu(vcpu, stage); - - set_cpuid_after_run(vcpu); - - test_get_cpuid2(vcpu); - - kvm_vm_free(vm); -} diff --git a/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c b/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c deleted file mode 100644 index 28cc66454601..000000000000 --- a/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c +++ /dev/null @@ -1,100 +0,0 @@ -// 
SPDX-License-Identifier: GPL-2.0 -/* - * CR4 and CPUID sync test - * - * Copyright 2018, Red Hat, Inc. and/or its affiliates. - * - * Author: - * Wei Huang - */ - -#include -#include -#include -#include -#include - -#include "test_util.h" - -#include "kvm_util.h" -#include "processor.h" - -#define MAGIC_HYPERCALL_PORT 0x80 - -static void guest_code(void) -{ - u32 regs[4] = { - [KVM_CPUID_EAX] = X86_FEATURE_OSXSAVE.function, - [KVM_CPUID_ECX] = X86_FEATURE_OSXSAVE.index, - }; - - /* CR4.OSXSAVE should be enabled by default (for selftests vCPUs). */ - GUEST_ASSERT(get_cr4() & X86_CR4_OSXSAVE); - - /* verify CR4.OSXSAVE == CPUID.OSXSAVE */ - GUEST_ASSERT(this_cpu_has(X86_FEATURE_OSXSAVE)); - - /* - * Notify hypervisor to clear CR4.0SXSAVE, do CPUID and save output, - * and then restore CR4. Do this all in assembly to ensure no AVX - * instructions are executed while OSXSAVE=0. - */ - asm volatile ( - "out %%al, $" __stringify(MAGIC_HYPERCALL_PORT) "\n\t" - "cpuid\n\t" - "mov %%rdi, %%cr4\n\t" - : "+a" (regs[KVM_CPUID_EAX]), - "=b" (regs[KVM_CPUID_EBX]), - "+c" (regs[KVM_CPUID_ECX]), - "=d" (regs[KVM_CPUID_EDX]) - : "D" (get_cr4()) - ); - - /* Verify KVM cleared OSXSAVE in CPUID when it was cleared in CR4. */ - GUEST_ASSERT(!(regs[X86_FEATURE_OSXSAVE.reg] & BIT(X86_FEATURE_OSXSAVE.bit))); - - /* Verify restoring CR4 also restored OSXSAVE in CPUID. */ - GUEST_ASSERT(this_cpu_has(X86_FEATURE_OSXSAVE)); - - GUEST_DONE(); -} - -int main(int argc, char *argv[]) -{ - struct kvm_vcpu *vcpu; - struct kvm_vm *vm; - struct kvm_sregs sregs; - struct ucall uc; - - TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XSAVE)); - - vm = vm_create_with_one_vcpu(&vcpu, guest_code); - - while (1) { - vcpu_run(vcpu); - TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); - - if (vcpu->run->io.port == MAGIC_HYPERCALL_PORT && - vcpu->run->io.direction == KVM_EXIT_IO_OUT) { - /* emulate hypervisor clearing CR4.OSXSAVE */ - vcpu_sregs_get(vcpu, &sregs); - sregs.cr4 &= ~X86_CR4_OSXSAVE; - vcpu_sregs_set(vcpu, &sregs); - continue; - } - - switch (get_ucall(vcpu, &uc)) { - case UCALL_ABORT: - REPORT_GUEST_ASSERT(uc); - break; - case UCALL_DONE: - goto done; - default: - TEST_FAIL("Unknown ucall %lu", uc.cmd); - } - } - -done: - kvm_vm_free(vm); - return 0; -} diff --git a/tools/testing/selftests/kvm/x86_64/debug_regs.c b/tools/testing/selftests/kvm/x86_64/debug_regs.c deleted file mode 100644 index 2d814c1d1dc4..000000000000 --- a/tools/testing/selftests/kvm/x86_64/debug_regs.c +++ /dev/null @@ -1,217 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * KVM guest debug register tests - * - * Copyright (C) 2020, Red Hat, Inc. - */ -#include -#include -#include "kvm_util.h" -#include "processor.h" -#include "apic.h" - -#define DR6_BD (1 << 13) -#define DR7_GD (1 << 13) - -#define IRQ_VECTOR 0xAA - -/* For testing data access debug BP */ -uint32_t guest_value; - -extern unsigned char sw_bp, hw_bp, write_data, ss_start, bd_start; - -static void guest_code(void) -{ - /* Create a pending interrupt on current vCPU */ - x2apic_enable(); - x2apic_write_reg(APIC_ICR, APIC_DEST_SELF | APIC_INT_ASSERT | - APIC_DM_FIXED | IRQ_VECTOR); - - /* - * Software BP tests. - * - * NOTE: sw_bp need to be before the cmd here, because int3 is an - * exception rather than a normal trap for KVM_SET_GUEST_DEBUG (we - * capture it using the vcpu exception bitmap). 
- */ - asm volatile("sw_bp: int3"); - - /* Hardware instruction BP test */ - asm volatile("hw_bp: nop"); - - /* Hardware data BP test */ - asm volatile("mov $1234,%%rax;\n\t" - "mov %%rax,%0;\n\t write_data:" - : "=m" (guest_value) : : "rax"); - - /* - * Single step test, covers 2 basic instructions and 2 emulated - * - * Enable interrupts during the single stepping to see that pending - * interrupt we raised is not handled due to KVM_GUESTDBG_BLOCKIRQ. - * - * Write MSR_IA32_TSC_DEADLINE to verify that KVM's fastpath handler - * exits to userspace due to single-step being enabled. - */ - asm volatile("ss_start: " - "sti\n\t" - "xor %%eax,%%eax\n\t" - "cpuid\n\t" - "movl $" __stringify(MSR_IA32_TSC_DEADLINE) ", %%ecx\n\t" - "wrmsr\n\t" - "cli\n\t" - : : : "eax", "ebx", "ecx", "edx"); - - /* DR6.BD test */ - asm volatile("bd_start: mov %%dr0, %%rax" : : : "rax"); - GUEST_DONE(); -} - -#define CAST_TO_RIP(v) ((unsigned long long)&(v)) - -static void vcpu_skip_insn(struct kvm_vcpu *vcpu, int insn_len) -{ - struct kvm_regs regs; - - vcpu_regs_get(vcpu, ®s); - regs.rip += insn_len; - vcpu_regs_set(vcpu, ®s); -} - -int main(void) -{ - struct kvm_guest_debug debug; - unsigned long long target_dr6, target_rip; - struct kvm_vcpu *vcpu; - struct kvm_run *run; - struct kvm_vm *vm; - struct ucall uc; - uint64_t cmd; - int i; - /* Instruction lengths starting at ss_start */ - int ss_size[6] = { - 1, /* sti*/ - 2, /* xor */ - 2, /* cpuid */ - 5, /* mov */ - 2, /* rdmsr */ - 1, /* cli */ - }; - - TEST_REQUIRE(kvm_has_cap(KVM_CAP_SET_GUEST_DEBUG)); - - vm = vm_create_with_one_vcpu(&vcpu, guest_code); - run = vcpu->run; - - /* Test software BPs - int3 */ - memset(&debug, 0, sizeof(debug)); - debug.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP; - vcpu_guest_debug_set(vcpu, &debug); - vcpu_run(vcpu); - TEST_ASSERT(run->exit_reason == KVM_EXIT_DEBUG && - run->debug.arch.exception == BP_VECTOR && - run->debug.arch.pc == CAST_TO_RIP(sw_bp), - "INT3: exit %d exception %d rip 0x%llx (should be 0x%llx)", - run->exit_reason, run->debug.arch.exception, - run->debug.arch.pc, CAST_TO_RIP(sw_bp)); - vcpu_skip_insn(vcpu, 1); - - /* Test instruction HW BP over DR[0-3] */ - for (i = 0; i < 4; i++) { - memset(&debug, 0, sizeof(debug)); - debug.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP; - debug.arch.debugreg[i] = CAST_TO_RIP(hw_bp); - debug.arch.debugreg[7] = 0x400 | (1UL << (2*i+1)); - vcpu_guest_debug_set(vcpu, &debug); - vcpu_run(vcpu); - target_dr6 = 0xffff0ff0 | (1UL << i); - TEST_ASSERT(run->exit_reason == KVM_EXIT_DEBUG && - run->debug.arch.exception == DB_VECTOR && - run->debug.arch.pc == CAST_TO_RIP(hw_bp) && - run->debug.arch.dr6 == target_dr6, - "INS_HW_BP (DR%d): exit %d exception %d rip 0x%llx " - "(should be 0x%llx) dr6 0x%llx (should be 0x%llx)", - i, run->exit_reason, run->debug.arch.exception, - run->debug.arch.pc, CAST_TO_RIP(hw_bp), - run->debug.arch.dr6, target_dr6); - } - /* Skip "nop" */ - vcpu_skip_insn(vcpu, 1); - - /* Test data access HW BP over DR[0-3] */ - for (i = 0; i < 4; i++) { - memset(&debug, 0, sizeof(debug)); - debug.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP; - debug.arch.debugreg[i] = CAST_TO_RIP(guest_value); - debug.arch.debugreg[7] = 0x00000400 | (1UL << (2*i+1)) | - (0x000d0000UL << (4*i)); - vcpu_guest_debug_set(vcpu, &debug); - vcpu_run(vcpu); - target_dr6 = 0xffff0ff0 | (1UL << i); - TEST_ASSERT(run->exit_reason == KVM_EXIT_DEBUG && - run->debug.arch.exception == DB_VECTOR && - run->debug.arch.pc == CAST_TO_RIP(write_data) && - run->debug.arch.dr6 
== target_dr6, - "DATA_HW_BP (DR%d): exit %d exception %d rip 0x%llx " - "(should be 0x%llx) dr6 0x%llx (should be 0x%llx)", - i, run->exit_reason, run->debug.arch.exception, - run->debug.arch.pc, CAST_TO_RIP(write_data), - run->debug.arch.dr6, target_dr6); - /* Rollback the 4-bytes "mov" */ - vcpu_skip_insn(vcpu, -7); - } - /* Skip the 4-bytes "mov" */ - vcpu_skip_insn(vcpu, 7); - - /* Test single step */ - target_rip = CAST_TO_RIP(ss_start); - target_dr6 = 0xffff4ff0ULL; - for (i = 0; i < ARRAY_SIZE(ss_size); i++) { - target_rip += ss_size[i]; - memset(&debug, 0, sizeof(debug)); - debug.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP | - KVM_GUESTDBG_BLOCKIRQ; - debug.arch.debugreg[7] = 0x00000400; - vcpu_guest_debug_set(vcpu, &debug); - vcpu_run(vcpu); - TEST_ASSERT(run->exit_reason == KVM_EXIT_DEBUG && - run->debug.arch.exception == DB_VECTOR && - run->debug.arch.pc == target_rip && - run->debug.arch.dr6 == target_dr6, - "SINGLE_STEP[%d]: exit %d exception %d rip 0x%llx " - "(should be 0x%llx) dr6 0x%llx (should be 0x%llx)", - i, run->exit_reason, run->debug.arch.exception, - run->debug.arch.pc, target_rip, run->debug.arch.dr6, - target_dr6); - } - - /* Finally test global disable */ - memset(&debug, 0, sizeof(debug)); - debug.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP; - debug.arch.debugreg[7] = 0x400 | DR7_GD; - vcpu_guest_debug_set(vcpu, &debug); - vcpu_run(vcpu); - target_dr6 = 0xffff0ff0 | DR6_BD; - TEST_ASSERT(run->exit_reason == KVM_EXIT_DEBUG && - run->debug.arch.exception == DB_VECTOR && - run->debug.arch.pc == CAST_TO_RIP(bd_start) && - run->debug.arch.dr6 == target_dr6, - "DR7.GD: exit %d exception %d rip 0x%llx " - "(should be 0x%llx) dr6 0x%llx (should be 0x%llx)", - run->exit_reason, run->debug.arch.exception, - run->debug.arch.pc, target_rip, run->debug.arch.dr6, - target_dr6); - - /* Disable all debug controls, run to the end */ - memset(&debug, 0, sizeof(debug)); - vcpu_guest_debug_set(vcpu, &debug); - - vcpu_run(vcpu); - TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); - cmd = get_ucall(vcpu, &uc); - TEST_ASSERT(cmd == UCALL_DONE, "UCALL_DONE"); - - kvm_vm_free(vm); - - return 0; -} diff --git a/tools/testing/selftests/kvm/x86_64/dirty_log_page_splitting_test.c b/tools/testing/selftests/kvm/x86_64/dirty_log_page_splitting_test.c deleted file mode 100644 index 2929c067c207..000000000000 --- a/tools/testing/selftests/kvm/x86_64/dirty_log_page_splitting_test.c +++ /dev/null @@ -1,263 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * KVM dirty logging page splitting test - * - * Based on dirty_log_perf.c - * - * Copyright (C) 2018, Red Hat, Inc. - * Copyright (C) 2023, Google, Inc. 
- */ - -#include -#include -#include -#include - -#include "kvm_util.h" -#include "test_util.h" -#include "memstress.h" -#include "guest_modes.h" -#include "ucall_common.h" - -#define VCPUS 2 -#define SLOTS 2 -#define ITERATIONS 2 - -static uint64_t guest_percpu_mem_size = DEFAULT_PER_VCPU_MEM_SIZE; - -static enum vm_mem_backing_src_type backing_src = VM_MEM_SRC_ANONYMOUS_HUGETLB; - -static u64 dirty_log_manual_caps; -static bool host_quit; -static int iteration; -static int vcpu_last_completed_iteration[KVM_MAX_VCPUS]; - -struct kvm_page_stats { - uint64_t pages_4k; - uint64_t pages_2m; - uint64_t pages_1g; - uint64_t hugepages; -}; - -static void get_page_stats(struct kvm_vm *vm, struct kvm_page_stats *stats, const char *stage) -{ - stats->pages_4k = vm_get_stat(vm, "pages_4k"); - stats->pages_2m = vm_get_stat(vm, "pages_2m"); - stats->pages_1g = vm_get_stat(vm, "pages_1g"); - stats->hugepages = stats->pages_2m + stats->pages_1g; - - pr_debug("\nPage stats after %s: 4K: %ld 2M: %ld 1G: %ld huge: %ld\n", - stage, stats->pages_4k, stats->pages_2m, stats->pages_1g, - stats->hugepages); -} - -static void run_vcpu_iteration(struct kvm_vm *vm) -{ - int i; - - iteration++; - for (i = 0; i < VCPUS; i++) { - while (READ_ONCE(vcpu_last_completed_iteration[i]) != - iteration) - ; - } -} - -static void vcpu_worker(struct memstress_vcpu_args *vcpu_args) -{ - struct kvm_vcpu *vcpu = vcpu_args->vcpu; - int vcpu_idx = vcpu_args->vcpu_idx; - - while (!READ_ONCE(host_quit)) { - int current_iteration = READ_ONCE(iteration); - - vcpu_run(vcpu); - - TEST_ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_SYNC); - - vcpu_last_completed_iteration[vcpu_idx] = current_iteration; - - /* Wait for the start of the next iteration to be signaled. */ - while (current_iteration == READ_ONCE(iteration) && - READ_ONCE(iteration) >= 0 && - !READ_ONCE(host_quit)) - ; - } -} - -static void run_test(enum vm_guest_mode mode, void *unused) -{ - struct kvm_vm *vm; - unsigned long **bitmaps; - uint64_t guest_num_pages; - uint64_t host_num_pages; - uint64_t pages_per_slot; - int i; - struct kvm_page_stats stats_populated; - struct kvm_page_stats stats_dirty_logging_enabled; - struct kvm_page_stats stats_dirty_pass[ITERATIONS]; - struct kvm_page_stats stats_clear_pass[ITERATIONS]; - struct kvm_page_stats stats_dirty_logging_disabled; - struct kvm_page_stats stats_repopulated; - - vm = memstress_create_vm(mode, VCPUS, guest_percpu_mem_size, - SLOTS, backing_src, false); - - guest_num_pages = (VCPUS * guest_percpu_mem_size) >> vm->page_shift; - guest_num_pages = vm_adjust_num_guest_pages(mode, guest_num_pages); - host_num_pages = vm_num_host_pages(mode, guest_num_pages); - pages_per_slot = host_num_pages / SLOTS; - TEST_ASSERT_EQ(host_num_pages, pages_per_slot * SLOTS); - TEST_ASSERT(!(host_num_pages % 512), - "Number of pages, '%lu' not a multiple of 2MiB", host_num_pages); - - bitmaps = memstress_alloc_bitmaps(SLOTS, pages_per_slot); - - if (dirty_log_manual_caps) - vm_enable_cap(vm, KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2, - dirty_log_manual_caps); - - /* Start the iterations */ - iteration = -1; - host_quit = false; - - for (i = 0; i < VCPUS; i++) - vcpu_last_completed_iteration[i] = -1; - - memstress_start_vcpu_threads(VCPUS, vcpu_worker); - - run_vcpu_iteration(vm); - get_page_stats(vm, &stats_populated, "populating memory"); - - /* Enable dirty logging */ - memstress_enable_dirty_logging(vm, SLOTS); - - get_page_stats(vm, &stats_dirty_logging_enabled, "enabling dirty logging"); - - while (iteration < ITERATIONS) { - run_vcpu_iteration(vm); - 
get_page_stats(vm, &stats_dirty_pass[iteration - 1], - "dirtying memory"); - - memstress_get_dirty_log(vm, bitmaps, SLOTS); - - if (dirty_log_manual_caps) { - memstress_clear_dirty_log(vm, bitmaps, SLOTS, pages_per_slot); - - get_page_stats(vm, &stats_clear_pass[iteration - 1], "clearing dirty log"); - } - } - - /* Disable dirty logging */ - memstress_disable_dirty_logging(vm, SLOTS); - - get_page_stats(vm, &stats_dirty_logging_disabled, "disabling dirty logging"); - - /* Run vCPUs again to fault pages back in. */ - run_vcpu_iteration(vm); - get_page_stats(vm, &stats_repopulated, "repopulating memory"); - - /* - * Tell the vCPU threads to quit. No need to manually check that vCPUs - * have stopped running after disabling dirty logging, the join will - * wait for them to exit. - */ - host_quit = true; - memstress_join_vcpu_threads(VCPUS); - - memstress_free_bitmaps(bitmaps, SLOTS); - memstress_destroy_vm(vm); - - TEST_ASSERT_EQ((stats_populated.pages_2m * 512 + - stats_populated.pages_1g * 512 * 512), host_num_pages); - - /* - * Check that all huge pages were split. Since large pages can only - * exist in the data slot, and the vCPUs should have dirtied all pages - * in the data slot, there should be no huge pages left after splitting. - * Splitting happens at dirty log enable time without - * KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 and after the first clear pass - * with that capability. - */ - if (dirty_log_manual_caps) { - TEST_ASSERT_EQ(stats_clear_pass[0].hugepages, 0); - TEST_ASSERT(stats_clear_pass[0].pages_4k >= host_num_pages, - "Expected at least '%lu' 4KiB pages, found only '%lu'", - host_num_pages, stats_clear_pass[0].pages_4k); - TEST_ASSERT_EQ(stats_dirty_logging_enabled.hugepages, stats_populated.hugepages); - } else { - TEST_ASSERT_EQ(stats_dirty_logging_enabled.hugepages, 0); - TEST_ASSERT(stats_dirty_logging_enabled.pages_4k >= host_num_pages, - "Expected at least '%lu' 4KiB pages, found only '%lu'", - host_num_pages, stats_dirty_logging_enabled.pages_4k); - } - - /* - * Once dirty logging is disabled and the vCPUs have touched all their - * memory again, the hugepage counts should be the same as they were - * right after initial population of memory. - */ - TEST_ASSERT_EQ(stats_populated.pages_2m, stats_repopulated.pages_2m); - TEST_ASSERT_EQ(stats_populated.pages_1g, stats_repopulated.pages_1g); -} - -static void help(char *name) -{ - puts(""); - printf("usage: %s [-h] [-b vcpu bytes] [-s mem type]\n", - name); - puts(""); - printf(" -b: specify the size of the memory region which should be\n" - " dirtied by each vCPU. e.g. 10M or 3G.\n" - " (default: 1G)\n"); - backing_src_help("-s"); - puts(""); -} - -int main(int argc, char *argv[]) -{ - int opt; - - TEST_REQUIRE(get_kvm_param_bool("eager_page_split")); - TEST_REQUIRE(get_kvm_param_bool("tdp_mmu")); - - while ((opt = getopt(argc, argv, "b:hs:")) != -1) { - switch (opt) { - case 'b': - guest_percpu_mem_size = parse_size(optarg); - break; - case 'h': - help(argv[0]); - exit(0); - case 's': - backing_src = parse_backing_src_type(optarg); - break; - default: - help(argv[0]); - exit(1); - } - } - - if (!is_backing_src_hugetlb(backing_src)) { - pr_info("This test will only work reliably with HugeTLB memory. 
" - "It can work with THP, but that is best effort.\n"); - } - - guest_modes_append_default(); - - dirty_log_manual_caps = 0; - for_each_guest_mode(run_test, NULL); - - dirty_log_manual_caps = - kvm_check_cap(KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2); - - if (dirty_log_manual_caps) { - dirty_log_manual_caps &= (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | - KVM_DIRTY_LOG_INITIALLY_SET); - for_each_guest_mode(run_test, NULL); - } else { - pr_info("Skipping testing with MANUAL_PROTECT as it is not supported"); - } - - return 0; -} diff --git a/tools/testing/selftests/kvm/x86_64/exit_on_emulation_failure_test.c b/tools/testing/selftests/kvm/x86_64/exit_on_emulation_failure_test.c deleted file mode 100644 index 81055476d394..000000000000 --- a/tools/testing/selftests/kvm/x86_64/exit_on_emulation_failure_test.c +++ /dev/null @@ -1,39 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright (C) 2022, Google LLC. - * - * Test for KVM_CAP_EXIT_ON_EMULATION_FAILURE. - */ -#include "flds_emulation.h" -#include "test_util.h" -#include "ucall_common.h" - -#define MMIO_GPA 0x700000000 -#define MMIO_GVA MMIO_GPA - -static void guest_code(void) -{ - /* Execute flds with an MMIO address to force KVM to emulate it. */ - flds(MMIO_GVA); - GUEST_DONE(); -} - -int main(int argc, char *argv[]) -{ - struct kvm_vcpu *vcpu; - struct kvm_vm *vm; - - TEST_REQUIRE(kvm_has_cap(KVM_CAP_EXIT_ON_EMULATION_FAILURE)); - - vm = vm_create_with_one_vcpu(&vcpu, guest_code); - vm_enable_cap(vm, KVM_CAP_EXIT_ON_EMULATION_FAILURE, 1); - virt_map(vm, MMIO_GVA, MMIO_GPA, 1); - - vcpu_run(vcpu); - handle_flds_emulation_failure_exit(vcpu); - vcpu_run(vcpu); - TEST_ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_DONE); - - kvm_vm_free(vm); - return 0; -} diff --git a/tools/testing/selftests/kvm/x86_64/feature_msrs_test.c b/tools/testing/selftests/kvm/x86_64/feature_msrs_test.c deleted file mode 100644 index a72f13ae2edb..000000000000 --- a/tools/testing/selftests/kvm/x86_64/feature_msrs_test.c +++ /dev/null @@ -1,113 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright (C) 2020, Red Hat, Inc. - */ -#include -#include -#include -#include -#include - -#include "test_util.h" -#include "kvm_util.h" -#include "processor.h" - -static bool is_kvm_controlled_msr(uint32_t msr) -{ - return msr == MSR_IA32_VMX_CR0_FIXED1 || msr == MSR_IA32_VMX_CR4_FIXED1; -} - -/* - * For VMX MSRs with a "true" variant, KVM requires userspace to set the "true" - * MSR, and doesn't allow setting the hidden version. - */ -static bool is_hidden_vmx_msr(uint32_t msr) -{ - switch (msr) { - case MSR_IA32_VMX_PINBASED_CTLS: - case MSR_IA32_VMX_PROCBASED_CTLS: - case MSR_IA32_VMX_EXIT_CTLS: - case MSR_IA32_VMX_ENTRY_CTLS: - return true; - default: - return false; - } -} - -static bool is_quirked_msr(uint32_t msr) -{ - return msr != MSR_AMD64_DE_CFG; -} - -static void test_feature_msr(uint32_t msr) -{ - const uint64_t supported_mask = kvm_get_feature_msr(msr); - uint64_t reset_value = is_quirked_msr(msr) ? supported_mask : 0; - struct kvm_vcpu *vcpu; - struct kvm_vm *vm; - - /* - * Don't bother testing KVM-controlled MSRs beyond verifying that the - * MSR can be read from userspace. Any value is effectively legal, as - * KVM is bound by x86 architecture, not by ABI. - */ - if (is_kvm_controlled_msr(msr)) - return; - - /* - * More goofy behavior. KVM reports the host CPU's actual revision ID, - * but initializes the vCPU's revision ID to an arbitrary value. - */ - if (msr == MSR_IA32_UCODE_REV) - reset_value = host_cpu_is_intel ? 
0x100000000ULL : 0x01000065; - - /* - * For quirked MSRs, KVM's ABI is to initialize the vCPU's value to the - * full set of features supported by KVM. For non-quirked MSRs, and - * when the quirk is disabled, KVM must zero-initialize the MSR and let - * userspace do the configuration. - */ - vm = vm_create_with_one_vcpu(&vcpu, NULL); - TEST_ASSERT(vcpu_get_msr(vcpu, msr) == reset_value, - "Wanted 0x%lx for %squirked MSR 0x%x, got 0x%lx", - reset_value, is_quirked_msr(msr) ? "" : "non-", msr, - vcpu_get_msr(vcpu, msr)); - if (!is_hidden_vmx_msr(msr)) - vcpu_set_msr(vcpu, msr, supported_mask); - kvm_vm_free(vm); - - if (is_hidden_vmx_msr(msr)) - return; - - if (!kvm_has_cap(KVM_CAP_DISABLE_QUIRKS2) || - !(kvm_check_cap(KVM_CAP_DISABLE_QUIRKS2) & KVM_X86_QUIRK_STUFF_FEATURE_MSRS)) - return; - - vm = vm_create(1); - vm_enable_cap(vm, KVM_CAP_DISABLE_QUIRKS2, KVM_X86_QUIRK_STUFF_FEATURE_MSRS); - - vcpu = vm_vcpu_add(vm, 0, NULL); - TEST_ASSERT(!vcpu_get_msr(vcpu, msr), - "Quirk disabled, wanted '0' for MSR 0x%x, got 0x%lx", - msr, vcpu_get_msr(vcpu, msr)); - kvm_vm_free(vm); -} - -int main(int argc, char *argv[]) -{ - const struct kvm_msr_list *feature_list; - int i; - - /* - * Skip the entire test if MSR_FEATURES isn't supported, other tests - * will cover the "regular" list of MSRs, the coverage here is purely - * opportunistic and not interesting on its own. - */ - TEST_REQUIRE(kvm_has_cap(KVM_CAP_GET_MSR_FEATURES)); - - (void)kvm_get_msr_index_list(); - - feature_list = kvm_get_feature_msr_index_list(); - for (i = 0; i < feature_list->nmsrs; i++) - test_feature_msr(feature_list->indices[i]); -} diff --git a/tools/testing/selftests/kvm/x86_64/fix_hypercall_test.c b/tools/testing/selftests/kvm/x86_64/fix_hypercall_test.c deleted file mode 100644 index 762628f7d4ba..000000000000 --- a/tools/testing/selftests/kvm/x86_64/fix_hypercall_test.c +++ /dev/null @@ -1,142 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Copyright (C) 2020, Google LLC. - * - * Tests for KVM paravirtual feature disablement - */ -#include -#include -#include -#include - -#include "kvm_test_harness.h" -#include "apic.h" -#include "test_util.h" -#include "kvm_util.h" -#include "processor.h" - -/* VMCALL and VMMCALL are both 3-byte opcodes. 
*/ -#define HYPERCALL_INSN_SIZE 3 - -static bool quirk_disabled; - -static void guest_ud_handler(struct ex_regs *regs) -{ - regs->rax = -EFAULT; - regs->rip += HYPERCALL_INSN_SIZE; -} - -static const uint8_t vmx_vmcall[HYPERCALL_INSN_SIZE] = { 0x0f, 0x01, 0xc1 }; -static const uint8_t svm_vmmcall[HYPERCALL_INSN_SIZE] = { 0x0f, 0x01, 0xd9 }; - -extern uint8_t hypercall_insn[HYPERCALL_INSN_SIZE]; -static uint64_t do_sched_yield(uint8_t apic_id) -{ - uint64_t ret; - - asm volatile("hypercall_insn:\n\t" - ".byte 0xcc,0xcc,0xcc\n\t" - : "=a"(ret) - : "a"((uint64_t)KVM_HC_SCHED_YIELD), "b"((uint64_t)apic_id) - : "memory"); - - return ret; -} - -static void guest_main(void) -{ - const uint8_t *native_hypercall_insn; - const uint8_t *other_hypercall_insn; - uint64_t ret; - - if (host_cpu_is_intel) { - native_hypercall_insn = vmx_vmcall; - other_hypercall_insn = svm_vmmcall; - } else if (host_cpu_is_amd) { - native_hypercall_insn = svm_vmmcall; - other_hypercall_insn = vmx_vmcall; - } else { - GUEST_ASSERT(0); - /* unreachable */ - return; - } - - memcpy(hypercall_insn, other_hypercall_insn, HYPERCALL_INSN_SIZE); - - ret = do_sched_yield(GET_APIC_ID_FIELD(xapic_read_reg(APIC_ID))); - - /* - * If the quirk is disabled, verify that guest_ud_handler() "returned" - * -EFAULT and that KVM did NOT patch the hypercall. If the quirk is - * enabled, verify that the hypercall succeeded and that KVM patched in - * the "right" hypercall. - */ - if (quirk_disabled) { - GUEST_ASSERT(ret == (uint64_t)-EFAULT); - GUEST_ASSERT(!memcmp(other_hypercall_insn, hypercall_insn, - HYPERCALL_INSN_SIZE)); - } else { - GUEST_ASSERT(!ret); - GUEST_ASSERT(!memcmp(native_hypercall_insn, hypercall_insn, - HYPERCALL_INSN_SIZE)); - } - - GUEST_DONE(); -} - -KVM_ONE_VCPU_TEST_SUITE(fix_hypercall); - -static void enter_guest(struct kvm_vcpu *vcpu) -{ - struct kvm_run *run = vcpu->run; - struct ucall uc; - - vcpu_run(vcpu); - switch (get_ucall(vcpu, &uc)) { - case UCALL_SYNC: - pr_info("%s: %016lx\n", (const char *)uc.args[2], uc.args[3]); - break; - case UCALL_DONE: - return; - case UCALL_ABORT: - REPORT_GUEST_ASSERT(uc); - default: - TEST_FAIL("Unhandled ucall: %ld\nexit_reason: %u (%s)", - uc.cmd, run->exit_reason, exit_reason_str(run->exit_reason)); - } -} - -static void test_fix_hypercall(struct kvm_vcpu *vcpu, bool disable_quirk) -{ - struct kvm_vm *vm = vcpu->vm; - - vm_install_exception_handler(vcpu->vm, UD_VECTOR, guest_ud_handler); - - if (disable_quirk) - vm_enable_cap(vm, KVM_CAP_DISABLE_QUIRKS2, - KVM_X86_QUIRK_FIX_HYPERCALL_INSN); - - quirk_disabled = disable_quirk; - sync_global_to_guest(vm, quirk_disabled); - - virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA); - - enter_guest(vcpu); -} - -KVM_ONE_VCPU_TEST(fix_hypercall, enable_quirk, guest_main) -{ - test_fix_hypercall(vcpu, false); -} - -KVM_ONE_VCPU_TEST(fix_hypercall, disable_quirk, guest_main) -{ - test_fix_hypercall(vcpu, true); -} - -int main(int argc, char *argv[]) -{ - TEST_REQUIRE(kvm_check_cap(KVM_CAP_DISABLE_QUIRKS2) & KVM_X86_QUIRK_FIX_HYPERCALL_INSN); - - return test_harness_run(argc, argv); -} diff --git a/tools/testing/selftests/kvm/x86_64/flds_emulation.h b/tools/testing/selftests/kvm/x86_64/flds_emulation.h deleted file mode 100644 index 37b1a9f52864..000000000000 --- a/tools/testing/selftests/kvm/x86_64/flds_emulation.h +++ /dev/null @@ -1,52 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -#ifndef SELFTEST_KVM_FLDS_EMULATION_H -#define SELFTEST_KVM_FLDS_EMULATION_H - -#include "kvm_util.h" - -#define FLDS_MEM_EAX ".byte 0xd9, 0x00" - -/* - 
* flds is an instruction that the KVM instruction emulator is known not to - * support. This can be used in guest code along with a mechanism to force - * KVM to emulate the instruction (e.g. by providing an MMIO address) to - * exercise emulation failures. - */ -static inline void flds(uint64_t address) -{ - __asm__ __volatile__(FLDS_MEM_EAX :: "a"(address)); -} - -static inline void handle_flds_emulation_failure_exit(struct kvm_vcpu *vcpu) -{ - struct kvm_run *run = vcpu->run; - struct kvm_regs regs; - uint8_t *insn_bytes; - uint64_t flags; - - TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_INTERNAL_ERROR); - - TEST_ASSERT(run->emulation_failure.suberror == KVM_INTERNAL_ERROR_EMULATION, - "Unexpected suberror: %u", - run->emulation_failure.suberror); - - flags = run->emulation_failure.flags; - TEST_ASSERT(run->emulation_failure.ndata >= 3 && - flags & KVM_INTERNAL_ERROR_EMULATION_FLAG_INSTRUCTION_BYTES, - "run->emulation_failure is missing instruction bytes"); - - TEST_ASSERT(run->emulation_failure.insn_size >= 2, - "Expected a 2-byte opcode for 'flds', got %d bytes", - run->emulation_failure.insn_size); - - insn_bytes = run->emulation_failure.insn_bytes; - TEST_ASSERT(insn_bytes[0] == 0xd9 && insn_bytes[1] == 0, - "Expected 'flds [eax]', opcode '0xd9 0x00', got opcode 0x%02x 0x%02x", - insn_bytes[0], insn_bytes[1]); - - vcpu_regs_get(vcpu, ®s); - regs.rip += 2; - vcpu_regs_set(vcpu, ®s); -} - -#endif /* !SELFTEST_KVM_FLDS_EMULATION_H */ diff --git a/tools/testing/selftests/kvm/x86_64/hwcr_msr_test.c b/tools/testing/selftests/kvm/x86_64/hwcr_msr_test.c deleted file mode 100644 index 10b1b0ba374e..000000000000 --- a/tools/testing/selftests/kvm/x86_64/hwcr_msr_test.c +++ /dev/null @@ -1,45 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright (C) 2023, Google LLC. - */ -#include - -#include "test_util.h" -#include "kvm_util.h" -#include "vmx.h" - -void test_hwcr_bit(struct kvm_vcpu *vcpu, unsigned int bit) -{ - const uint64_t ignored = BIT_ULL(3) | BIT_ULL(6) | BIT_ULL(8); - const uint64_t valid = BIT_ULL(18) | BIT_ULL(24); - const uint64_t legal = ignored | valid; - uint64_t val = BIT_ULL(bit); - uint64_t actual; - int r; - - r = _vcpu_set_msr(vcpu, MSR_K7_HWCR, val); - TEST_ASSERT(val & ~legal ? !r : r == 1, - "Expected KVM_SET_MSRS(MSR_K7_HWCR) = 0x%lx to %s", - val, val & ~legal ? "fail" : "succeed"); - - actual = vcpu_get_msr(vcpu, MSR_K7_HWCR); - TEST_ASSERT(actual == (val & valid), - "Bit %u: unexpected HWCR 0x%lx; expected 0x%lx", - bit, actual, (val & valid)); - - vcpu_set_msr(vcpu, MSR_K7_HWCR, 0); -} - -int main(int argc, char *argv[]) -{ - struct kvm_vm *vm; - struct kvm_vcpu *vcpu; - unsigned int bit; - - vm = vm_create_with_one_vcpu(&vcpu, NULL); - - for (bit = 0; bit < BITS_PER_LONG; bit++) - test_hwcr_bit(vcpu, bit); - - kvm_vm_free(vm); -} diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_clock.c b/tools/testing/selftests/kvm/x86_64/hyperv_clock.c deleted file mode 100644 index e058bc676cd6..000000000000 --- a/tools/testing/selftests/kvm/x86_64/hyperv_clock.c +++ /dev/null @@ -1,263 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Copyright (C) 2021, Red Hat, Inc. 
- * - * Tests for Hyper-V clocksources - */ -#include "test_util.h" -#include "kvm_util.h" -#include "processor.h" -#include "hyperv.h" - -struct ms_hyperv_tsc_page { - volatile u32 tsc_sequence; - u32 reserved1; - volatile u64 tsc_scale; - volatile s64 tsc_offset; -} __packed; - -/* Simplified mul_u64_u64_shr() */ -static inline u64 mul_u64_u64_shr64(u64 a, u64 b) -{ - union { - u64 ll; - struct { - u32 low, high; - } l; - } rm, rn, rh, a0, b0; - u64 c; - - a0.ll = a; - b0.ll = b; - - rm.ll = (u64)a0.l.low * b0.l.high; - rn.ll = (u64)a0.l.high * b0.l.low; - rh.ll = (u64)a0.l.high * b0.l.high; - - rh.l.low = c = rm.l.high + rn.l.high + rh.l.low; - rh.l.high = (c >> 32) + rh.l.high; - - return rh.ll; -} - -static inline void nop_loop(void) -{ - int i; - - for (i = 0; i < 100000000; i++) - asm volatile("nop"); -} - -static inline void check_tsc_msr_rdtsc(void) -{ - u64 tsc_freq, r1, r2, t1, t2; - s64 delta_ns; - - tsc_freq = rdmsr(HV_X64_MSR_TSC_FREQUENCY); - GUEST_ASSERT(tsc_freq > 0); - - /* For increased accuracy, take mean rdtsc() before and afrer rdmsr() */ - r1 = rdtsc(); - t1 = rdmsr(HV_X64_MSR_TIME_REF_COUNT); - r1 = (r1 + rdtsc()) / 2; - nop_loop(); - r2 = rdtsc(); - t2 = rdmsr(HV_X64_MSR_TIME_REF_COUNT); - r2 = (r2 + rdtsc()) / 2; - - GUEST_ASSERT(r2 > r1 && t2 > t1); - - /* HV_X64_MSR_TIME_REF_COUNT is in 100ns */ - delta_ns = ((t2 - t1) * 100) - ((r2 - r1) * 1000000000 / tsc_freq); - if (delta_ns < 0) - delta_ns = -delta_ns; - - /* 1% tolerance */ - GUEST_ASSERT(delta_ns * 100 < (t2 - t1) * 100); -} - -static inline u64 get_tscpage_ts(struct ms_hyperv_tsc_page *tsc_page) -{ - return mul_u64_u64_shr64(rdtsc(), tsc_page->tsc_scale) + tsc_page->tsc_offset; -} - -static inline void check_tsc_msr_tsc_page(struct ms_hyperv_tsc_page *tsc_page) -{ - u64 r1, r2, t1, t2; - - /* Compare TSC page clocksource with HV_X64_MSR_TIME_REF_COUNT */ - t1 = get_tscpage_ts(tsc_page); - r1 = rdmsr(HV_X64_MSR_TIME_REF_COUNT); - - /* 10 ms tolerance */ - GUEST_ASSERT(r1 >= t1 && r1 - t1 < 100000); - nop_loop(); - - t2 = get_tscpage_ts(tsc_page); - r2 = rdmsr(HV_X64_MSR_TIME_REF_COUNT); - GUEST_ASSERT(r2 >= t1 && r2 - t2 < 100000); -} - -static void guest_main(struct ms_hyperv_tsc_page *tsc_page, vm_paddr_t tsc_page_gpa) -{ - u64 tsc_scale, tsc_offset; - - /* Set Guest OS id to enable Hyper-V emulation */ - GUEST_SYNC(1); - wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID); - GUEST_SYNC(2); - - check_tsc_msr_rdtsc(); - - GUEST_SYNC(3); - - /* Set up TSC page is disabled state, check that it's clean */ - wrmsr(HV_X64_MSR_REFERENCE_TSC, tsc_page_gpa); - GUEST_ASSERT(tsc_page->tsc_sequence == 0); - GUEST_ASSERT(tsc_page->tsc_scale == 0); - GUEST_ASSERT(tsc_page->tsc_offset == 0); - - GUEST_SYNC(4); - - /* Set up TSC page is enabled state */ - wrmsr(HV_X64_MSR_REFERENCE_TSC, tsc_page_gpa | 0x1); - GUEST_ASSERT(tsc_page->tsc_sequence != 0); - - GUEST_SYNC(5); - - check_tsc_msr_tsc_page(tsc_page); - - GUEST_SYNC(6); - - tsc_offset = tsc_page->tsc_offset; - /* Call KVM_SET_CLOCK from userspace, check that TSC page was updated */ - - GUEST_SYNC(7); - /* Sanity check TSC page timestamp, it should be close to 0 */ - GUEST_ASSERT(get_tscpage_ts(tsc_page) < 100000); - - GUEST_ASSERT(tsc_page->tsc_offset != tsc_offset); - - nop_loop(); - - /* - * Enable Re-enlightenment and check that TSC page stays constant across - * KVM_SET_CLOCK. 
- */ - wrmsr(HV_X64_MSR_REENLIGHTENMENT_CONTROL, 0x1 << 16 | 0xff); - wrmsr(HV_X64_MSR_TSC_EMULATION_CONTROL, 0x1); - tsc_offset = tsc_page->tsc_offset; - tsc_scale = tsc_page->tsc_scale; - GUEST_SYNC(8); - GUEST_ASSERT(tsc_page->tsc_offset == tsc_offset); - GUEST_ASSERT(tsc_page->tsc_scale == tsc_scale); - - GUEST_SYNC(9); - - check_tsc_msr_tsc_page(tsc_page); - - /* - * Disable re-enlightenment and TSC page, check that KVM doesn't update - * it anymore. - */ - wrmsr(HV_X64_MSR_REENLIGHTENMENT_CONTROL, 0); - wrmsr(HV_X64_MSR_TSC_EMULATION_CONTROL, 0); - wrmsr(HV_X64_MSR_REFERENCE_TSC, 0); - memset(tsc_page, 0, sizeof(*tsc_page)); - - GUEST_SYNC(10); - GUEST_ASSERT(tsc_page->tsc_sequence == 0); - GUEST_ASSERT(tsc_page->tsc_offset == 0); - GUEST_ASSERT(tsc_page->tsc_scale == 0); - - GUEST_DONE(); -} - -static void host_check_tsc_msr_rdtsc(struct kvm_vcpu *vcpu) -{ - u64 tsc_freq, r1, r2, t1, t2; - s64 delta_ns; - - tsc_freq = vcpu_get_msr(vcpu, HV_X64_MSR_TSC_FREQUENCY); - TEST_ASSERT(tsc_freq > 0, "TSC frequency must be nonzero"); - - /* For increased accuracy, take mean rdtsc() before and afrer ioctl */ - r1 = rdtsc(); - t1 = vcpu_get_msr(vcpu, HV_X64_MSR_TIME_REF_COUNT); - r1 = (r1 + rdtsc()) / 2; - nop_loop(); - r2 = rdtsc(); - t2 = vcpu_get_msr(vcpu, HV_X64_MSR_TIME_REF_COUNT); - r2 = (r2 + rdtsc()) / 2; - - TEST_ASSERT(t2 > t1, "Time reference MSR is not monotonic (%ld <= %ld)", t1, t2); - - /* HV_X64_MSR_TIME_REF_COUNT is in 100ns */ - delta_ns = ((t2 - t1) * 100) - ((r2 - r1) * 1000000000 / tsc_freq); - if (delta_ns < 0) - delta_ns = -delta_ns; - - /* 1% tolerance */ - TEST_ASSERT(delta_ns * 100 < (t2 - t1) * 100, - "Elapsed time does not match (MSR=%ld, TSC=%ld)", - (t2 - t1) * 100, (r2 - r1) * 1000000000 / tsc_freq); -} - -int main(void) -{ - struct kvm_vcpu *vcpu; - struct kvm_vm *vm; - struct ucall uc; - vm_vaddr_t tsc_page_gva; - int stage; - - TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_TIME)); - TEST_REQUIRE(sys_clocksource_is_based_on_tsc()); - - vm = vm_create_with_one_vcpu(&vcpu, guest_main); - - vcpu_set_hv_cpuid(vcpu); - - tsc_page_gva = vm_vaddr_alloc_page(vm); - memset(addr_gva2hva(vm, tsc_page_gva), 0x0, getpagesize()); - TEST_ASSERT((addr_gva2gpa(vm, tsc_page_gva) & (getpagesize() - 1)) == 0, - "TSC page has to be page aligned"); - vcpu_args_set(vcpu, 2, tsc_page_gva, addr_gva2gpa(vm, tsc_page_gva)); - - host_check_tsc_msr_rdtsc(vcpu); - - for (stage = 1;; stage++) { - vcpu_run(vcpu); - TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); - - switch (get_ucall(vcpu, &uc)) { - case UCALL_ABORT: - REPORT_GUEST_ASSERT(uc); - /* NOT REACHED */ - case UCALL_SYNC: - break; - case UCALL_DONE: - /* Keep in sync with guest_main() */ - TEST_ASSERT(stage == 11, "Testing ended prematurely, stage %d", - stage); - goto out; - default: - TEST_FAIL("Unknown ucall %lu", uc.cmd); - } - - TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") && - uc.args[1] == stage, - "Stage %d: Unexpected register values vmexit, got %lx", - stage, (ulong)uc.args[1]); - - /* Reset kvmclock triggering TSC page update */ - if (stage == 7 || stage == 8 || stage == 10) { - struct kvm_clock_data clock = {0}; - - vm_ioctl(vm, KVM_SET_CLOCK, &clock); - } - } - -out: - kvm_vm_free(vm); -} diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c b/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c deleted file mode 100644 index 4f5881d4ef66..000000000000 --- a/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c +++ /dev/null @@ -1,172 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Test for x86 
KVM_CAP_HYPERV_CPUID - * - * Copyright (C) 2018, Red Hat, Inc. - * - * This work is licensed under the terms of the GNU GPL, version 2. - * - */ -#include -#include -#include -#include -#include - -#include "test_util.h" -#include "kvm_util.h" -#include "processor.h" -#include "vmx.h" - -static void guest_code(void) -{ -} - -static bool smt_possible(void) -{ - char buf[16]; - FILE *f; - bool res = true; - - f = fopen("/sys/devices/system/cpu/smt/control", "r"); - if (f) { - if (fread(buf, sizeof(*buf), sizeof(buf), f) > 0) { - if (!strncmp(buf, "forceoff", 8) || - !strncmp(buf, "notsupported", 12)) - res = false; - } - fclose(f); - } - - return res; -} - -static void test_hv_cpuid(const struct kvm_cpuid2 *hv_cpuid_entries, - bool evmcs_expected) -{ - int i; - int nent_expected = 10; - u32 test_val; - - TEST_ASSERT(hv_cpuid_entries->nent == nent_expected, - "KVM_GET_SUPPORTED_HV_CPUID should return %d entries" - " (returned %d)", - nent_expected, hv_cpuid_entries->nent); - - for (i = 0; i < hv_cpuid_entries->nent; i++) { - const struct kvm_cpuid_entry2 *entry = &hv_cpuid_entries->entries[i]; - - TEST_ASSERT((entry->function >= 0x40000000) && - (entry->function <= 0x40000082), - "function %x is our of supported range", - entry->function); - - TEST_ASSERT(entry->index == 0, - ".index field should be zero"); - - TEST_ASSERT(entry->flags == 0, - ".flags field should be zero"); - - TEST_ASSERT(!entry->padding[0] && !entry->padding[1] && - !entry->padding[2], "padding should be zero"); - - switch (entry->function) { - case 0x40000000: - test_val = 0x40000082; - - TEST_ASSERT(entry->eax == test_val, - "Wrong max leaf report in 0x40000000.EAX: %x" - " (evmcs=%d)", - entry->eax, evmcs_expected - ); - break; - case 0x40000004: - test_val = entry->eax & (1UL << 18); - - TEST_ASSERT(!!test_val == !smt_possible(), - "NoNonArchitecturalCoreSharing bit" - " doesn't reflect SMT setting"); - break; - case 0x4000000A: - TEST_ASSERT(entry->eax & (1UL << 19), - "Enlightened MSR-Bitmap should always be supported" - " 0x40000000.EAX: %x", entry->eax); - if (evmcs_expected) - TEST_ASSERT((entry->eax & 0xffff) == 0x101, - "Supported Enlightened VMCS version range is supposed to be 1:1" - " 0x40000000.EAX: %x", entry->eax); - - break; - default: - break; - - } - /* - * If needed for debug: - * fprintf(stdout, - * "CPUID%lx EAX=0x%lx EBX=0x%lx ECX=0x%lx EDX=0x%lx\n", - * entry->function, entry->eax, entry->ebx, entry->ecx, - * entry->edx); - */ - } -} - -void test_hv_cpuid_e2big(struct kvm_vm *vm, struct kvm_vcpu *vcpu) -{ - static struct kvm_cpuid2 cpuid = {.nent = 0}; - int ret; - - if (vcpu) - ret = __vcpu_ioctl(vcpu, KVM_GET_SUPPORTED_HV_CPUID, &cpuid); - else - ret = __kvm_ioctl(vm->kvm_fd, KVM_GET_SUPPORTED_HV_CPUID, &cpuid); - - TEST_ASSERT(ret == -1 && errno == E2BIG, - "%s KVM_GET_SUPPORTED_HV_CPUID didn't fail with -E2BIG when" - " it should have: %d %d", !vcpu ? 
"KVM" : "vCPU", ret, errno); -} - -int main(int argc, char *argv[]) -{ - struct kvm_vm *vm; - const struct kvm_cpuid2 *hv_cpuid_entries; - struct kvm_vcpu *vcpu; - - TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_CPUID)); - - vm = vm_create_with_one_vcpu(&vcpu, guest_code); - - /* Test vCPU ioctl version */ - test_hv_cpuid_e2big(vm, vcpu); - - hv_cpuid_entries = vcpu_get_supported_hv_cpuid(vcpu); - test_hv_cpuid(hv_cpuid_entries, false); - free((void *)hv_cpuid_entries); - - if (!kvm_cpu_has(X86_FEATURE_VMX) || - !kvm_has_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS)) { - print_skip("Enlightened VMCS is unsupported"); - goto do_sys; - } - vcpu_enable_evmcs(vcpu); - hv_cpuid_entries = vcpu_get_supported_hv_cpuid(vcpu); - test_hv_cpuid(hv_cpuid_entries, true); - free((void *)hv_cpuid_entries); - -do_sys: - /* Test system ioctl version */ - if (!kvm_has_cap(KVM_CAP_SYS_HYPERV_CPUID)) { - print_skip("KVM_CAP_SYS_HYPERV_CPUID not supported"); - goto out; - } - - test_hv_cpuid_e2big(vm, NULL); - - hv_cpuid_entries = kvm_get_supported_hv_cpuid(); - test_hv_cpuid(hv_cpuid_entries, kvm_cpu_has(X86_FEATURE_VMX)); - -out: - kvm_vm_free(vm); - - return 0; -} diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_evmcs.c b/tools/testing/selftests/kvm/x86_64/hyperv_evmcs.c deleted file mode 100644 index 74cf19661309..000000000000 --- a/tools/testing/selftests/kvm/x86_64/hyperv_evmcs.c +++ /dev/null @@ -1,307 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright (C) 2018, Red Hat, Inc. - * - * Tests for Enlightened VMCS, including nested guest state. - */ -#include -#include -#include -#include -#include -#include - -#include "test_util.h" - -#include "kvm_util.h" - -#include "hyperv.h" -#include "vmx.h" - -static int ud_count; - -static void guest_ud_handler(struct ex_regs *regs) -{ - ud_count++; - regs->rip += 3; /* VMLAUNCH */ -} - -static void guest_nmi_handler(struct ex_regs *regs) -{ -} - -static inline void rdmsr_from_l2(uint32_t msr) -{ - /* Currently, L1 doesn't preserve GPRs during vmexits. */ - __asm__ __volatile__ ("rdmsr" : : "c"(msr) : - "rax", "rbx", "rdx", "rsi", "rdi", "r8", "r9", - "r10", "r11", "r12", "r13", "r14", "r15"); -} - -/* Exit to L1 from L2 with RDMSR instruction */ -void l2_guest_code(void) -{ - u64 unused; - - GUEST_SYNC(7); - - GUEST_SYNC(8); - - /* Forced exit to L1 upon restore */ - GUEST_SYNC(9); - - vmcall(); - - /* MSR-Bitmap tests */ - rdmsr_from_l2(MSR_FS_BASE); /* intercepted */ - rdmsr_from_l2(MSR_FS_BASE); /* intercepted */ - rdmsr_from_l2(MSR_GS_BASE); /* not intercepted */ - vmcall(); - rdmsr_from_l2(MSR_GS_BASE); /* intercepted */ - - /* L2 TLB flush tests */ - hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE | HV_HYPERCALL_FAST_BIT, 0x0, - HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES | HV_FLUSH_ALL_PROCESSORS); - rdmsr_from_l2(MSR_FS_BASE); - /* - * Note: hypercall status (RAX) is not preserved correctly by L1 after - * synthetic vmexit, use unchecked version. - */ - __hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE | HV_HYPERCALL_FAST_BIT, 0x0, - HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES | HV_FLUSH_ALL_PROCESSORS, - &unused); - - /* Done, exit to L1 and never come back. 
*/ - vmcall(); -} - -void guest_code(struct vmx_pages *vmx_pages, struct hyperv_test_pages *hv_pages, - vm_vaddr_t hv_hcall_page_gpa) -{ -#define L2_GUEST_STACK_SIZE 64 - unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; - - wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID); - wrmsr(HV_X64_MSR_HYPERCALL, hv_hcall_page_gpa); - - x2apic_enable(); - - GUEST_SYNC(1); - GUEST_SYNC(2); - - enable_vp_assist(hv_pages->vp_assist_gpa, hv_pages->vp_assist); - evmcs_enable(); - - GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages)); - GUEST_SYNC(3); - GUEST_ASSERT(load_evmcs(hv_pages)); - GUEST_ASSERT(vmptrstz() == hv_pages->enlightened_vmcs_gpa); - - GUEST_SYNC(4); - GUEST_ASSERT(vmptrstz() == hv_pages->enlightened_vmcs_gpa); - - prepare_vmcs(vmx_pages, l2_guest_code, - &l2_guest_stack[L2_GUEST_STACK_SIZE]); - - GUEST_SYNC(5); - GUEST_ASSERT(vmptrstz() == hv_pages->enlightened_vmcs_gpa); - current_evmcs->revision_id = -1u; - GUEST_ASSERT(vmlaunch()); - current_evmcs->revision_id = EVMCS_VERSION; - GUEST_SYNC(6); - - vmwrite(PIN_BASED_VM_EXEC_CONTROL, vmreadz(PIN_BASED_VM_EXEC_CONTROL) | - PIN_BASED_NMI_EXITING); - - /* L2 TLB flush setup */ - current_evmcs->partition_assist_page = hv_pages->partition_assist_gpa; - current_evmcs->hv_enlightenments_control.nested_flush_hypercall = 1; - current_evmcs->hv_vm_id = 1; - current_evmcs->hv_vp_id = 1; - current_vp_assist->nested_control.features.directhypercall = 1; - *(u32 *)(hv_pages->partition_assist) = 0; - - GUEST_ASSERT(!vmlaunch()); - GUEST_ASSERT_EQ(vmreadz(VM_EXIT_REASON), EXIT_REASON_EXCEPTION_NMI); - GUEST_ASSERT_EQ((vmreadz(VM_EXIT_INTR_INFO) & 0xff), NMI_VECTOR); - GUEST_ASSERT(vmptrstz() == hv_pages->enlightened_vmcs_gpa); - - /* - * NMI forces L2->L1 exit, resuming L2 and hope that EVMCS is - * up-to-date (RIP points where it should and not at the beginning - * of l2_guest_code(). GUEST_SYNC(9) checkes that. - */ - GUEST_ASSERT(!vmresume()); - - GUEST_SYNC(10); - - GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL); - current_evmcs->guest_rip += 3; /* vmcall */ - - /* Intercept RDMSR 0xc0000100 */ - vmwrite(CPU_BASED_VM_EXEC_CONTROL, vmreadz(CPU_BASED_VM_EXEC_CONTROL) | - CPU_BASED_USE_MSR_BITMAPS); - __set_bit(MSR_FS_BASE & 0x1fff, vmx_pages->msr + 0x400); - GUEST_ASSERT(!vmresume()); - GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_MSR_READ); - current_evmcs->guest_rip += 2; /* rdmsr */ - - /* Enable enlightened MSR bitmap */ - current_evmcs->hv_enlightenments_control.msr_bitmap = 1; - GUEST_ASSERT(!vmresume()); - GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_MSR_READ); - current_evmcs->guest_rip += 2; /* rdmsr */ - - /* Intercept RDMSR 0xc0000101 without telling KVM about it */ - __set_bit(MSR_GS_BASE & 0x1fff, vmx_pages->msr + 0x400); - /* Make sure HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP is set */ - current_evmcs->hv_clean_fields |= HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP; - GUEST_ASSERT(!vmresume()); - /* Make sure we don't see EXIT_REASON_MSR_READ here so eMSR bitmap works */ - GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL); - current_evmcs->guest_rip += 3; /* vmcall */ - - /* Now tell KVM we've changed MSR-Bitmap */ - current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP; - GUEST_ASSERT(!vmresume()); - GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_MSR_READ); - current_evmcs->guest_rip += 2; /* rdmsr */ - - /* - * L2 TLB flush test. First VMCALL should be handled directly by L0, - * no VMCALL exit expected. 
- */ - GUEST_ASSERT(!vmresume()); - GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_MSR_READ); - current_evmcs->guest_rip += 2; /* rdmsr */ - /* Enable synthetic vmexit */ - *(u32 *)(hv_pages->partition_assist) = 1; - GUEST_ASSERT(!vmresume()); - GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == HV_VMX_SYNTHETIC_EXIT_REASON_TRAP_AFTER_FLUSH); - - GUEST_ASSERT(!vmresume()); - GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL); - GUEST_SYNC(11); - - /* Try enlightened vmptrld with an incorrect GPA */ - evmcs_vmptrld(0xdeadbeef, hv_pages->enlightened_vmcs); - GUEST_ASSERT(vmlaunch()); - GUEST_ASSERT(ud_count == 1); - GUEST_DONE(); -} - -void inject_nmi(struct kvm_vcpu *vcpu) -{ - struct kvm_vcpu_events events; - - vcpu_events_get(vcpu, &events); - - events.nmi.pending = 1; - events.flags |= KVM_VCPUEVENT_VALID_NMI_PENDING; - - vcpu_events_set(vcpu, &events); -} - -static struct kvm_vcpu *save_restore_vm(struct kvm_vm *vm, - struct kvm_vcpu *vcpu) -{ - struct kvm_regs regs1, regs2; - struct kvm_x86_state *state; - - state = vcpu_save_state(vcpu); - memset(®s1, 0, sizeof(regs1)); - vcpu_regs_get(vcpu, ®s1); - - kvm_vm_release(vm); - - /* Restore state in a new VM. */ - vcpu = vm_recreate_with_one_vcpu(vm); - vcpu_set_hv_cpuid(vcpu); - vcpu_enable_evmcs(vcpu); - vcpu_load_state(vcpu, state); - kvm_x86_state_cleanup(state); - - memset(®s2, 0, sizeof(regs2)); - vcpu_regs_get(vcpu, ®s2); - TEST_ASSERT(!memcmp(®s1, ®s2, sizeof(regs2)), - "Unexpected register values after vcpu_load_state; rdi: %lx rsi: %lx", - (ulong) regs2.rdi, (ulong) regs2.rsi); - return vcpu; -} - -int main(int argc, char *argv[]) -{ - vm_vaddr_t vmx_pages_gva = 0, hv_pages_gva = 0; - vm_vaddr_t hcall_page; - - struct kvm_vcpu *vcpu; - struct kvm_vm *vm; - struct ucall uc; - int stage; - - TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX)); - TEST_REQUIRE(kvm_has_cap(KVM_CAP_NESTED_STATE)); - TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS)); - TEST_REQUIRE(kvm_hv_cpu_has(HV_X64_NESTED_DIRECT_FLUSH)); - - vm = vm_create_with_one_vcpu(&vcpu, guest_code); - - hcall_page = vm_vaddr_alloc_pages(vm, 1); - memset(addr_gva2hva(vm, hcall_page), 0x0, getpagesize()); - - vcpu_set_hv_cpuid(vcpu); - vcpu_enable_evmcs(vcpu); - - vcpu_alloc_vmx(vm, &vmx_pages_gva); - vcpu_alloc_hyperv_test_pages(vm, &hv_pages_gva); - vcpu_args_set(vcpu, 3, vmx_pages_gva, hv_pages_gva, addr_gva2gpa(vm, hcall_page)); - vcpu_set_msr(vcpu, HV_X64_MSR_VP_INDEX, vcpu->id); - - vm_install_exception_handler(vm, UD_VECTOR, guest_ud_handler); - vm_install_exception_handler(vm, NMI_VECTOR, guest_nmi_handler); - - pr_info("Running L1 which uses EVMCS to run L2\n"); - - for (stage = 1;; stage++) { - vcpu_run(vcpu); - TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); - - switch (get_ucall(vcpu, &uc)) { - case UCALL_ABORT: - REPORT_GUEST_ASSERT(uc); - /* NOT REACHED */ - case UCALL_SYNC: - break; - case UCALL_DONE: - goto done; - default: - TEST_FAIL("Unknown ucall %lu", uc.cmd); - } - - /* UCALL_SYNC is handled here. */ - TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") && - uc.args[1] == stage, "Stage %d: Unexpected register values vmexit, got %lx", - stage, (ulong)uc.args[1]); - - vcpu = save_restore_vm(vm, vcpu); - - /* Force immediate L2->L1 exit before resuming */ - if (stage == 8) { - pr_info("Injecting NMI into L1 before L2 had a chance to run after restore\n"); - inject_nmi(vcpu); - } - - /* - * Do KVM_GET_NESTED_STATE/KVM_SET_NESTED_STATE for a freshly - * restored VM (before the first KVM_RUN) to check that - * KVM_STATE_NESTED_EVMCS is not lost. 
- */ - if (stage == 9) { - pr_info("Trying extra KVM_GET_NESTED_STATE/KVM_SET_NESTED_STATE cycle\n"); - vcpu = save_restore_vm(vm, vcpu); - } - } - -done: - kvm_vm_free(vm); -} diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_extended_hypercalls.c b/tools/testing/selftests/kvm/x86_64/hyperv_extended_hypercalls.c deleted file mode 100644 index 949e08e98f31..000000000000 --- a/tools/testing/selftests/kvm/x86_64/hyperv_extended_hypercalls.c +++ /dev/null @@ -1,98 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Test Hyper-V extended hypercall, HV_EXT_CALL_QUERY_CAPABILITIES (0x8001), - * exit to userspace and receive result in guest. - * - * Negative tests are present in hyperv_features.c - * - * Copyright 2022 Google LLC - * Author: Vipin Sharma - */ -#include "kvm_util.h" -#include "processor.h" -#include "hyperv.h" - -/* Any value is fine */ -#define EXT_CAPABILITIES 0xbull - -static void guest_code(vm_paddr_t in_pg_gpa, vm_paddr_t out_pg_gpa, - vm_vaddr_t out_pg_gva) -{ - uint64_t *output_gva; - - wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID); - wrmsr(HV_X64_MSR_HYPERCALL, in_pg_gpa); - - output_gva = (uint64_t *)out_pg_gva; - - hyperv_hypercall(HV_EXT_CALL_QUERY_CAPABILITIES, in_pg_gpa, out_pg_gpa); - - /* TLFS states output will be a uint64_t value */ - GUEST_ASSERT_EQ(*output_gva, EXT_CAPABILITIES); - - GUEST_DONE(); -} - -int main(void) -{ - vm_vaddr_t hcall_out_page; - vm_vaddr_t hcall_in_page; - struct kvm_vcpu *vcpu; - struct kvm_run *run; - struct kvm_vm *vm; - uint64_t *outval; - struct ucall uc; - - TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_CPUID)); - - /* Verify if extended hypercalls are supported */ - if (!kvm_cpuid_has(kvm_get_supported_hv_cpuid(), - HV_ENABLE_EXTENDED_HYPERCALLS)) { - print_skip("Extended calls not supported by the kernel"); - exit(KSFT_SKIP); - } - - vm = vm_create_with_one_vcpu(&vcpu, guest_code); - run = vcpu->run; - vcpu_set_hv_cpuid(vcpu); - - /* Hypercall input */ - hcall_in_page = vm_vaddr_alloc_pages(vm, 1); - memset(addr_gva2hva(vm, hcall_in_page), 0x0, vm->page_size); - - /* Hypercall output */ - hcall_out_page = vm_vaddr_alloc_pages(vm, 1); - memset(addr_gva2hva(vm, hcall_out_page), 0x0, vm->page_size); - - vcpu_args_set(vcpu, 3, addr_gva2gpa(vm, hcall_in_page), - addr_gva2gpa(vm, hcall_out_page), hcall_out_page); - - vcpu_run(vcpu); - - TEST_ASSERT(run->exit_reason == KVM_EXIT_HYPERV, - "Unexpected exit reason: %u (%s)", - run->exit_reason, exit_reason_str(run->exit_reason)); - - outval = addr_gpa2hva(vm, run->hyperv.u.hcall.params[1]); - *outval = EXT_CAPABILITIES; - run->hyperv.u.hcall.result = HV_STATUS_SUCCESS; - - vcpu_run(vcpu); - - TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, - "Unexpected exit reason: %u (%s)", - run->exit_reason, exit_reason_str(run->exit_reason)); - - switch (get_ucall(vcpu, &uc)) { - case UCALL_ABORT: - REPORT_GUEST_ASSERT(uc); - break; - case UCALL_DONE: - break; - default: - TEST_FAIL("Unhandled ucall: %ld", uc.cmd); - } - - kvm_vm_free(vm); - return 0; -} diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_features.c b/tools/testing/selftests/kvm/x86_64/hyperv_features.c deleted file mode 100644 index 068e9c69710d..000000000000 --- a/tools/testing/selftests/kvm/x86_64/hyperv_features.c +++ /dev/null @@ -1,695 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Copyright (C) 2021, Red Hat, Inc. 
- * - * Tests for Hyper-V features enablement - */ -#include -#include -#include - -#include "test_util.h" -#include "kvm_util.h" -#include "processor.h" -#include "hyperv.h" - -/* - * HYPERV_CPUID_ENLIGHTMENT_INFO.EBX is not a 'feature' CPUID leaf - * but to activate the feature it is sufficient to set it to a non-zero - * value. Use BIT(0) for that. - */ -#define HV_PV_SPINLOCKS_TEST \ - KVM_X86_CPU_FEATURE(HYPERV_CPUID_ENLIGHTMENT_INFO, 0, EBX, 0) - -struct msr_data { - uint32_t idx; - bool fault_expected; - bool write; - u64 write_val; -}; - -struct hcall_data { - uint64_t control; - uint64_t expect; - bool ud_expected; -}; - -static bool is_write_only_msr(uint32_t msr) -{ - return msr == HV_X64_MSR_EOI; -} - -static void guest_msr(struct msr_data *msr) -{ - uint8_t vector = 0; - uint64_t msr_val = 0; - - GUEST_ASSERT(msr->idx); - - if (msr->write) - vector = wrmsr_safe(msr->idx, msr->write_val); - - if (!vector && (!msr->write || !is_write_only_msr(msr->idx))) - vector = rdmsr_safe(msr->idx, &msr_val); - - if (msr->fault_expected) - __GUEST_ASSERT(vector == GP_VECTOR, - "Expected #GP on %sMSR(0x%x), got vector '0x%x'", - msr->write ? "WR" : "RD", msr->idx, vector); - else - __GUEST_ASSERT(!vector, - "Expected success on %sMSR(0x%x), got vector '0x%x'", - msr->write ? "WR" : "RD", msr->idx, vector); - - if (vector || is_write_only_msr(msr->idx)) - goto done; - - if (msr->write) - __GUEST_ASSERT(!vector, - "WRMSR(0x%x) to '0x%lx', RDMSR read '0x%lx'", - msr->idx, msr->write_val, msr_val); - - /* Invariant TSC bit appears when TSC invariant control MSR is written to */ - if (msr->idx == HV_X64_MSR_TSC_INVARIANT_CONTROL) { - if (!this_cpu_has(HV_ACCESS_TSC_INVARIANT)) - GUEST_ASSERT(this_cpu_has(X86_FEATURE_INVTSC)); - else - GUEST_ASSERT(this_cpu_has(X86_FEATURE_INVTSC) == - !!(msr_val & HV_INVARIANT_TSC_EXPOSED)); - } - -done: - GUEST_DONE(); -} - -static void guest_hcall(vm_vaddr_t pgs_gpa, struct hcall_data *hcall) -{ - u64 res, input, output; - uint8_t vector; - - GUEST_ASSERT_NE(hcall->control, 0); - - wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID); - wrmsr(HV_X64_MSR_HYPERCALL, pgs_gpa); - - if (!(hcall->control & HV_HYPERCALL_FAST_BIT)) { - input = pgs_gpa; - output = pgs_gpa + 4096; - } else { - input = output = 0; - } - - vector = __hyperv_hypercall(hcall->control, input, output, &res); - if (hcall->ud_expected) { - __GUEST_ASSERT(vector == UD_VECTOR, - "Expected #UD for control '%lu', got vector '0x%x'", - hcall->control, vector); - } else { - __GUEST_ASSERT(!vector, - "Expected no exception for control '%lu', got vector '0x%x'", - hcall->control, vector); - GUEST_ASSERT_EQ(res, hcall->expect); - } - - GUEST_DONE(); -} - -static void vcpu_reset_hv_cpuid(struct kvm_vcpu *vcpu) -{ - /* - * Enable all supported Hyper-V features, then clear the leafs holding - * the features that will be tested one by one. 
- */ - vcpu_set_hv_cpuid(vcpu); - - vcpu_clear_cpuid_entry(vcpu, HYPERV_CPUID_FEATURES); - vcpu_clear_cpuid_entry(vcpu, HYPERV_CPUID_ENLIGHTMENT_INFO); - vcpu_clear_cpuid_entry(vcpu, HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES); -} - -static void guest_test_msrs_access(void) -{ - struct kvm_cpuid2 *prev_cpuid = NULL; - struct kvm_vcpu *vcpu; - struct kvm_vm *vm; - struct ucall uc; - int stage = 0; - vm_vaddr_t msr_gva; - struct msr_data *msr; - bool has_invtsc = kvm_cpu_has(X86_FEATURE_INVTSC); - - while (true) { - vm = vm_create_with_one_vcpu(&vcpu, guest_msr); - - msr_gva = vm_vaddr_alloc_page(vm); - memset(addr_gva2hva(vm, msr_gva), 0x0, getpagesize()); - msr = addr_gva2hva(vm, msr_gva); - - vcpu_args_set(vcpu, 1, msr_gva); - vcpu_enable_cap(vcpu, KVM_CAP_HYPERV_ENFORCE_CPUID, 1); - - if (!prev_cpuid) { - vcpu_reset_hv_cpuid(vcpu); - - prev_cpuid = allocate_kvm_cpuid2(vcpu->cpuid->nent); - } else { - vcpu_init_cpuid(vcpu, prev_cpuid); - } - - /* TODO: Make this entire test easier to maintain. */ - if (stage >= 21) - vcpu_enable_cap(vcpu, KVM_CAP_HYPERV_SYNIC2, 0); - - switch (stage) { - case 0: - /* - * Only available when Hyper-V identification is set - */ - msr->idx = HV_X64_MSR_GUEST_OS_ID; - msr->write = false; - msr->fault_expected = true; - break; - case 1: - msr->idx = HV_X64_MSR_HYPERCALL; - msr->write = false; - msr->fault_expected = true; - break; - case 2: - vcpu_set_cpuid_feature(vcpu, HV_MSR_HYPERCALL_AVAILABLE); - /* - * HV_X64_MSR_GUEST_OS_ID has to be written first to make - * HV_X64_MSR_HYPERCALL available. - */ - msr->idx = HV_X64_MSR_GUEST_OS_ID; - msr->write = true; - msr->write_val = HYPERV_LINUX_OS_ID; - msr->fault_expected = false; - break; - case 3: - msr->idx = HV_X64_MSR_GUEST_OS_ID; - msr->write = false; - msr->fault_expected = false; - break; - case 4: - msr->idx = HV_X64_MSR_HYPERCALL; - msr->write = false; - msr->fault_expected = false; - break; - - case 5: - msr->idx = HV_X64_MSR_VP_RUNTIME; - msr->write = false; - msr->fault_expected = true; - break; - case 6: - vcpu_set_cpuid_feature(vcpu, HV_MSR_VP_RUNTIME_AVAILABLE); - msr->idx = HV_X64_MSR_VP_RUNTIME; - msr->write = false; - msr->fault_expected = false; - break; - case 7: - /* Read only */ - msr->idx = HV_X64_MSR_VP_RUNTIME; - msr->write = true; - msr->write_val = 1; - msr->fault_expected = true; - break; - - case 8: - msr->idx = HV_X64_MSR_TIME_REF_COUNT; - msr->write = false; - msr->fault_expected = true; - break; - case 9: - vcpu_set_cpuid_feature(vcpu, HV_MSR_TIME_REF_COUNT_AVAILABLE); - msr->idx = HV_X64_MSR_TIME_REF_COUNT; - msr->write = false; - msr->fault_expected = false; - break; - case 10: - /* Read only */ - msr->idx = HV_X64_MSR_TIME_REF_COUNT; - msr->write = true; - msr->write_val = 1; - msr->fault_expected = true; - break; - - case 11: - msr->idx = HV_X64_MSR_VP_INDEX; - msr->write = false; - msr->fault_expected = true; - break; - case 12: - vcpu_set_cpuid_feature(vcpu, HV_MSR_VP_INDEX_AVAILABLE); - msr->idx = HV_X64_MSR_VP_INDEX; - msr->write = false; - msr->fault_expected = false; - break; - case 13: - /* Read only */ - msr->idx = HV_X64_MSR_VP_INDEX; - msr->write = true; - msr->write_val = 1; - msr->fault_expected = true; - break; - - case 14: - msr->idx = HV_X64_MSR_RESET; - msr->write = false; - msr->fault_expected = true; - break; - case 15: - vcpu_set_cpuid_feature(vcpu, HV_MSR_RESET_AVAILABLE); - msr->idx = HV_X64_MSR_RESET; - msr->write = false; - msr->fault_expected = false; - break; - case 16: - msr->idx = HV_X64_MSR_RESET; - msr->write = true; - /* - * TODO: the test only 
writes '0' to HV_X64_MSR_RESET - * at the moment, writing some other value there will - * trigger real vCPU reset and the code is not prepared - * to handle it yet. - */ - msr->write_val = 0; - msr->fault_expected = false; - break; - - case 17: - msr->idx = HV_X64_MSR_REFERENCE_TSC; - msr->write = false; - msr->fault_expected = true; - break; - case 18: - vcpu_set_cpuid_feature(vcpu, HV_MSR_REFERENCE_TSC_AVAILABLE); - msr->idx = HV_X64_MSR_REFERENCE_TSC; - msr->write = false; - msr->fault_expected = false; - break; - case 19: - msr->idx = HV_X64_MSR_REFERENCE_TSC; - msr->write = true; - msr->write_val = 0; - msr->fault_expected = false; - break; - - case 20: - msr->idx = HV_X64_MSR_EOM; - msr->write = false; - msr->fault_expected = true; - break; - case 21: - /* - * Remains unavailable even with KVM_CAP_HYPERV_SYNIC2 - * capability enabled and guest visible CPUID bit unset. - */ - msr->idx = HV_X64_MSR_EOM; - msr->write = false; - msr->fault_expected = true; - break; - case 22: - vcpu_set_cpuid_feature(vcpu, HV_MSR_SYNIC_AVAILABLE); - msr->idx = HV_X64_MSR_EOM; - msr->write = false; - msr->fault_expected = false; - break; - case 23: - msr->idx = HV_X64_MSR_EOM; - msr->write = true; - msr->write_val = 0; - msr->fault_expected = false; - break; - - case 24: - msr->idx = HV_X64_MSR_STIMER0_CONFIG; - msr->write = false; - msr->fault_expected = true; - break; - case 25: - vcpu_set_cpuid_feature(vcpu, HV_MSR_SYNTIMER_AVAILABLE); - msr->idx = HV_X64_MSR_STIMER0_CONFIG; - msr->write = false; - msr->fault_expected = false; - break; - case 26: - msr->idx = HV_X64_MSR_STIMER0_CONFIG; - msr->write = true; - msr->write_val = 0; - msr->fault_expected = false; - break; - case 27: - /* Direct mode test */ - msr->idx = HV_X64_MSR_STIMER0_CONFIG; - msr->write = true; - msr->write_val = 1 << 12; - msr->fault_expected = true; - break; - case 28: - vcpu_set_cpuid_feature(vcpu, HV_STIMER_DIRECT_MODE_AVAILABLE); - msr->idx = HV_X64_MSR_STIMER0_CONFIG; - msr->write = true; - msr->write_val = 1 << 12; - msr->fault_expected = false; - break; - - case 29: - msr->idx = HV_X64_MSR_EOI; - msr->write = false; - msr->fault_expected = true; - break; - case 30: - vcpu_set_cpuid_feature(vcpu, HV_MSR_APIC_ACCESS_AVAILABLE); - msr->idx = HV_X64_MSR_EOI; - msr->write = true; - msr->write_val = 1; - msr->fault_expected = false; - break; - - case 31: - msr->idx = HV_X64_MSR_TSC_FREQUENCY; - msr->write = false; - msr->fault_expected = true; - break; - case 32: - vcpu_set_cpuid_feature(vcpu, HV_ACCESS_FREQUENCY_MSRS); - msr->idx = HV_X64_MSR_TSC_FREQUENCY; - msr->write = false; - msr->fault_expected = false; - break; - case 33: - /* Read only */ - msr->idx = HV_X64_MSR_TSC_FREQUENCY; - msr->write = true; - msr->write_val = 1; - msr->fault_expected = true; - break; - - case 34: - msr->idx = HV_X64_MSR_REENLIGHTENMENT_CONTROL; - msr->write = false; - msr->fault_expected = true; - break; - case 35: - vcpu_set_cpuid_feature(vcpu, HV_ACCESS_REENLIGHTENMENT); - msr->idx = HV_X64_MSR_REENLIGHTENMENT_CONTROL; - msr->write = false; - msr->fault_expected = false; - break; - case 36: - msr->idx = HV_X64_MSR_REENLIGHTENMENT_CONTROL; - msr->write = true; - msr->write_val = 1; - msr->fault_expected = false; - break; - case 37: - /* Can only write '0' */ - msr->idx = HV_X64_MSR_TSC_EMULATION_STATUS; - msr->write = true; - msr->write_val = 1; - msr->fault_expected = true; - break; - - case 38: - msr->idx = HV_X64_MSR_CRASH_P0; - msr->write = false; - msr->fault_expected = true; - break; - case 39: - vcpu_set_cpuid_feature(vcpu, 
HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE); - msr->idx = HV_X64_MSR_CRASH_P0; - msr->write = false; - msr->fault_expected = false; - break; - case 40: - msr->idx = HV_X64_MSR_CRASH_P0; - msr->write = true; - msr->write_val = 1; - msr->fault_expected = false; - break; - - case 41: - msr->idx = HV_X64_MSR_SYNDBG_STATUS; - msr->write = false; - msr->fault_expected = true; - break; - case 42: - vcpu_set_cpuid_feature(vcpu, HV_FEATURE_DEBUG_MSRS_AVAILABLE); - vcpu_set_cpuid_feature(vcpu, HV_X64_SYNDBG_CAP_ALLOW_KERNEL_DEBUGGING); - msr->idx = HV_X64_MSR_SYNDBG_STATUS; - msr->write = false; - msr->fault_expected = false; - break; - case 43: - msr->idx = HV_X64_MSR_SYNDBG_STATUS; - msr->write = true; - msr->write_val = 0; - msr->fault_expected = false; - break; - - case 44: - /* MSR is not available when CPUID feature bit is unset */ - if (!has_invtsc) - goto next_stage; - msr->idx = HV_X64_MSR_TSC_INVARIANT_CONTROL; - msr->write = false; - msr->fault_expected = true; - break; - case 45: - /* MSR is vailable when CPUID feature bit is set */ - if (!has_invtsc) - goto next_stage; - vcpu_set_cpuid_feature(vcpu, HV_ACCESS_TSC_INVARIANT); - msr->idx = HV_X64_MSR_TSC_INVARIANT_CONTROL; - msr->write = false; - msr->fault_expected = false; - break; - case 46: - /* Writing bits other than 0 is forbidden */ - if (!has_invtsc) - goto next_stage; - msr->idx = HV_X64_MSR_TSC_INVARIANT_CONTROL; - msr->write = true; - msr->write_val = 0xdeadbeef; - msr->fault_expected = true; - break; - case 47: - /* Setting bit 0 enables the feature */ - if (!has_invtsc) - goto next_stage; - msr->idx = HV_X64_MSR_TSC_INVARIANT_CONTROL; - msr->write = true; - msr->write_val = 1; - msr->fault_expected = false; - break; - - default: - kvm_vm_free(vm); - return; - } - - vcpu_set_cpuid(vcpu); - - memcpy(prev_cpuid, vcpu->cpuid, kvm_cpuid2_size(vcpu->cpuid->nent)); - - pr_debug("Stage %d: testing msr: 0x%x for %s\n", stage, - msr->idx, msr->write ? 
"write" : "read"); - - vcpu_run(vcpu); - TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); - - switch (get_ucall(vcpu, &uc)) { - case UCALL_ABORT: - REPORT_GUEST_ASSERT(uc); - return; - case UCALL_DONE: - break; - default: - TEST_FAIL("Unhandled ucall: %ld", uc.cmd); - return; - } - -next_stage: - stage++; - kvm_vm_free(vm); - } -} - -static void guest_test_hcalls_access(void) -{ - struct kvm_cpuid2 *prev_cpuid = NULL; - struct kvm_vcpu *vcpu; - struct kvm_vm *vm; - struct ucall uc; - int stage = 0; - vm_vaddr_t hcall_page, hcall_params; - struct hcall_data *hcall; - - while (true) { - vm = vm_create_with_one_vcpu(&vcpu, guest_hcall); - - /* Hypercall input/output */ - hcall_page = vm_vaddr_alloc_pages(vm, 2); - memset(addr_gva2hva(vm, hcall_page), 0x0, 2 * getpagesize()); - - hcall_params = vm_vaddr_alloc_page(vm); - memset(addr_gva2hva(vm, hcall_params), 0x0, getpagesize()); - hcall = addr_gva2hva(vm, hcall_params); - - vcpu_args_set(vcpu, 2, addr_gva2gpa(vm, hcall_page), hcall_params); - vcpu_enable_cap(vcpu, KVM_CAP_HYPERV_ENFORCE_CPUID, 1); - - if (!prev_cpuid) { - vcpu_reset_hv_cpuid(vcpu); - - prev_cpuid = allocate_kvm_cpuid2(vcpu->cpuid->nent); - } else { - vcpu_init_cpuid(vcpu, prev_cpuid); - } - - switch (stage) { - case 0: - vcpu_set_cpuid_feature(vcpu, HV_MSR_HYPERCALL_AVAILABLE); - hcall->control = 0xbeef; - hcall->expect = HV_STATUS_INVALID_HYPERCALL_CODE; - break; - - case 1: - hcall->control = HVCALL_POST_MESSAGE; - hcall->expect = HV_STATUS_ACCESS_DENIED; - break; - case 2: - vcpu_set_cpuid_feature(vcpu, HV_POST_MESSAGES); - hcall->control = HVCALL_POST_MESSAGE; - hcall->expect = HV_STATUS_INVALID_HYPERCALL_INPUT; - break; - - case 3: - hcall->control = HVCALL_SIGNAL_EVENT; - hcall->expect = HV_STATUS_ACCESS_DENIED; - break; - case 4: - vcpu_set_cpuid_feature(vcpu, HV_SIGNAL_EVENTS); - hcall->control = HVCALL_SIGNAL_EVENT; - hcall->expect = HV_STATUS_INVALID_HYPERCALL_INPUT; - break; - - case 5: - hcall->control = HVCALL_RESET_DEBUG_SESSION; - hcall->expect = HV_STATUS_INVALID_HYPERCALL_CODE; - break; - case 6: - vcpu_set_cpuid_feature(vcpu, HV_X64_SYNDBG_CAP_ALLOW_KERNEL_DEBUGGING); - hcall->control = HVCALL_RESET_DEBUG_SESSION; - hcall->expect = HV_STATUS_ACCESS_DENIED; - break; - case 7: - vcpu_set_cpuid_feature(vcpu, HV_DEBUGGING); - hcall->control = HVCALL_RESET_DEBUG_SESSION; - hcall->expect = HV_STATUS_OPERATION_DENIED; - break; - - case 8: - hcall->control = HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE; - hcall->expect = HV_STATUS_ACCESS_DENIED; - break; - case 9: - vcpu_set_cpuid_feature(vcpu, HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED); - hcall->control = HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE; - hcall->expect = HV_STATUS_SUCCESS; - break; - case 10: - hcall->control = HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX; - hcall->expect = HV_STATUS_ACCESS_DENIED; - break; - case 11: - vcpu_set_cpuid_feature(vcpu, HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED); - hcall->control = HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX; - hcall->expect = HV_STATUS_SUCCESS; - break; - - case 12: - hcall->control = HVCALL_SEND_IPI; - hcall->expect = HV_STATUS_ACCESS_DENIED; - break; - case 13: - vcpu_set_cpuid_feature(vcpu, HV_X64_CLUSTER_IPI_RECOMMENDED); - hcall->control = HVCALL_SEND_IPI; - hcall->expect = HV_STATUS_INVALID_HYPERCALL_INPUT; - break; - case 14: - /* Nothing in 'sparse banks' -> success */ - hcall->control = HVCALL_SEND_IPI_EX; - hcall->expect = HV_STATUS_SUCCESS; - break; - - case 15: - hcall->control = HVCALL_NOTIFY_LONG_SPIN_WAIT; - hcall->expect = HV_STATUS_ACCESS_DENIED; - break; - case 16: - 
vcpu_set_cpuid_feature(vcpu, HV_PV_SPINLOCKS_TEST); - hcall->control = HVCALL_NOTIFY_LONG_SPIN_WAIT; - hcall->expect = HV_STATUS_SUCCESS; - break; - case 17: - /* XMM fast hypercall */ - hcall->control = HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE | HV_HYPERCALL_FAST_BIT; - hcall->ud_expected = true; - break; - case 18: - vcpu_set_cpuid_feature(vcpu, HV_X64_HYPERCALL_XMM_INPUT_AVAILABLE); - hcall->control = HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE | HV_HYPERCALL_FAST_BIT; - hcall->ud_expected = false; - hcall->expect = HV_STATUS_SUCCESS; - break; - case 19: - hcall->control = HV_EXT_CALL_QUERY_CAPABILITIES; - hcall->expect = HV_STATUS_ACCESS_DENIED; - break; - case 20: - vcpu_set_cpuid_feature(vcpu, HV_ENABLE_EXTENDED_HYPERCALLS); - hcall->control = HV_EXT_CALL_QUERY_CAPABILITIES | HV_HYPERCALL_FAST_BIT; - hcall->expect = HV_STATUS_INVALID_PARAMETER; - break; - case 21: - kvm_vm_free(vm); - return; - } - - vcpu_set_cpuid(vcpu); - - memcpy(prev_cpuid, vcpu->cpuid, kvm_cpuid2_size(vcpu->cpuid->nent)); - - pr_debug("Stage %d: testing hcall: 0x%lx\n", stage, hcall->control); - - vcpu_run(vcpu); - TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); - - switch (get_ucall(vcpu, &uc)) { - case UCALL_ABORT: - REPORT_GUEST_ASSERT(uc); - return; - case UCALL_DONE: - break; - default: - TEST_FAIL("Unhandled ucall: %ld", uc.cmd); - return; - } - - stage++; - kvm_vm_free(vm); - } -} - -int main(void) -{ - TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_ENFORCE_CPUID)); - - pr_info("Testing access to Hyper-V specific MSRs\n"); - guest_test_msrs_access(); - - pr_info("Testing access to Hyper-V hypercalls\n"); - guest_test_hcalls_access(); -} diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_ipi.c b/tools/testing/selftests/kvm/x86_64/hyperv_ipi.c deleted file mode 100644 index 22c0c124582f..000000000000 --- a/tools/testing/selftests/kvm/x86_64/hyperv_ipi.c +++ /dev/null @@ -1,308 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Hyper-V HvCallSendSyntheticClusterIpi{,Ex} tests - * - * Copyright (C) 2022, Red Hat, Inc. 
- * - */ -#include -#include - -#include "kvm_util.h" -#include "hyperv.h" -#include "test_util.h" -#include "vmx.h" - -#define RECEIVER_VCPU_ID_1 2 -#define RECEIVER_VCPU_ID_2 65 - -#define IPI_VECTOR 0xfe - -static volatile uint64_t ipis_rcvd[RECEIVER_VCPU_ID_2 + 1]; - -struct hv_vpset { - u64 format; - u64 valid_bank_mask; - u64 bank_contents[2]; -}; - -enum HV_GENERIC_SET_FORMAT { - HV_GENERIC_SET_SPARSE_4K, - HV_GENERIC_SET_ALL, -}; - -/* HvCallSendSyntheticClusterIpi hypercall */ -struct hv_send_ipi { - u32 vector; - u32 reserved; - u64 cpu_mask; -}; - -/* HvCallSendSyntheticClusterIpiEx hypercall */ -struct hv_send_ipi_ex { - u32 vector; - u32 reserved; - struct hv_vpset vp_set; -}; - -static inline void hv_init(vm_vaddr_t pgs_gpa) -{ - wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID); - wrmsr(HV_X64_MSR_HYPERCALL, pgs_gpa); -} - -static void receiver_code(void *hcall_page, vm_vaddr_t pgs_gpa) -{ - u32 vcpu_id; - - x2apic_enable(); - hv_init(pgs_gpa); - - vcpu_id = rdmsr(HV_X64_MSR_VP_INDEX); - - /* Signal sender vCPU we're ready */ - ipis_rcvd[vcpu_id] = (u64)-1; - - for (;;) - asm volatile("sti; hlt; cli"); -} - -static void guest_ipi_handler(struct ex_regs *regs) -{ - u32 vcpu_id = rdmsr(HV_X64_MSR_VP_INDEX); - - ipis_rcvd[vcpu_id]++; - wrmsr(HV_X64_MSR_EOI, 1); -} - -static inline void nop_loop(void) -{ - int i; - - for (i = 0; i < 100000000; i++) - asm volatile("nop"); -} - -static void sender_guest_code(void *hcall_page, vm_vaddr_t pgs_gpa) -{ - struct hv_send_ipi *ipi = (struct hv_send_ipi *)hcall_page; - struct hv_send_ipi_ex *ipi_ex = (struct hv_send_ipi_ex *)hcall_page; - int stage = 1, ipis_expected[2] = {0}; - - hv_init(pgs_gpa); - GUEST_SYNC(stage++); - - /* Wait for receiver vCPUs to come up */ - while (!ipis_rcvd[RECEIVER_VCPU_ID_1] || !ipis_rcvd[RECEIVER_VCPU_ID_2]) - nop_loop(); - ipis_rcvd[RECEIVER_VCPU_ID_1] = ipis_rcvd[RECEIVER_VCPU_ID_2] = 0; - - /* 'Slow' HvCallSendSyntheticClusterIpi to RECEIVER_VCPU_ID_1 */ - ipi->vector = IPI_VECTOR; - ipi->cpu_mask = 1 << RECEIVER_VCPU_ID_1; - hyperv_hypercall(HVCALL_SEND_IPI, pgs_gpa, pgs_gpa + 4096); - nop_loop(); - GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]); - GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ipis_expected[1]); - GUEST_SYNC(stage++); - /* 'Fast' HvCallSendSyntheticClusterIpi to RECEIVER_VCPU_ID_1 */ - hyperv_hypercall(HVCALL_SEND_IPI | HV_HYPERCALL_FAST_BIT, - IPI_VECTOR, 1 << RECEIVER_VCPU_ID_1); - nop_loop(); - GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]); - GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ipis_expected[1]); - GUEST_SYNC(stage++); - - /* 'Slow' HvCallSendSyntheticClusterIpiEx to RECEIVER_VCPU_ID_1 */ - memset(hcall_page, 0, 4096); - ipi_ex->vector = IPI_VECTOR; - ipi_ex->vp_set.format = HV_GENERIC_SET_SPARSE_4K; - ipi_ex->vp_set.valid_bank_mask = 1 << 0; - ipi_ex->vp_set.bank_contents[0] = BIT(RECEIVER_VCPU_ID_1); - hyperv_hypercall(HVCALL_SEND_IPI_EX | (1 << HV_HYPERCALL_VARHEAD_OFFSET), - pgs_gpa, pgs_gpa + 4096); - nop_loop(); - GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]); - GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ipis_expected[1]); - GUEST_SYNC(stage++); - /* 'XMM Fast' HvCallSendSyntheticClusterIpiEx to RECEIVER_VCPU_ID_1 */ - hyperv_write_xmm_input(&ipi_ex->vp_set.valid_bank_mask, 1); - hyperv_hypercall(HVCALL_SEND_IPI_EX | HV_HYPERCALL_FAST_BIT | - (1 << HV_HYPERCALL_VARHEAD_OFFSET), - IPI_VECTOR, HV_GENERIC_SET_SPARSE_4K); - nop_loop(); - GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]); - 
GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ipis_expected[1]); - GUEST_SYNC(stage++); - - /* 'Slow' HvCallSendSyntheticClusterIpiEx to RECEIVER_VCPU_ID_2 */ - memset(hcall_page, 0, 4096); - ipi_ex->vector = IPI_VECTOR; - ipi_ex->vp_set.format = HV_GENERIC_SET_SPARSE_4K; - ipi_ex->vp_set.valid_bank_mask = 1 << 1; - ipi_ex->vp_set.bank_contents[0] = BIT(RECEIVER_VCPU_ID_2 - 64); - hyperv_hypercall(HVCALL_SEND_IPI_EX | (1 << HV_HYPERCALL_VARHEAD_OFFSET), - pgs_gpa, pgs_gpa + 4096); - nop_loop(); - GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ipis_expected[0]); - GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ++ipis_expected[1]); - GUEST_SYNC(stage++); - /* 'XMM Fast' HvCallSendSyntheticClusterIpiEx to RECEIVER_VCPU_ID_2 */ - hyperv_write_xmm_input(&ipi_ex->vp_set.valid_bank_mask, 1); - hyperv_hypercall(HVCALL_SEND_IPI_EX | HV_HYPERCALL_FAST_BIT | - (1 << HV_HYPERCALL_VARHEAD_OFFSET), - IPI_VECTOR, HV_GENERIC_SET_SPARSE_4K); - nop_loop(); - GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ipis_expected[0]); - GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ++ipis_expected[1]); - GUEST_SYNC(stage++); - - /* 'Slow' HvCallSendSyntheticClusterIpiEx to both RECEIVER_VCPU_ID_{1,2} */ - memset(hcall_page, 0, 4096); - ipi_ex->vector = IPI_VECTOR; - ipi_ex->vp_set.format = HV_GENERIC_SET_SPARSE_4K; - ipi_ex->vp_set.valid_bank_mask = 1 << 1 | 1; - ipi_ex->vp_set.bank_contents[0] = BIT(RECEIVER_VCPU_ID_1); - ipi_ex->vp_set.bank_contents[1] = BIT(RECEIVER_VCPU_ID_2 - 64); - hyperv_hypercall(HVCALL_SEND_IPI_EX | (2 << HV_HYPERCALL_VARHEAD_OFFSET), - pgs_gpa, pgs_gpa + 4096); - nop_loop(); - GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]); - GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ++ipis_expected[1]); - GUEST_SYNC(stage++); - /* 'XMM Fast' HvCallSendSyntheticClusterIpiEx to both RECEIVER_VCPU_ID_{1, 2} */ - hyperv_write_xmm_input(&ipi_ex->vp_set.valid_bank_mask, 2); - hyperv_hypercall(HVCALL_SEND_IPI_EX | HV_HYPERCALL_FAST_BIT | - (2 << HV_HYPERCALL_VARHEAD_OFFSET), - IPI_VECTOR, HV_GENERIC_SET_SPARSE_4K); - nop_loop(); - GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]); - GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ++ipis_expected[1]); - GUEST_SYNC(stage++); - - /* 'Slow' HvCallSendSyntheticClusterIpiEx to HV_GENERIC_SET_ALL */ - memset(hcall_page, 0, 4096); - ipi_ex->vector = IPI_VECTOR; - ipi_ex->vp_set.format = HV_GENERIC_SET_ALL; - hyperv_hypercall(HVCALL_SEND_IPI_EX, pgs_gpa, pgs_gpa + 4096); - nop_loop(); - GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]); - GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ++ipis_expected[1]); - GUEST_SYNC(stage++); - /* - * 'XMM Fast' HvCallSendSyntheticClusterIpiEx to HV_GENERIC_SET_ALL. 
- */ - ipi_ex->vp_set.valid_bank_mask = 0; - hyperv_write_xmm_input(&ipi_ex->vp_set.valid_bank_mask, 2); - hyperv_hypercall(HVCALL_SEND_IPI_EX | HV_HYPERCALL_FAST_BIT, - IPI_VECTOR, HV_GENERIC_SET_ALL); - nop_loop(); - GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]); - GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ++ipis_expected[1]); - GUEST_SYNC(stage++); - - GUEST_DONE(); -} - -static void *vcpu_thread(void *arg) -{ - struct kvm_vcpu *vcpu = (struct kvm_vcpu *)arg; - int old, r; - - r = pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, &old); - TEST_ASSERT(!r, "pthread_setcanceltype failed on vcpu_id=%u with errno=%d", - vcpu->id, r); - - vcpu_run(vcpu); - - TEST_FAIL("vCPU %u exited unexpectedly", vcpu->id); - - return NULL; -} - -static void cancel_join_vcpu_thread(pthread_t thread, struct kvm_vcpu *vcpu) -{ - void *retval; - int r; - - r = pthread_cancel(thread); - TEST_ASSERT(!r, "pthread_cancel on vcpu_id=%d failed with errno=%d", - vcpu->id, r); - - r = pthread_join(thread, &retval); - TEST_ASSERT(!r, "pthread_join on vcpu_id=%d failed with errno=%d", - vcpu->id, r); - TEST_ASSERT(retval == PTHREAD_CANCELED, - "expected retval=%p, got %p", PTHREAD_CANCELED, - retval); -} - -int main(int argc, char *argv[]) -{ - struct kvm_vm *vm; - struct kvm_vcpu *vcpu[3]; - vm_vaddr_t hcall_page; - pthread_t threads[2]; - int stage = 1, r; - struct ucall uc; - - TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_SEND_IPI)); - - vm = vm_create_with_one_vcpu(&vcpu[0], sender_guest_code); - - /* Hypercall input/output */ - hcall_page = vm_vaddr_alloc_pages(vm, 2); - memset(addr_gva2hva(vm, hcall_page), 0x0, 2 * getpagesize()); - - - vcpu[1] = vm_vcpu_add(vm, RECEIVER_VCPU_ID_1, receiver_code); - vcpu_args_set(vcpu[1], 2, hcall_page, addr_gva2gpa(vm, hcall_page)); - vcpu_set_msr(vcpu[1], HV_X64_MSR_VP_INDEX, RECEIVER_VCPU_ID_1); - vcpu_set_hv_cpuid(vcpu[1]); - - vcpu[2] = vm_vcpu_add(vm, RECEIVER_VCPU_ID_2, receiver_code); - vcpu_args_set(vcpu[2], 2, hcall_page, addr_gva2gpa(vm, hcall_page)); - vcpu_set_msr(vcpu[2], HV_X64_MSR_VP_INDEX, RECEIVER_VCPU_ID_2); - vcpu_set_hv_cpuid(vcpu[2]); - - vm_install_exception_handler(vm, IPI_VECTOR, guest_ipi_handler); - - vcpu_args_set(vcpu[0], 2, hcall_page, addr_gva2gpa(vm, hcall_page)); - vcpu_set_hv_cpuid(vcpu[0]); - - r = pthread_create(&threads[0], NULL, vcpu_thread, vcpu[1]); - TEST_ASSERT(!r, "pthread_create failed errno=%d", r); - - r = pthread_create(&threads[1], NULL, vcpu_thread, vcpu[2]); - TEST_ASSERT(!r, "pthread_create failed errno=%d", errno); - - while (true) { - vcpu_run(vcpu[0]); - - TEST_ASSERT_KVM_EXIT_REASON(vcpu[0], KVM_EXIT_IO); - - switch (get_ucall(vcpu[0], &uc)) { - case UCALL_SYNC: - TEST_ASSERT(uc.args[1] == stage, - "Unexpected stage: %ld (%d expected)", - uc.args[1], stage); - break; - case UCALL_DONE: - goto done; - case UCALL_ABORT: - REPORT_GUEST_ASSERT(uc); - /* NOT REACHED */ - default: - TEST_FAIL("Unknown ucall %lu", uc.cmd); - } - - stage++; - } - -done: - cancel_join_vcpu_thread(threads[0], vcpu[1]); - cancel_join_vcpu_thread(threads[1], vcpu[2]); - kvm_vm_free(vm); - - return r; -} diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c b/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c deleted file mode 100644 index 0ddb63229bcb..000000000000 --- a/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c +++ /dev/null @@ -1,199 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Copyright (C) 2022, Red Hat, Inc. - * - * Tests for Hyper-V extensions to SVM. 
- */ -#include -#include -#include -#include -#include -#include - -#include "test_util.h" - -#include "kvm_util.h" -#include "processor.h" -#include "svm_util.h" -#include "hyperv.h" - -#define L2_GUEST_STACK_SIZE 256 - -/* Exit to L1 from L2 with RDMSR instruction */ -static inline void rdmsr_from_l2(uint32_t msr) -{ - /* Currently, L1 doesn't preserve GPRs during vmexits. */ - __asm__ __volatile__ ("rdmsr" : : "c"(msr) : - "rax", "rbx", "rdx", "rsi", "rdi", "r8", "r9", - "r10", "r11", "r12", "r13", "r14", "r15"); -} - -void l2_guest_code(void) -{ - u64 unused; - - GUEST_SYNC(3); - /* Exit to L1 */ - vmmcall(); - - /* MSR-Bitmap tests */ - rdmsr_from_l2(MSR_FS_BASE); /* intercepted */ - rdmsr_from_l2(MSR_FS_BASE); /* intercepted */ - rdmsr_from_l2(MSR_GS_BASE); /* not intercepted */ - vmmcall(); - rdmsr_from_l2(MSR_GS_BASE); /* intercepted */ - - GUEST_SYNC(5); - - /* L2 TLB flush tests */ - hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE | - HV_HYPERCALL_FAST_BIT, 0x0, - HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES | - HV_FLUSH_ALL_PROCESSORS); - rdmsr_from_l2(MSR_FS_BASE); - /* - * Note: hypercall status (RAX) is not preserved correctly by L1 after - * synthetic vmexit, use unchecked version. - */ - __hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE | - HV_HYPERCALL_FAST_BIT, 0x0, - HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES | - HV_FLUSH_ALL_PROCESSORS, &unused); - - /* Done, exit to L1 and never come back. */ - vmmcall(); -} - -static void __attribute__((__flatten__)) guest_code(struct svm_test_data *svm, - struct hyperv_test_pages *hv_pages, - vm_vaddr_t pgs_gpa) -{ - unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; - struct vmcb *vmcb = svm->vmcb; - struct hv_vmcb_enlightenments *hve = &vmcb->control.hv_enlightenments; - - GUEST_SYNC(1); - - wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID); - wrmsr(HV_X64_MSR_HYPERCALL, pgs_gpa); - enable_vp_assist(hv_pages->vp_assist_gpa, hv_pages->vp_assist); - - GUEST_ASSERT(svm->vmcb_gpa); - /* Prepare for L2 execution. 
*/ - generic_svm_setup(svm, l2_guest_code, - &l2_guest_stack[L2_GUEST_STACK_SIZE]); - - /* L2 TLB flush setup */ - hve->partition_assist_page = hv_pages->partition_assist_gpa; - hve->hv_enlightenments_control.nested_flush_hypercall = 1; - hve->hv_vm_id = 1; - hve->hv_vp_id = 1; - current_vp_assist->nested_control.features.directhypercall = 1; - *(u32 *)(hv_pages->partition_assist) = 0; - - GUEST_SYNC(2); - run_guest(vmcb, svm->vmcb_gpa); - GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL); - GUEST_SYNC(4); - vmcb->save.rip += 3; - - /* Intercept RDMSR 0xc0000100 */ - vmcb->control.intercept |= 1ULL << INTERCEPT_MSR_PROT; - __set_bit(2 * (MSR_FS_BASE & 0x1fff), svm->msr + 0x800); - run_guest(vmcb, svm->vmcb_gpa); - GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_MSR); - vmcb->save.rip += 2; /* rdmsr */ - - /* Enable enlightened MSR bitmap */ - hve->hv_enlightenments_control.msr_bitmap = 1; - run_guest(vmcb, svm->vmcb_gpa); - GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_MSR); - vmcb->save.rip += 2; /* rdmsr */ - - /* Intercept RDMSR 0xc0000101 without telling KVM about it */ - __set_bit(2 * (MSR_GS_BASE & 0x1fff), svm->msr + 0x800); - /* Make sure HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP is set */ - vmcb->control.clean |= HV_VMCB_NESTED_ENLIGHTENMENTS; - run_guest(vmcb, svm->vmcb_gpa); - /* Make sure we don't see SVM_EXIT_MSR here so eMSR bitmap works */ - GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL); - vmcb->save.rip += 3; /* vmcall */ - - /* Now tell KVM we've changed MSR-Bitmap */ - vmcb->control.clean &= ~HV_VMCB_NESTED_ENLIGHTENMENTS; - run_guest(vmcb, svm->vmcb_gpa); - GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_MSR); - vmcb->save.rip += 2; /* rdmsr */ - - - /* - * L2 TLB flush test. First VMCALL should be handled directly by L0, - * no VMCALL exit expected. - */ - run_guest(vmcb, svm->vmcb_gpa); - GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_MSR); - vmcb->save.rip += 2; /* rdmsr */ - /* Enable synthetic vmexit */ - *(u32 *)(hv_pages->partition_assist) = 1; - run_guest(vmcb, svm->vmcb_gpa); - GUEST_ASSERT(vmcb->control.exit_code == HV_SVM_EXITCODE_ENL); - GUEST_ASSERT(vmcb->control.exit_info_1 == HV_SVM_ENL_EXITCODE_TRAP_AFTER_FLUSH); - - run_guest(vmcb, svm->vmcb_gpa); - GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL); - GUEST_SYNC(6); - - GUEST_DONE(); -} - -int main(int argc, char *argv[]) -{ - vm_vaddr_t nested_gva = 0, hv_pages_gva = 0; - vm_vaddr_t hcall_page; - struct kvm_vcpu *vcpu; - struct kvm_vm *vm; - struct ucall uc; - int stage; - - TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM)); - TEST_REQUIRE(kvm_hv_cpu_has(HV_X64_NESTED_DIRECT_FLUSH)); - - /* Create VM */ - vm = vm_create_with_one_vcpu(&vcpu, guest_code); - vcpu_set_hv_cpuid(vcpu); - vcpu_alloc_svm(vm, &nested_gva); - vcpu_alloc_hyperv_test_pages(vm, &hv_pages_gva); - - hcall_page = vm_vaddr_alloc_pages(vm, 1); - memset(addr_gva2hva(vm, hcall_page), 0x0, getpagesize()); - - vcpu_args_set(vcpu, 3, nested_gva, hv_pages_gva, addr_gva2gpa(vm, hcall_page)); - vcpu_set_msr(vcpu, HV_X64_MSR_VP_INDEX, vcpu->id); - - for (stage = 1;; stage++) { - vcpu_run(vcpu); - TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); - - switch (get_ucall(vcpu, &uc)) { - case UCALL_ABORT: - REPORT_GUEST_ASSERT(uc); - /* NOT REACHED */ - case UCALL_SYNC: - break; - case UCALL_DONE: - goto done; - default: - TEST_FAIL("Unknown ucall %lu", uc.cmd); - } - - /* UCALL_SYNC is handled here. 
*/ - TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") && - uc.args[1] == stage, "Stage %d: Unexpected register values vmexit, got %lx", - stage, (ulong)uc.args[1]); - - } - -done: - kvm_vm_free(vm); -} diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_tlb_flush.c b/tools/testing/selftests/kvm/x86_64/hyperv_tlb_flush.c deleted file mode 100644 index 077cd0ec3040..000000000000 --- a/tools/testing/selftests/kvm/x86_64/hyperv_tlb_flush.c +++ /dev/null @@ -1,680 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Hyper-V HvFlushVirtualAddress{List,Space}{,Ex} tests - * - * Copyright (C) 2022, Red Hat, Inc. - * - */ -#include -#include -#include - -#include "kvm_util.h" -#include "processor.h" -#include "hyperv.h" -#include "test_util.h" -#include "vmx.h" - -#define WORKER_VCPU_ID_1 2 -#define WORKER_VCPU_ID_2 65 - -#define NTRY 100 -#define NTEST_PAGES 2 - -struct hv_vpset { - u64 format; - u64 valid_bank_mask; - u64 bank_contents[]; -}; - -enum HV_GENERIC_SET_FORMAT { - HV_GENERIC_SET_SPARSE_4K, - HV_GENERIC_SET_ALL, -}; - -#define HV_FLUSH_ALL_PROCESSORS BIT(0) -#define HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES BIT(1) -#define HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY BIT(2) -#define HV_FLUSH_USE_EXTENDED_RANGE_FORMAT BIT(3) - -/* HvFlushVirtualAddressSpace, HvFlushVirtualAddressList hypercalls */ -struct hv_tlb_flush { - u64 address_space; - u64 flags; - u64 processor_mask; - u64 gva_list[]; -} __packed; - -/* HvFlushVirtualAddressSpaceEx, HvFlushVirtualAddressListEx hypercalls */ -struct hv_tlb_flush_ex { - u64 address_space; - u64 flags; - struct hv_vpset hv_vp_set; - u64 gva_list[]; -} __packed; - -/* - * Pass the following info to 'workers' and 'sender' - * - Hypercall page's GVA - * - Hypercall page's GPA - * - Test pages GVA - * - GVAs of the test pages' PTEs - */ -struct test_data { - vm_vaddr_t hcall_gva; - vm_paddr_t hcall_gpa; - vm_vaddr_t test_pages; - vm_vaddr_t test_pages_pte[NTEST_PAGES]; -}; - -/* 'Worker' vCPU code checking the contents of the test page */ -static void worker_guest_code(vm_vaddr_t test_data) -{ - struct test_data *data = (struct test_data *)test_data; - u32 vcpu_id = rdmsr(HV_X64_MSR_VP_INDEX); - void *exp_page = (void *)data->test_pages + PAGE_SIZE * NTEST_PAGES; - u64 *this_cpu = (u64 *)(exp_page + vcpu_id * sizeof(u64)); - u64 expected, val; - - x2apic_enable(); - wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID); - - for (;;) { - cpu_relax(); - - expected = READ_ONCE(*this_cpu); - - /* - * Make sure the value in the test page is read after reading - * the expectation for the first time. Pairs with wmb() in - * prepare_to_test(). - */ - rmb(); - - val = READ_ONCE(*(u64 *)data->test_pages); - - /* - * Make sure the value in the test page is read after before - * reading the expectation for the second time. Pairs with wmb() - * post_test(). - */ - rmb(); - - /* - * '0' indicates the sender is between iterations, wait until - * the sender is ready for this vCPU to start checking again. - */ - if (!expected) - continue; - - /* - * Re-read the per-vCPU byte to ensure the sender didn't move - * onto a new iteration. - */ - if (expected != READ_ONCE(*this_cpu)) - continue; - - GUEST_ASSERT(val == expected); - } -} - -/* - * Write per-CPU info indicating what each 'worker' CPU is supposed to see in - * test page. '0' means don't check. - */ -static void set_expected_val(void *addr, u64 val, int vcpu_id) -{ - void *exp_page = addr + PAGE_SIZE * NTEST_PAGES; - - *(u64 *)(exp_page + vcpu_id * sizeof(u64)) = val; -} - -/* - * Update PTEs swapping two test pages. 
- * TODO: use swap()/xchg() when these are provided. - */ -static void swap_two_test_pages(vm_paddr_t pte_gva1, vm_paddr_t pte_gva2) -{ - uint64_t tmp = *(uint64_t *)pte_gva1; - - *(uint64_t *)pte_gva1 = *(uint64_t *)pte_gva2; - *(uint64_t *)pte_gva2 = tmp; -} - -/* - * TODO: replace the silly NOP loop with a proper udelay() implementation. - */ -static inline void do_delay(void) -{ - int i; - - for (i = 0; i < 1000000; i++) - asm volatile("nop"); -} - -/* - * Prepare to test: 'disable' workers by setting the expectation to '0', - * clear hypercall input page and then swap two test pages. - */ -static inline void prepare_to_test(struct test_data *data) -{ - /* Clear hypercall input page */ - memset((void *)data->hcall_gva, 0, PAGE_SIZE); - - /* 'Disable' workers */ - set_expected_val((void *)data->test_pages, 0x0, WORKER_VCPU_ID_1); - set_expected_val((void *)data->test_pages, 0x0, WORKER_VCPU_ID_2); - - /* Make sure workers are 'disabled' before we swap PTEs. */ - wmb(); - - /* Make sure workers have enough time to notice */ - do_delay(); - - /* Swap test page mappings */ - swap_two_test_pages(data->test_pages_pte[0], data->test_pages_pte[1]); -} - -/* - * Finalize the test: check hypercall resule set the expected val for - * 'worker' CPUs and give them some time to test. - */ -static inline void post_test(struct test_data *data, u64 exp1, u64 exp2) -{ - /* Make sure we change the expectation after swapping PTEs */ - wmb(); - - /* Set the expectation for workers, '0' means don't test */ - set_expected_val((void *)data->test_pages, exp1, WORKER_VCPU_ID_1); - set_expected_val((void *)data->test_pages, exp2, WORKER_VCPU_ID_2); - - /* Make sure workers have enough time to test */ - do_delay(); -} - -#define TESTVAL1 0x0101010101010101 -#define TESTVAL2 0x0202020202020202 - -/* Main vCPU doing the test */ -static void sender_guest_code(vm_vaddr_t test_data) -{ - struct test_data *data = (struct test_data *)test_data; - struct hv_tlb_flush *flush = (struct hv_tlb_flush *)data->hcall_gva; - struct hv_tlb_flush_ex *flush_ex = (struct hv_tlb_flush_ex *)data->hcall_gva; - vm_paddr_t hcall_gpa = data->hcall_gpa; - int i, stage = 1; - - wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID); - wrmsr(HV_X64_MSR_HYPERCALL, data->hcall_gpa); - - /* "Slow" hypercalls */ - - GUEST_SYNC(stage++); - - /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE for WORKER_VCPU_ID_1 */ - for (i = 0; i < NTRY; i++) { - prepare_to_test(data); - flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES; - flush->processor_mask = BIT(WORKER_VCPU_ID_1); - hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE, hcall_gpa, - hcall_gpa + PAGE_SIZE); - post_test(data, i % 2 ? TESTVAL1 : TESTVAL2, 0x0); - } - - GUEST_SYNC(stage++); - - /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST for WORKER_VCPU_ID_1 */ - for (i = 0; i < NTRY; i++) { - prepare_to_test(data); - flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES; - flush->processor_mask = BIT(WORKER_VCPU_ID_1); - flush->gva_list[0] = (u64)data->test_pages; - hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST | - (1UL << HV_HYPERCALL_REP_COMP_OFFSET), - hcall_gpa, hcall_gpa + PAGE_SIZE); - post_test(data, i % 2 ? 
TESTVAL1 : TESTVAL2, 0x0); - } - - GUEST_SYNC(stage++); - - /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE for HV_FLUSH_ALL_PROCESSORS */ - for (i = 0; i < NTRY; i++) { - prepare_to_test(data); - flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES | - HV_FLUSH_ALL_PROCESSORS; - flush->processor_mask = 0; - hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE, hcall_gpa, - hcall_gpa + PAGE_SIZE); - post_test(data, i % 2 ? TESTVAL1 : TESTVAL2, i % 2 ? TESTVAL1 : TESTVAL2); - } - - GUEST_SYNC(stage++); - - /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST for HV_FLUSH_ALL_PROCESSORS */ - for (i = 0; i < NTRY; i++) { - prepare_to_test(data); - flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES | - HV_FLUSH_ALL_PROCESSORS; - flush->gva_list[0] = (u64)data->test_pages; - hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST | - (1UL << HV_HYPERCALL_REP_COMP_OFFSET), - hcall_gpa, hcall_gpa + PAGE_SIZE); - post_test(data, i % 2 ? TESTVAL1 : TESTVAL2, - i % 2 ? TESTVAL1 : TESTVAL2); - } - - GUEST_SYNC(stage++); - - /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX for WORKER_VCPU_ID_2 */ - for (i = 0; i < NTRY; i++) { - prepare_to_test(data); - flush_ex->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES; - flush_ex->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K; - flush_ex->hv_vp_set.valid_bank_mask = BIT_ULL(WORKER_VCPU_ID_2 / 64); - flush_ex->hv_vp_set.bank_contents[0] = BIT_ULL(WORKER_VCPU_ID_2 % 64); - hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX | - (1 << HV_HYPERCALL_VARHEAD_OFFSET), - hcall_gpa, hcall_gpa + PAGE_SIZE); - post_test(data, 0x0, i % 2 ? TESTVAL1 : TESTVAL2); - } - - GUEST_SYNC(stage++); - - /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX for WORKER_VCPU_ID_2 */ - for (i = 0; i < NTRY; i++) { - prepare_to_test(data); - flush_ex->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES; - flush_ex->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K; - flush_ex->hv_vp_set.valid_bank_mask = BIT_ULL(WORKER_VCPU_ID_2 / 64); - flush_ex->hv_vp_set.bank_contents[0] = BIT_ULL(WORKER_VCPU_ID_2 % 64); - /* bank_contents and gva_list occupy the same space, thus [1] */ - flush_ex->gva_list[1] = (u64)data->test_pages; - hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX | - (1 << HV_HYPERCALL_VARHEAD_OFFSET) | - (1UL << HV_HYPERCALL_REP_COMP_OFFSET), - hcall_gpa, hcall_gpa + PAGE_SIZE); - post_test(data, 0x0, i % 2 ? TESTVAL1 : TESTVAL2); - } - - GUEST_SYNC(stage++); - - /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX for both vCPUs */ - for (i = 0; i < NTRY; i++) { - prepare_to_test(data); - flush_ex->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES; - flush_ex->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K; - flush_ex->hv_vp_set.valid_bank_mask = BIT_ULL(WORKER_VCPU_ID_2 / 64) | - BIT_ULL(WORKER_VCPU_ID_1 / 64); - flush_ex->hv_vp_set.bank_contents[0] = BIT_ULL(WORKER_VCPU_ID_1 % 64); - flush_ex->hv_vp_set.bank_contents[1] = BIT_ULL(WORKER_VCPU_ID_2 % 64); - hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX | - (2 << HV_HYPERCALL_VARHEAD_OFFSET), - hcall_gpa, hcall_gpa + PAGE_SIZE); - post_test(data, i % 2 ? TESTVAL1 : TESTVAL2, - i % 2 ? 
TESTVAL1 : TESTVAL2); - } - - GUEST_SYNC(stage++); - - /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX for both vCPUs */ - for (i = 0; i < NTRY; i++) { - prepare_to_test(data); - flush_ex->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES; - flush_ex->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K; - flush_ex->hv_vp_set.valid_bank_mask = BIT_ULL(WORKER_VCPU_ID_1 / 64) | - BIT_ULL(WORKER_VCPU_ID_2 / 64); - flush_ex->hv_vp_set.bank_contents[0] = BIT_ULL(WORKER_VCPU_ID_1 % 64); - flush_ex->hv_vp_set.bank_contents[1] = BIT_ULL(WORKER_VCPU_ID_2 % 64); - /* bank_contents and gva_list occupy the same space, thus [2] */ - flush_ex->gva_list[2] = (u64)data->test_pages; - hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX | - (2 << HV_HYPERCALL_VARHEAD_OFFSET) | - (1UL << HV_HYPERCALL_REP_COMP_OFFSET), - hcall_gpa, hcall_gpa + PAGE_SIZE); - post_test(data, i % 2 ? TESTVAL1 : TESTVAL2, - i % 2 ? TESTVAL1 : TESTVAL2); - } - - GUEST_SYNC(stage++); - - /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX for HV_GENERIC_SET_ALL */ - for (i = 0; i < NTRY; i++) { - prepare_to_test(data); - flush_ex->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES; - flush_ex->hv_vp_set.format = HV_GENERIC_SET_ALL; - hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX, - hcall_gpa, hcall_gpa + PAGE_SIZE); - post_test(data, i % 2 ? TESTVAL1 : TESTVAL2, - i % 2 ? TESTVAL1 : TESTVAL2); - } - - GUEST_SYNC(stage++); - - /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX for HV_GENERIC_SET_ALL */ - for (i = 0; i < NTRY; i++) { - prepare_to_test(data); - flush_ex->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES; - flush_ex->hv_vp_set.format = HV_GENERIC_SET_ALL; - flush_ex->gva_list[0] = (u64)data->test_pages; - hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX | - (1UL << HV_HYPERCALL_REP_COMP_OFFSET), - hcall_gpa, hcall_gpa + PAGE_SIZE); - post_test(data, i % 2 ? TESTVAL1 : TESTVAL2, - i % 2 ? TESTVAL1 : TESTVAL2); - } - - /* "Fast" hypercalls */ - - GUEST_SYNC(stage++); - - /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE for WORKER_VCPU_ID_1 */ - for (i = 0; i < NTRY; i++) { - prepare_to_test(data); - flush->processor_mask = BIT(WORKER_VCPU_ID_1); - hyperv_write_xmm_input(&flush->processor_mask, 1); - hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE | - HV_HYPERCALL_FAST_BIT, 0x0, - HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES); - post_test(data, i % 2 ? TESTVAL1 : TESTVAL2, 0x0); - } - - GUEST_SYNC(stage++); - - /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST for WORKER_VCPU_ID_1 */ - for (i = 0; i < NTRY; i++) { - prepare_to_test(data); - flush->processor_mask = BIT(WORKER_VCPU_ID_1); - flush->gva_list[0] = (u64)data->test_pages; - hyperv_write_xmm_input(&flush->processor_mask, 1); - hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST | - HV_HYPERCALL_FAST_BIT | - (1UL << HV_HYPERCALL_REP_COMP_OFFSET), - 0x0, HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES); - post_test(data, i % 2 ? TESTVAL1 : TESTVAL2, 0x0); - } - - GUEST_SYNC(stage++); - - /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE for HV_FLUSH_ALL_PROCESSORS */ - for (i = 0; i < NTRY; i++) { - prepare_to_test(data); - hyperv_write_xmm_input(&flush->processor_mask, 1); - hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE | - HV_HYPERCALL_FAST_BIT, 0x0, - HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES | - HV_FLUSH_ALL_PROCESSORS); - post_test(data, i % 2 ? TESTVAL1 : TESTVAL2, - i % 2 ? 
TESTVAL1 : TESTVAL2); - } - - GUEST_SYNC(stage++); - - /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST for HV_FLUSH_ALL_PROCESSORS */ - for (i = 0; i < NTRY; i++) { - prepare_to_test(data); - flush->gva_list[0] = (u64)data->test_pages; - hyperv_write_xmm_input(&flush->processor_mask, 1); - hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST | - HV_HYPERCALL_FAST_BIT | - (1UL << HV_HYPERCALL_REP_COMP_OFFSET), 0x0, - HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES | - HV_FLUSH_ALL_PROCESSORS); - post_test(data, i % 2 ? TESTVAL1 : TESTVAL2, - i % 2 ? TESTVAL1 : TESTVAL2); - } - - GUEST_SYNC(stage++); - - /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX for WORKER_VCPU_ID_2 */ - for (i = 0; i < NTRY; i++) { - prepare_to_test(data); - flush_ex->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K; - flush_ex->hv_vp_set.valid_bank_mask = BIT_ULL(WORKER_VCPU_ID_2 / 64); - flush_ex->hv_vp_set.bank_contents[0] = BIT_ULL(WORKER_VCPU_ID_2 % 64); - hyperv_write_xmm_input(&flush_ex->hv_vp_set, 2); - hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX | - HV_HYPERCALL_FAST_BIT | - (1 << HV_HYPERCALL_VARHEAD_OFFSET), - 0x0, HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES); - post_test(data, 0x0, i % 2 ? TESTVAL1 : TESTVAL2); - } - - GUEST_SYNC(stage++); - - /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX for WORKER_VCPU_ID_2 */ - for (i = 0; i < NTRY; i++) { - prepare_to_test(data); - flush_ex->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K; - flush_ex->hv_vp_set.valid_bank_mask = BIT_ULL(WORKER_VCPU_ID_2 / 64); - flush_ex->hv_vp_set.bank_contents[0] = BIT_ULL(WORKER_VCPU_ID_2 % 64); - /* bank_contents and gva_list occupy the same space, thus [1] */ - flush_ex->gva_list[1] = (u64)data->test_pages; - hyperv_write_xmm_input(&flush_ex->hv_vp_set, 2); - hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX | - HV_HYPERCALL_FAST_BIT | - (1 << HV_HYPERCALL_VARHEAD_OFFSET) | - (1UL << HV_HYPERCALL_REP_COMP_OFFSET), - 0x0, HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES); - post_test(data, 0x0, i % 2 ? TESTVAL1 : TESTVAL2); - } - - GUEST_SYNC(stage++); - - /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX for both vCPUs */ - for (i = 0; i < NTRY; i++) { - prepare_to_test(data); - flush_ex->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K; - flush_ex->hv_vp_set.valid_bank_mask = BIT_ULL(WORKER_VCPU_ID_2 / 64) | - BIT_ULL(WORKER_VCPU_ID_1 / 64); - flush_ex->hv_vp_set.bank_contents[0] = BIT_ULL(WORKER_VCPU_ID_1 % 64); - flush_ex->hv_vp_set.bank_contents[1] = BIT_ULL(WORKER_VCPU_ID_2 % 64); - hyperv_write_xmm_input(&flush_ex->hv_vp_set, 2); - hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX | - HV_HYPERCALL_FAST_BIT | - (2 << HV_HYPERCALL_VARHEAD_OFFSET), - 0x0, HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES); - post_test(data, i % 2 ? TESTVAL1 : - TESTVAL2, i % 2 ? 
TESTVAL1 : TESTVAL2); - } - - GUEST_SYNC(stage++); - - /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX for both vCPUs */ - for (i = 0; i < NTRY; i++) { - prepare_to_test(data); - flush_ex->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K; - flush_ex->hv_vp_set.valid_bank_mask = BIT_ULL(WORKER_VCPU_ID_1 / 64) | - BIT_ULL(WORKER_VCPU_ID_2 / 64); - flush_ex->hv_vp_set.bank_contents[0] = BIT_ULL(WORKER_VCPU_ID_1 % 64); - flush_ex->hv_vp_set.bank_contents[1] = BIT_ULL(WORKER_VCPU_ID_2 % 64); - /* bank_contents and gva_list occupy the same space, thus [2] */ - flush_ex->gva_list[2] = (u64)data->test_pages; - hyperv_write_xmm_input(&flush_ex->hv_vp_set, 3); - hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX | - HV_HYPERCALL_FAST_BIT | - (2 << HV_HYPERCALL_VARHEAD_OFFSET) | - (1UL << HV_HYPERCALL_REP_COMP_OFFSET), - 0x0, HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES); - post_test(data, i % 2 ? TESTVAL1 : TESTVAL2, - i % 2 ? TESTVAL1 : TESTVAL2); - } - - GUEST_SYNC(stage++); - - /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX for HV_GENERIC_SET_ALL */ - for (i = 0; i < NTRY; i++) { - prepare_to_test(data); - flush_ex->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES; - flush_ex->hv_vp_set.format = HV_GENERIC_SET_ALL; - hyperv_write_xmm_input(&flush_ex->hv_vp_set, 2); - hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX | - HV_HYPERCALL_FAST_BIT, - 0x0, HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES); - post_test(data, i % 2 ? TESTVAL1 : TESTVAL2, - i % 2 ? TESTVAL1 : TESTVAL2); - } - - GUEST_SYNC(stage++); - - /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX for HV_GENERIC_SET_ALL */ - for (i = 0; i < NTRY; i++) { - prepare_to_test(data); - flush_ex->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES; - flush_ex->hv_vp_set.format = HV_GENERIC_SET_ALL; - flush_ex->gva_list[0] = (u64)data->test_pages; - hyperv_write_xmm_input(&flush_ex->hv_vp_set, 2); - hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX | - HV_HYPERCALL_FAST_BIT | - (1UL << HV_HYPERCALL_REP_COMP_OFFSET), - 0x0, HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES); - post_test(data, i % 2 ? TESTVAL1 : TESTVAL2, - i % 2 ? 
TESTVAL1 : TESTVAL2); - } - - GUEST_DONE(); -} - -static void *vcpu_thread(void *arg) -{ - struct kvm_vcpu *vcpu = (struct kvm_vcpu *)arg; - struct ucall uc; - int old; - int r; - - r = pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, &old); - TEST_ASSERT(!r, "pthread_setcanceltype failed on vcpu_id=%u with errno=%d", - vcpu->id, r); - - vcpu_run(vcpu); - TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); - - switch (get_ucall(vcpu, &uc)) { - case UCALL_ABORT: - REPORT_GUEST_ASSERT(uc); - /* NOT REACHED */ - default: - TEST_FAIL("Unexpected ucall %lu, vCPU %d", uc.cmd, vcpu->id); - } - - return NULL; -} - -static void cancel_join_vcpu_thread(pthread_t thread, struct kvm_vcpu *vcpu) -{ - void *retval; - int r; - - r = pthread_cancel(thread); - TEST_ASSERT(!r, "pthread_cancel on vcpu_id=%d failed with errno=%d", - vcpu->id, r); - - r = pthread_join(thread, &retval); - TEST_ASSERT(!r, "pthread_join on vcpu_id=%d failed with errno=%d", - vcpu->id, r); - TEST_ASSERT(retval == PTHREAD_CANCELED, - "expected retval=%p, got %p", PTHREAD_CANCELED, - retval); -} - -int main(int argc, char *argv[]) -{ - struct kvm_vm *vm; - struct kvm_vcpu *vcpu[3]; - pthread_t threads[2]; - vm_vaddr_t test_data_page, gva; - vm_paddr_t gpa; - uint64_t *pte; - struct test_data *data; - struct ucall uc; - int stage = 1, r, i; - - TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_TLBFLUSH)); - - vm = vm_create_with_one_vcpu(&vcpu[0], sender_guest_code); - - /* Test data page */ - test_data_page = vm_vaddr_alloc_page(vm); - data = (struct test_data *)addr_gva2hva(vm, test_data_page); - - /* Hypercall input/output */ - data->hcall_gva = vm_vaddr_alloc_pages(vm, 2); - data->hcall_gpa = addr_gva2gpa(vm, data->hcall_gva); - memset(addr_gva2hva(vm, data->hcall_gva), 0x0, 2 * PAGE_SIZE); - - /* - * Test pages: the first one is filled with '0x01's, the second with '0x02's - * and the test will swap their mappings. The third page keeps the indication - * about the current state of mappings. - */ - data->test_pages = vm_vaddr_alloc_pages(vm, NTEST_PAGES + 1); - for (i = 0; i < NTEST_PAGES; i++) - memset(addr_gva2hva(vm, data->test_pages + PAGE_SIZE * i), - (u8)(i + 1), PAGE_SIZE); - set_expected_val(addr_gva2hva(vm, data->test_pages), 0x0, WORKER_VCPU_ID_1); - set_expected_val(addr_gva2hva(vm, data->test_pages), 0x0, WORKER_VCPU_ID_2); - - /* - * Get PTE pointers for test pages and map them inside the guest. - * Use separate page for each PTE for simplicity. - */ - gva = vm_vaddr_unused_gap(vm, NTEST_PAGES * PAGE_SIZE, KVM_UTIL_MIN_VADDR); - for (i = 0; i < NTEST_PAGES; i++) { - pte = vm_get_page_table_entry(vm, data->test_pages + i * PAGE_SIZE); - gpa = addr_hva2gpa(vm, pte); - __virt_pg_map(vm, gva + PAGE_SIZE * i, gpa & PAGE_MASK, PG_LEVEL_4K); - data->test_pages_pte[i] = gva + (gpa & ~PAGE_MASK); - } - - /* - * Sender vCPU which performs the test: swaps test pages, sets expectation - * for 'workers' and issues TLB flush hypercalls. 
- */ - vcpu_args_set(vcpu[0], 1, test_data_page); - vcpu_set_hv_cpuid(vcpu[0]); - - /* Create worker vCPUs which check the contents of the test pages */ - vcpu[1] = vm_vcpu_add(vm, WORKER_VCPU_ID_1, worker_guest_code); - vcpu_args_set(vcpu[1], 1, test_data_page); - vcpu_set_msr(vcpu[1], HV_X64_MSR_VP_INDEX, WORKER_VCPU_ID_1); - vcpu_set_hv_cpuid(vcpu[1]); - - vcpu[2] = vm_vcpu_add(vm, WORKER_VCPU_ID_2, worker_guest_code); - vcpu_args_set(vcpu[2], 1, test_data_page); - vcpu_set_msr(vcpu[2], HV_X64_MSR_VP_INDEX, WORKER_VCPU_ID_2); - vcpu_set_hv_cpuid(vcpu[2]); - - r = pthread_create(&threads[0], NULL, vcpu_thread, vcpu[1]); - TEST_ASSERT(!r, "pthread_create() failed"); - - r = pthread_create(&threads[1], NULL, vcpu_thread, vcpu[2]); - TEST_ASSERT(!r, "pthread_create() failed"); - - while (true) { - vcpu_run(vcpu[0]); - TEST_ASSERT_KVM_EXIT_REASON(vcpu[0], KVM_EXIT_IO); - - switch (get_ucall(vcpu[0], &uc)) { - case UCALL_SYNC: - TEST_ASSERT(uc.args[1] == stage, - "Unexpected stage: %ld (%d expected)", - uc.args[1], stage); - break; - case UCALL_ABORT: - REPORT_GUEST_ASSERT(uc); - /* NOT REACHED */ - case UCALL_DONE: - goto done; - default: - TEST_FAIL("Unknown ucall %lu", uc.cmd); - } - - stage++; - } - -done: - cancel_join_vcpu_thread(threads[0], vcpu[1]); - cancel_join_vcpu_thread(threads[1], vcpu[2]); - kvm_vm_free(vm); - - return 0; -} diff --git a/tools/testing/selftests/kvm/x86_64/kvm_clock_test.c b/tools/testing/selftests/kvm/x86_64/kvm_clock_test.c deleted file mode 100644 index 5bc12222d87a..000000000000 --- a/tools/testing/selftests/kvm/x86_64/kvm_clock_test.c +++ /dev/null @@ -1,156 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Copyright (C) 2021, Google LLC. - * - * Tests for adjusting the KVM clock from userspace - */ -#include -#include -#include -#include -#include -#include -#include - -#include "test_util.h" -#include "kvm_util.h" -#include "processor.h" - -struct test_case { - uint64_t kvmclock_base; - int64_t realtime_offset; -}; - -static struct test_case test_cases[] = { - { .kvmclock_base = 0 }, - { .kvmclock_base = 180 * NSEC_PER_SEC }, - { .kvmclock_base = 0, .realtime_offset = -180 * NSEC_PER_SEC }, - { .kvmclock_base = 0, .realtime_offset = 180 * NSEC_PER_SEC }, -}; - -#define GUEST_SYNC_CLOCK(__stage, __val) \ - GUEST_SYNC_ARGS(__stage, __val, 0, 0, 0) - -static void guest_main(vm_paddr_t pvti_pa, struct pvclock_vcpu_time_info *pvti) -{ - int i; - - wrmsr(MSR_KVM_SYSTEM_TIME_NEW, pvti_pa | KVM_MSR_ENABLED); - for (i = 0; i < ARRAY_SIZE(test_cases); i++) - GUEST_SYNC_CLOCK(i, __pvclock_read_cycles(pvti, rdtsc())); -} - -#define EXPECTED_FLAGS (KVM_CLOCK_REALTIME | KVM_CLOCK_HOST_TSC) - -static inline void assert_flags(struct kvm_clock_data *data) -{ - TEST_ASSERT((data->flags & EXPECTED_FLAGS) == EXPECTED_FLAGS, - "unexpected clock data flags: %x (want set: %x)", - data->flags, EXPECTED_FLAGS); -} - -static void handle_sync(struct ucall *uc, struct kvm_clock_data *start, - struct kvm_clock_data *end) -{ - uint64_t obs, exp_lo, exp_hi; - - obs = uc->args[2]; - exp_lo = start->clock; - exp_hi = end->clock; - - assert_flags(start); - assert_flags(end); - - TEST_ASSERT(exp_lo <= obs && obs <= exp_hi, - "unexpected kvm-clock value: %"PRIu64" expected range: [%"PRIu64", %"PRIu64"]", - obs, exp_lo, exp_hi); - - pr_info("kvm-clock value: %"PRIu64" expected range [%"PRIu64", %"PRIu64"]\n", - obs, exp_lo, exp_hi); -} - -static void handle_abort(struct ucall *uc) -{ - REPORT_GUEST_ASSERT(*uc); -} - -static void setup_clock(struct kvm_vm *vm, struct test_case 
*test_case) -{ - struct kvm_clock_data data; - - memset(&data, 0, sizeof(data)); - - data.clock = test_case->kvmclock_base; - if (test_case->realtime_offset) { - struct timespec ts; - int r; - - data.flags |= KVM_CLOCK_REALTIME; - do { - r = clock_gettime(CLOCK_REALTIME, &ts); - if (!r) - break; - } while (errno == EINTR); - - TEST_ASSERT(!r, "clock_gettime() failed: %d", r); - - data.realtime = ts.tv_sec * NSEC_PER_SEC; - data.realtime += ts.tv_nsec; - data.realtime += test_case->realtime_offset; - } - - vm_ioctl(vm, KVM_SET_CLOCK, &data); -} - -static void enter_guest(struct kvm_vcpu *vcpu) -{ - struct kvm_clock_data start, end; - struct kvm_vm *vm = vcpu->vm; - struct ucall uc; - int i; - - for (i = 0; i < ARRAY_SIZE(test_cases); i++) { - setup_clock(vm, &test_cases[i]); - - vm_ioctl(vm, KVM_GET_CLOCK, &start); - - vcpu_run(vcpu); - vm_ioctl(vm, KVM_GET_CLOCK, &end); - - TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); - - switch (get_ucall(vcpu, &uc)) { - case UCALL_SYNC: - handle_sync(&uc, &start, &end); - break; - case UCALL_ABORT: - handle_abort(&uc); - return; - default: - TEST_ASSERT(0, "unhandled ucall: %ld", uc.cmd); - } - } -} - -int main(void) -{ - struct kvm_vcpu *vcpu; - vm_vaddr_t pvti_gva; - vm_paddr_t pvti_gpa; - struct kvm_vm *vm; - int flags; - - flags = kvm_check_cap(KVM_CAP_ADJUST_CLOCK); - TEST_REQUIRE(flags & KVM_CLOCK_REALTIME); - - TEST_REQUIRE(sys_clocksource_is_based_on_tsc()); - - vm = vm_create_with_one_vcpu(&vcpu, guest_main); - - pvti_gva = vm_vaddr_alloc(vm, getpagesize(), 0x10000); - pvti_gpa = addr_gva2gpa(vm, pvti_gva); - vcpu_args_set(vcpu, 2, pvti_gpa, pvti_gva); - - enter_guest(vcpu); - kvm_vm_free(vm); -} diff --git a/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c b/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c deleted file mode 100644 index 78878b3a2725..000000000000 --- a/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c +++ /dev/null @@ -1,190 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Copyright (C) 2020, Google LLC. - * - * Tests for KVM paravirtual feature disablement - */ -#include -#include -#include - -#include "test_util.h" -#include "kvm_util.h" -#include "processor.h" - -struct msr_data { - uint32_t idx; - const char *name; -}; - -#define TEST_MSR(msr) { .idx = msr, .name = #msr } -#define UCALL_PR_MSR 0xdeadbeef -#define PR_MSR(msr) ucall(UCALL_PR_MSR, 1, msr) - -/* - * KVM paravirtual msrs to test. Expect a #GP if any of these msrs are read or - * written, as the KVM_CPUID_FEATURES leaf is cleared. - */ -static struct msr_data msrs_to_test[] = { - TEST_MSR(MSR_KVM_SYSTEM_TIME), - TEST_MSR(MSR_KVM_SYSTEM_TIME_NEW), - TEST_MSR(MSR_KVM_WALL_CLOCK), - TEST_MSR(MSR_KVM_WALL_CLOCK_NEW), - TEST_MSR(MSR_KVM_ASYNC_PF_EN), - TEST_MSR(MSR_KVM_STEAL_TIME), - TEST_MSR(MSR_KVM_PV_EOI_EN), - TEST_MSR(MSR_KVM_POLL_CONTROL), - TEST_MSR(MSR_KVM_ASYNC_PF_INT), - TEST_MSR(MSR_KVM_ASYNC_PF_ACK), -}; - -static void test_msr(struct msr_data *msr) -{ - uint64_t ignored; - uint8_t vector; - - PR_MSR(msr); - - vector = rdmsr_safe(msr->idx, &ignored); - GUEST_ASSERT_EQ(vector, GP_VECTOR); - - vector = wrmsr_safe(msr->idx, 0); - GUEST_ASSERT_EQ(vector, GP_VECTOR); -} - -struct hcall_data { - uint64_t nr; - const char *name; -}; - -#define TEST_HCALL(hc) { .nr = hc, .name = #hc } -#define UCALL_PR_HCALL 0xdeadc0de -#define PR_HCALL(hc) ucall(UCALL_PR_HCALL, 1, hc) - -/* - * KVM hypercalls to test. Expect -KVM_ENOSYS when called, as the corresponding - * features have been cleared in KVM_CPUID_FEATURES. 
- */ -static struct hcall_data hcalls_to_test[] = { - TEST_HCALL(KVM_HC_KICK_CPU), - TEST_HCALL(KVM_HC_SEND_IPI), - TEST_HCALL(KVM_HC_SCHED_YIELD), -}; - -static void test_hcall(struct hcall_data *hc) -{ - uint64_t r; - - PR_HCALL(hc); - r = kvm_hypercall(hc->nr, 0, 0, 0, 0); - GUEST_ASSERT_EQ(r, -KVM_ENOSYS); -} - -static void guest_main(void) -{ - int i; - - for (i = 0; i < ARRAY_SIZE(msrs_to_test); i++) { - test_msr(&msrs_to_test[i]); - } - - for (i = 0; i < ARRAY_SIZE(hcalls_to_test); i++) { - test_hcall(&hcalls_to_test[i]); - } - - GUEST_DONE(); -} - -static void pr_msr(struct ucall *uc) -{ - struct msr_data *msr = (struct msr_data *)uc->args[0]; - - pr_info("testing msr: %s (%#x)\n", msr->name, msr->idx); -} - -static void pr_hcall(struct ucall *uc) -{ - struct hcall_data *hc = (struct hcall_data *)uc->args[0]; - - pr_info("testing hcall: %s (%lu)\n", hc->name, hc->nr); -} - -static void enter_guest(struct kvm_vcpu *vcpu) -{ - struct ucall uc; - - while (true) { - vcpu_run(vcpu); - TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); - - switch (get_ucall(vcpu, &uc)) { - case UCALL_PR_MSR: - pr_msr(&uc); - break; - case UCALL_PR_HCALL: - pr_hcall(&uc); - break; - case UCALL_ABORT: - REPORT_GUEST_ASSERT(uc); - return; - case UCALL_DONE: - return; - } - } -} - -static void test_pv_unhalt(void) -{ - struct kvm_vcpu *vcpu; - struct kvm_vm *vm; - struct kvm_cpuid_entry2 *ent; - u32 kvm_sig_old; - - pr_info("testing KVM_FEATURE_PV_UNHALT\n"); - - TEST_REQUIRE(KVM_CAP_X86_DISABLE_EXITS); - - /* KVM_PV_UNHALT test */ - vm = vm_create_with_one_vcpu(&vcpu, guest_main); - vcpu_set_cpuid_feature(vcpu, X86_FEATURE_KVM_PV_UNHALT); - - TEST_ASSERT(vcpu_cpuid_has(vcpu, X86_FEATURE_KVM_PV_UNHALT), - "Enabling X86_FEATURE_KVM_PV_UNHALT had no effect"); - - /* Make sure KVM clears vcpu->arch.kvm_cpuid */ - ent = vcpu_get_cpuid_entry(vcpu, KVM_CPUID_SIGNATURE); - kvm_sig_old = ent->ebx; - ent->ebx = 0xdeadbeef; - vcpu_set_cpuid(vcpu); - - vm_enable_cap(vm, KVM_CAP_X86_DISABLE_EXITS, KVM_X86_DISABLE_EXITS_HLT); - ent = vcpu_get_cpuid_entry(vcpu, KVM_CPUID_SIGNATURE); - ent->ebx = kvm_sig_old; - vcpu_set_cpuid(vcpu); - - TEST_ASSERT(!vcpu_cpuid_has(vcpu, X86_FEATURE_KVM_PV_UNHALT), - "KVM_FEATURE_PV_UNHALT is set with KVM_CAP_X86_DISABLE_EXITS"); - - /* FIXME: actually test KVM_FEATURE_PV_UNHALT feature */ - - kvm_vm_free(vm); -} - -int main(void) -{ - struct kvm_vcpu *vcpu; - struct kvm_vm *vm; - - TEST_REQUIRE(kvm_has_cap(KVM_CAP_ENFORCE_PV_FEATURE_CPUID)); - - vm = vm_create_with_one_vcpu(&vcpu, guest_main); - - vcpu_enable_cap(vcpu, KVM_CAP_ENFORCE_PV_FEATURE_CPUID, 1); - - vcpu_clear_cpuid_entry(vcpu, KVM_CPUID_FEATURES); - - enter_guest(vcpu); - kvm_vm_free(vm); - - test_pv_unhalt(); -} diff --git a/tools/testing/selftests/kvm/x86_64/max_vcpuid_cap_test.c b/tools/testing/selftests/kvm/x86_64/max_vcpuid_cap_test.c deleted file mode 100644 index 7e2bfb3c3f3b..000000000000 --- a/tools/testing/selftests/kvm/x86_64/max_vcpuid_cap_test.c +++ /dev/null @@ -1,62 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * maximum APIC ID capability tests - * - * Copyright (C) 2022, Intel, Inc. 
- * - * Tests for getting/setting maximum APIC ID capability - */ - -#include "kvm_util.h" - -#define MAX_VCPU_ID 2 - -int main(int argc, char *argv[]) -{ - struct kvm_vm *vm; - int ret; - - vm = vm_create_barebones(); - - /* Get KVM_CAP_MAX_VCPU_ID cap supported in KVM */ - ret = vm_check_cap(vm, KVM_CAP_MAX_VCPU_ID); - - /* Try to set KVM_CAP_MAX_VCPU_ID beyond KVM cap */ - ret = __vm_enable_cap(vm, KVM_CAP_MAX_VCPU_ID, ret + 1); - TEST_ASSERT(ret < 0, - "Setting KVM_CAP_MAX_VCPU_ID beyond KVM cap should fail"); - - /* Test BOOT_CPU_ID interaction (MAX_VCPU_ID cannot be lower) */ - if (kvm_has_cap(KVM_CAP_SET_BOOT_CPU_ID)) { - vm_ioctl(vm, KVM_SET_BOOT_CPU_ID, (void *)MAX_VCPU_ID); - - /* Try setting KVM_CAP_MAX_VCPU_ID below BOOT_CPU_ID */ - ret = __vm_enable_cap(vm, KVM_CAP_MAX_VCPU_ID, MAX_VCPU_ID - 1); - TEST_ASSERT(ret < 0, - "Setting KVM_CAP_MAX_VCPU_ID below BOOT_CPU_ID should fail"); - } - - /* Set KVM_CAP_MAX_VCPU_ID */ - vm_enable_cap(vm, KVM_CAP_MAX_VCPU_ID, MAX_VCPU_ID); - - /* Try to set KVM_CAP_MAX_VCPU_ID again */ - ret = __vm_enable_cap(vm, KVM_CAP_MAX_VCPU_ID, MAX_VCPU_ID + 1); - TEST_ASSERT(ret < 0, - "Setting KVM_CAP_MAX_VCPU_ID multiple times should fail"); - - /* Create vCPU with id beyond KVM_CAP_MAX_VCPU_ID cap */ - ret = __vm_ioctl(vm, KVM_CREATE_VCPU, (void *)MAX_VCPU_ID); - TEST_ASSERT(ret < 0, "Creating vCPU with ID > MAX_VCPU_ID should fail"); - - /* Create vCPU with bits 63:32 != 0, but an otherwise valid id */ - ret = __vm_ioctl(vm, KVM_CREATE_VCPU, (void *)(1L << 32)); - TEST_ASSERT(ret < 0, "Creating vCPU with ID[63:32] != 0 should fail"); - - /* Create vCPU with id within bounds */ - ret = __vm_ioctl(vm, KVM_CREATE_VCPU, (void *)0); - TEST_ASSERT(ret >= 0, "Creating vCPU with ID 0 should succeed"); - - close(ret); - kvm_vm_free(vm); - return 0; -} diff --git a/tools/testing/selftests/kvm/x86_64/monitor_mwait_test.c b/tools/testing/selftests/kvm/x86_64/monitor_mwait_test.c deleted file mode 100644 index 2b550eff35f1..000000000000 --- a/tools/testing/selftests/kvm/x86_64/monitor_mwait_test.c +++ /dev/null @@ -1,129 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include -#include -#include -#include -#include - -#include "kvm_util.h" -#include "processor.h" - -#define CPUID_MWAIT (1u << 3) - -enum monitor_mwait_testcases { - MWAIT_QUIRK_DISABLED = BIT(0), - MISC_ENABLES_QUIRK_DISABLED = BIT(1), - MWAIT_DISABLED = BIT(2), -}; - -/* - * If both MWAIT and its quirk are disabled, MONITOR/MWAIT should #UD, in all - * other scenarios KVM should emulate them as nops. - */ -#define GUEST_ASSERT_MONITOR_MWAIT(insn, testcase, vector) \ -do { \ - bool fault_wanted = ((testcase) & MWAIT_QUIRK_DISABLED) && \ - ((testcase) & MWAIT_DISABLED); \ - \ - if (fault_wanted) \ - __GUEST_ASSERT((vector) == UD_VECTOR, \ - "Expected #UD on " insn " for testcase '0x%x', got '0x%x'", \ - testcase, vector); \ - else \ - __GUEST_ASSERT(!(vector), \ - "Expected success on " insn " for testcase '0x%x', got '0x%x'", \ - testcase, vector); \ -} while (0) - -static void guest_monitor_wait(int testcase) -{ - u8 vector; - - GUEST_SYNC(testcase); - - /* - * Arbitrarily MONITOR this function, SVM performs fault checks before - * intercept checks, so the inputs for MONITOR and MWAIT must be valid. 
- */ - vector = kvm_asm_safe("monitor", "a"(guest_monitor_wait), "c"(0), "d"(0)); - GUEST_ASSERT_MONITOR_MWAIT("MONITOR", testcase, vector); - - vector = kvm_asm_safe("mwait", "a"(guest_monitor_wait), "c"(0), "d"(0)); - GUEST_ASSERT_MONITOR_MWAIT("MWAIT", testcase, vector); -} - -static void guest_code(void) -{ - guest_monitor_wait(MWAIT_DISABLED); - - guest_monitor_wait(MWAIT_QUIRK_DISABLED | MWAIT_DISABLED); - - guest_monitor_wait(MISC_ENABLES_QUIRK_DISABLED | MWAIT_DISABLED); - guest_monitor_wait(MISC_ENABLES_QUIRK_DISABLED); - - guest_monitor_wait(MISC_ENABLES_QUIRK_DISABLED | MWAIT_QUIRK_DISABLED | MWAIT_DISABLED); - guest_monitor_wait(MISC_ENABLES_QUIRK_DISABLED | MWAIT_QUIRK_DISABLED); - - GUEST_DONE(); -} - -int main(int argc, char *argv[]) -{ - uint64_t disabled_quirks; - struct kvm_vcpu *vcpu; - struct kvm_vm *vm; - struct ucall uc; - int testcase; - - TEST_REQUIRE(this_cpu_has(X86_FEATURE_MWAIT)); - TEST_REQUIRE(kvm_has_cap(KVM_CAP_DISABLE_QUIRKS2)); - - vm = vm_create_with_one_vcpu(&vcpu, guest_code); - vcpu_clear_cpuid_feature(vcpu, X86_FEATURE_MWAIT); - - while (1) { - vcpu_run(vcpu); - TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); - - switch (get_ucall(vcpu, &uc)) { - case UCALL_SYNC: - testcase = uc.args[1]; - break; - case UCALL_ABORT: - REPORT_GUEST_ASSERT(uc); - goto done; - case UCALL_DONE: - goto done; - default: - TEST_FAIL("Unknown ucall %lu", uc.cmd); - goto done; - } - - disabled_quirks = 0; - if (testcase & MWAIT_QUIRK_DISABLED) - disabled_quirks |= KVM_X86_QUIRK_MWAIT_NEVER_UD_FAULTS; - if (testcase & MISC_ENABLES_QUIRK_DISABLED) - disabled_quirks |= KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT; - vm_enable_cap(vm, KVM_CAP_DISABLE_QUIRKS2, disabled_quirks); - - /* - * If the MISC_ENABLES quirk (KVM neglects to update CPUID to - * enable/disable MWAIT) is disabled, toggle the ENABLE_MWAIT - * bit in MISC_ENABLES accordingly. If the quirk is enabled, - * the only valid configuration is MWAIT disabled, as CPUID - * can't be manually changed after running the vCPU. - */ - if (!(testcase & MISC_ENABLES_QUIRK_DISABLED)) { - TEST_ASSERT(testcase & MWAIT_DISABLED, - "Can't toggle CPUID features after running vCPU"); - continue; - } - - vcpu_set_msr(vcpu, MSR_IA32_MISC_ENABLE, - (testcase & MWAIT_DISABLED) ? 0 : MSR_IA32_MISC_ENABLE_MWAIT); - } - -done: - kvm_vm_free(vm); - return 0; -} diff --git a/tools/testing/selftests/kvm/x86_64/nested_exceptions_test.c b/tools/testing/selftests/kvm/x86_64/nested_exceptions_test.c deleted file mode 100644 index 3eb0313ffa39..000000000000 --- a/tools/testing/selftests/kvm/x86_64/nested_exceptions_test.c +++ /dev/null @@ -1,288 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -#include "test_util.h" -#include "kvm_util.h" -#include "processor.h" -#include "vmx.h" -#include "svm_util.h" - -#define L2_GUEST_STACK_SIZE 256 - -/* - * Arbitrary, never shoved into KVM/hardware, just need to avoid conflict with - * the "real" exceptions used, #SS/#GP/#DF (12/13/8). - */ -#define FAKE_TRIPLE_FAULT_VECTOR 0xaa - -/* Arbitrary 32-bit error code injected by this test. */ -#define SS_ERROR_CODE 0xdeadbeef - -/* - * Bit '0' is set on Intel if the exception occurs while delivering a previous - * event/exception. AMD's wording is ambiguous, but presumably the bit is set - * if the exception occurs while delivering an external event, e.g. NMI or INTR, - * but not for exceptions that occur when delivering other exceptions or - * software interrupts. 
- * - * Note, Intel's name for it, "External event", is misleading and much more - * aligned with AMD's behavior, but the SDM is quite clear on its behavior. - */ -#define ERROR_CODE_EXT_FLAG BIT(0) - -/* - * Bit '1' is set if the fault occurred when looking up a descriptor in the - * IDT, which is the case here as the IDT is empty/NULL. - */ -#define ERROR_CODE_IDT_FLAG BIT(1) - -/* - * The #GP that occurs when vectoring #SS should show the index into the IDT - * for #SS, plus have the "IDT flag" set. - */ -#define GP_ERROR_CODE_AMD ((SS_VECTOR * 8) | ERROR_CODE_IDT_FLAG) -#define GP_ERROR_CODE_INTEL ((SS_VECTOR * 8) | ERROR_CODE_IDT_FLAG | ERROR_CODE_EXT_FLAG) - -/* - * Intel and AMD both shove '0' into the error code on #DF, regardless of what - * led to the double fault. - */ -#define DF_ERROR_CODE 0 - -#define INTERCEPT_SS (BIT_ULL(SS_VECTOR)) -#define INTERCEPT_SS_DF (INTERCEPT_SS | BIT_ULL(DF_VECTOR)) -#define INTERCEPT_SS_GP_DF (INTERCEPT_SS_DF | BIT_ULL(GP_VECTOR)) - -static void l2_ss_pending_test(void) -{ - GUEST_SYNC(SS_VECTOR); -} - -static void l2_ss_injected_gp_test(void) -{ - GUEST_SYNC(GP_VECTOR); -} - -static void l2_ss_injected_df_test(void) -{ - GUEST_SYNC(DF_VECTOR); -} - -static void l2_ss_injected_tf_test(void) -{ - GUEST_SYNC(FAKE_TRIPLE_FAULT_VECTOR); -} - -static void svm_run_l2(struct svm_test_data *svm, void *l2_code, int vector, - uint32_t error_code) -{ - struct vmcb *vmcb = svm->vmcb; - struct vmcb_control_area *ctrl = &vmcb->control; - - vmcb->save.rip = (u64)l2_code; - run_guest(vmcb, svm->vmcb_gpa); - - if (vector == FAKE_TRIPLE_FAULT_VECTOR) - return; - - GUEST_ASSERT_EQ(ctrl->exit_code, (SVM_EXIT_EXCP_BASE + vector)); - GUEST_ASSERT_EQ(ctrl->exit_info_1, error_code); -} - -static void l1_svm_code(struct svm_test_data *svm) -{ - struct vmcb_control_area *ctrl = &svm->vmcb->control; - unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; - - generic_svm_setup(svm, NULL, &l2_guest_stack[L2_GUEST_STACK_SIZE]); - svm->vmcb->save.idtr.limit = 0; - ctrl->intercept |= BIT_ULL(INTERCEPT_SHUTDOWN); - - ctrl->intercept_exceptions = INTERCEPT_SS_GP_DF; - svm_run_l2(svm, l2_ss_pending_test, SS_VECTOR, SS_ERROR_CODE); - svm_run_l2(svm, l2_ss_injected_gp_test, GP_VECTOR, GP_ERROR_CODE_AMD); - - ctrl->intercept_exceptions = INTERCEPT_SS_DF; - svm_run_l2(svm, l2_ss_injected_df_test, DF_VECTOR, DF_ERROR_CODE); - - ctrl->intercept_exceptions = INTERCEPT_SS; - svm_run_l2(svm, l2_ss_injected_tf_test, FAKE_TRIPLE_FAULT_VECTOR, 0); - GUEST_ASSERT_EQ(ctrl->exit_code, SVM_EXIT_SHUTDOWN); - - GUEST_DONE(); -} - -static void vmx_run_l2(void *l2_code, int vector, uint32_t error_code) -{ - GUEST_ASSERT(!vmwrite(GUEST_RIP, (u64)l2_code)); - - GUEST_ASSERT_EQ(vector == SS_VECTOR ? vmlaunch() : vmresume(), 0); - - if (vector == FAKE_TRIPLE_FAULT_VECTOR) - return; - - GUEST_ASSERT_EQ(vmreadz(VM_EXIT_REASON), EXIT_REASON_EXCEPTION_NMI); - GUEST_ASSERT_EQ((vmreadz(VM_EXIT_INTR_INFO) & 0xff), vector); - GUEST_ASSERT_EQ(vmreadz(VM_EXIT_INTR_ERROR_CODE), error_code); -} - -static void l1_vmx_code(struct vmx_pages *vmx) -{ - unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; - - GUEST_ASSERT_EQ(prepare_for_vmx_operation(vmx), true); - - GUEST_ASSERT_EQ(load_vmcs(vmx), true); - - prepare_vmcs(vmx, NULL, &l2_guest_stack[L2_GUEST_STACK_SIZE]); - GUEST_ASSERT_EQ(vmwrite(GUEST_IDTR_LIMIT, 0), 0); - - /* - * VMX disallows injecting an exception with error_code[31:16] != 0, - * and hardware will never generate a VM-Exit with bits 31:16 set. - * KVM should likewise truncate the "bad" userspace value. 
- */ - GUEST_ASSERT_EQ(vmwrite(EXCEPTION_BITMAP, INTERCEPT_SS_GP_DF), 0); - vmx_run_l2(l2_ss_pending_test, SS_VECTOR, (u16)SS_ERROR_CODE); - vmx_run_l2(l2_ss_injected_gp_test, GP_VECTOR, GP_ERROR_CODE_INTEL); - - GUEST_ASSERT_EQ(vmwrite(EXCEPTION_BITMAP, INTERCEPT_SS_DF), 0); - vmx_run_l2(l2_ss_injected_df_test, DF_VECTOR, DF_ERROR_CODE); - - GUEST_ASSERT_EQ(vmwrite(EXCEPTION_BITMAP, INTERCEPT_SS), 0); - vmx_run_l2(l2_ss_injected_tf_test, FAKE_TRIPLE_FAULT_VECTOR, 0); - GUEST_ASSERT_EQ(vmreadz(VM_EXIT_REASON), EXIT_REASON_TRIPLE_FAULT); - - GUEST_DONE(); -} - -static void __attribute__((__flatten__)) l1_guest_code(void *test_data) -{ - if (this_cpu_has(X86_FEATURE_SVM)) - l1_svm_code(test_data); - else - l1_vmx_code(test_data); -} - -static void assert_ucall_vector(struct kvm_vcpu *vcpu, int vector) -{ - struct ucall uc; - - TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); - - switch (get_ucall(vcpu, &uc)) { - case UCALL_SYNC: - TEST_ASSERT(vector == uc.args[1], - "Expected L2 to ask for %d, got %ld", vector, uc.args[1]); - break; - case UCALL_DONE: - TEST_ASSERT(vector == -1, - "Expected L2 to ask for %d, L2 says it's done", vector); - break; - case UCALL_ABORT: - REPORT_GUEST_ASSERT(uc); - break; - default: - TEST_FAIL("Expected L2 to ask for %d, got unexpected ucall %lu", vector, uc.cmd); - } -} - -static void queue_ss_exception(struct kvm_vcpu *vcpu, bool inject) -{ - struct kvm_vcpu_events events; - - vcpu_events_get(vcpu, &events); - - TEST_ASSERT(!events.exception.pending, - "Vector %d unexpectedlt pending", events.exception.nr); - TEST_ASSERT(!events.exception.injected, - "Vector %d unexpectedly injected", events.exception.nr); - - events.flags = KVM_VCPUEVENT_VALID_PAYLOAD; - events.exception.pending = !inject; - events.exception.injected = inject; - events.exception.nr = SS_VECTOR; - events.exception.has_error_code = true; - events.exception.error_code = SS_ERROR_CODE; - vcpu_events_set(vcpu, &events); -} - -/* - * Verify KVM_{G,S}ET_EVENTS play nice with pending vs. injected exceptions - * when an exception is being queued for L2. Specifically, verify that KVM - * honors L1 exception intercept controls when a #SS is pending/injected, - * triggers a #GP on vectoring the #SS, morphs to #DF if #GP isn't intercepted - * by L1, and finally causes (nested) SHUTDOWN if #DF isn't intercepted by L1. - */ -int main(int argc, char *argv[]) -{ - vm_vaddr_t nested_test_data_gva; - struct kvm_vcpu_events events; - struct kvm_vcpu *vcpu; - struct kvm_vm *vm; - - TEST_REQUIRE(kvm_has_cap(KVM_CAP_EXCEPTION_PAYLOAD)); - TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM) || kvm_cpu_has(X86_FEATURE_VMX)); - - vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code); - vm_enable_cap(vm, KVM_CAP_EXCEPTION_PAYLOAD, -2ul); - - if (kvm_cpu_has(X86_FEATURE_SVM)) - vcpu_alloc_svm(vm, &nested_test_data_gva); - else - vcpu_alloc_vmx(vm, &nested_test_data_gva); - - vcpu_args_set(vcpu, 1, nested_test_data_gva); - - /* Run L1 => L2. L2 should sync and request #SS. */ - vcpu_run(vcpu); - assert_ucall_vector(vcpu, SS_VECTOR); - - /* Pend #SS and request immediate exit. #SS should still be pending. */ - queue_ss_exception(vcpu, false); - vcpu->run->immediate_exit = true; - vcpu_run_complete_io(vcpu); - - /* Verify the pending events comes back out the same as it went in. 
*/ - vcpu_events_get(vcpu, &events); - TEST_ASSERT_EQ(events.flags & KVM_VCPUEVENT_VALID_PAYLOAD, - KVM_VCPUEVENT_VALID_PAYLOAD); - TEST_ASSERT_EQ(events.exception.pending, true); - TEST_ASSERT_EQ(events.exception.nr, SS_VECTOR); - TEST_ASSERT_EQ(events.exception.has_error_code, true); - TEST_ASSERT_EQ(events.exception.error_code, SS_ERROR_CODE); - - /* - * Run for real with the pending #SS, L1 should get a VM-Exit due to - * #SS interception and re-enter L2 to request #GP (via injected #SS). - */ - vcpu->run->immediate_exit = false; - vcpu_run(vcpu); - assert_ucall_vector(vcpu, GP_VECTOR); - - /* - * Inject #SS, the #SS should bypass interception and cause #GP, which - * L1 should intercept before KVM morphs it to #DF. L1 should then - * disable #GP interception and run L2 to request #DF (via #SS => #GP). - */ - queue_ss_exception(vcpu, true); - vcpu_run(vcpu); - assert_ucall_vector(vcpu, DF_VECTOR); - - /* - * Inject #SS, the #SS should bypass interception and cause #GP, which - * L1 is no longer interception, and so should see a #DF VM-Exit. L1 - * should then signal that is done. - */ - queue_ss_exception(vcpu, true); - vcpu_run(vcpu); - assert_ucall_vector(vcpu, FAKE_TRIPLE_FAULT_VECTOR); - - /* - * Inject #SS yet again. L1 is not intercepting #GP or #DF, and so - * should see nested TRIPLE_FAULT / SHUTDOWN. - */ - queue_ss_exception(vcpu, true); - vcpu_run(vcpu); - assert_ucall_vector(vcpu, -1); - - kvm_vm_free(vm); -} diff --git a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c b/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c deleted file mode 100644 index e7efb2b35f8b..000000000000 --- a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c +++ /dev/null @@ -1,266 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Usage: to be run via nx_huge_page_test.sh, which does the necessary - * environment setup and teardown - * - * Copyright (C) 2022, Google LLC. - */ -#include -#include -#include - -#include -#include "kvm_util.h" -#include "processor.h" - -#define HPAGE_SLOT 10 -#define HPAGE_GPA (4UL << 30) /* 4G prevents collision w/ slot 0 */ -#define HPAGE_GVA HPAGE_GPA /* GVA is arbitrary, so use GPA. */ -#define PAGES_PER_2MB_HUGE_PAGE 512 -#define HPAGE_SLOT_NPAGES (3 * PAGES_PER_2MB_HUGE_PAGE) - -/* - * Passed by nx_huge_pages_test.sh to provide an easy warning if this test is - * being run without it. - */ -#define MAGIC_TOKEN 887563923 - -/* - * x86 opcode for the return instruction. Used to call into, and then - * immediately return from, memory backed with hugepages. - */ -#define RETURN_OPCODE 0xC3 - -/* Call the specified memory address. */ -static void guest_do_CALL(uint64_t target) -{ - ((void (*)(void)) target)(); -} - -/* - * Exit the VM after each memory access so that the userspace component of the - * test can make assertions about the pages backing the VM. - * - * See the below for an explanation of how each access should affect the - * backing mappings. 
- */ -void guest_code(void) -{ - uint64_t hpage_1 = HPAGE_GVA; - uint64_t hpage_2 = hpage_1 + (PAGE_SIZE * 512); - uint64_t hpage_3 = hpage_2 + (PAGE_SIZE * 512); - - READ_ONCE(*(uint64_t *)hpage_1); - GUEST_SYNC(1); - - READ_ONCE(*(uint64_t *)hpage_2); - GUEST_SYNC(2); - - guest_do_CALL(hpage_1); - GUEST_SYNC(3); - - guest_do_CALL(hpage_3); - GUEST_SYNC(4); - - READ_ONCE(*(uint64_t *)hpage_1); - GUEST_SYNC(5); - - READ_ONCE(*(uint64_t *)hpage_3); - GUEST_SYNC(6); -} - -static void check_2m_page_count(struct kvm_vm *vm, int expected_pages_2m) -{ - int actual_pages_2m; - - actual_pages_2m = vm_get_stat(vm, "pages_2m"); - - TEST_ASSERT(actual_pages_2m == expected_pages_2m, - "Unexpected 2m page count. Expected %d, got %d", - expected_pages_2m, actual_pages_2m); -} - -static void check_split_count(struct kvm_vm *vm, int expected_splits) -{ - int actual_splits; - - actual_splits = vm_get_stat(vm, "nx_lpage_splits"); - - TEST_ASSERT(actual_splits == expected_splits, - "Unexpected NX huge page split count. Expected %d, got %d", - expected_splits, actual_splits); -} - -static void wait_for_reclaim(int reclaim_period_ms) -{ - long reclaim_wait_ms; - struct timespec ts; - - reclaim_wait_ms = reclaim_period_ms * 5; - ts.tv_sec = reclaim_wait_ms / 1000; - ts.tv_nsec = (reclaim_wait_ms - (ts.tv_sec * 1000)) * 1000000; - nanosleep(&ts, NULL); -} - -void run_test(int reclaim_period_ms, bool disable_nx_huge_pages, - bool reboot_permissions) -{ - struct kvm_vcpu *vcpu; - struct kvm_vm *vm; - uint64_t nr_bytes; - void *hva; - int r; - - vm = vm_create(1); - - if (disable_nx_huge_pages) { - r = __vm_disable_nx_huge_pages(vm); - if (reboot_permissions) { - TEST_ASSERT(!r, "Disabling NX huge pages should succeed if process has reboot permissions"); - } else { - TEST_ASSERT(r == -1 && errno == EPERM, - "This process should not have permission to disable NX huge pages"); - return; - } - } - - vcpu = vm_vcpu_add(vm, 0, guest_code); - - vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS_HUGETLB, - HPAGE_GPA, HPAGE_SLOT, - HPAGE_SLOT_NPAGES, 0); - - nr_bytes = HPAGE_SLOT_NPAGES * vm->page_size; - - /* - * Ensure that KVM can map HPAGE_SLOT with huge pages by mapping the - * region into the guest with 2MiB pages whenever TDP is disabled (i.e. - * whenever KVM is shadowing the guest page tables). - * - * When TDP is enabled, KVM should be able to map HPAGE_SLOT with huge - * pages irrespective of the guest page size, so map with 4KiB pages - * to test that that is the case. - */ - if (kvm_is_tdp_enabled()) - virt_map_level(vm, HPAGE_GVA, HPAGE_GPA, nr_bytes, PG_LEVEL_4K); - else - virt_map_level(vm, HPAGE_GVA, HPAGE_GPA, nr_bytes, PG_LEVEL_2M); - - hva = addr_gpa2hva(vm, HPAGE_GPA); - memset(hva, RETURN_OPCODE, nr_bytes); - - check_2m_page_count(vm, 0); - check_split_count(vm, 0); - - /* - * The guest code will first read from the first hugepage, resulting - * in a huge page mapping being created. - */ - vcpu_run(vcpu); - check_2m_page_count(vm, 1); - check_split_count(vm, 0); - - /* - * Then the guest code will read from the second hugepage, resulting - * in another huge page mapping being created. - */ - vcpu_run(vcpu); - check_2m_page_count(vm, 2); - check_split_count(vm, 0); - - /* - * Next, the guest will execute from the first huge page, causing it - * to be remapped at 4k. - * - * If NX huge pages are disabled, this should have no effect. - */ - vcpu_run(vcpu); - check_2m_page_count(vm, disable_nx_huge_pages ? 2 : 1); - check_split_count(vm, disable_nx_huge_pages ? 
0 : 1); - - /* - * Executing from the third huge page (previously unaccessed) will - * cause part to be mapped at 4k. - * - * If NX huge pages are disabled, it should be mapped at 2M. - */ - vcpu_run(vcpu); - check_2m_page_count(vm, disable_nx_huge_pages ? 3 : 1); - check_split_count(vm, disable_nx_huge_pages ? 0 : 2); - - /* Reading from the first huge page again should have no effect. */ - vcpu_run(vcpu); - check_2m_page_count(vm, disable_nx_huge_pages ? 3 : 1); - check_split_count(vm, disable_nx_huge_pages ? 0 : 2); - - /* Give recovery thread time to run. */ - wait_for_reclaim(reclaim_period_ms); - - /* - * Now that the reclaimer has run, all the split pages should be gone. - * - * If NX huge pages are disabled, the relaimer will not run, so - * nothing should change from here on. - */ - check_2m_page_count(vm, disable_nx_huge_pages ? 3 : 1); - check_split_count(vm, 0); - - /* - * The 4k mapping on hpage 3 should have been removed, so check that - * reading from it causes a huge page mapping to be installed. - */ - vcpu_run(vcpu); - check_2m_page_count(vm, disable_nx_huge_pages ? 3 : 2); - check_split_count(vm, 0); - - kvm_vm_free(vm); -} - -static void help(char *name) -{ - puts(""); - printf("usage: %s [-h] [-p period_ms] [-t token]\n", name); - puts(""); - printf(" -p: The NX reclaim period in milliseconds.\n"); - printf(" -t: The magic token to indicate environment setup is done.\n"); - printf(" -r: The test has reboot permissions and can disable NX huge pages.\n"); - puts(""); - exit(0); -} - -int main(int argc, char **argv) -{ - int reclaim_period_ms = 0, token = 0, opt; - bool reboot_permissions = false; - - while ((opt = getopt(argc, argv, "hp:t:r")) != -1) { - switch (opt) { - case 'p': - reclaim_period_ms = atoi_positive("Reclaim period", optarg); - break; - case 't': - token = atoi_paranoid(optarg); - break; - case 'r': - reboot_permissions = true; - break; - case 'h': - default: - help(argv[0]); - break; - } - } - - TEST_REQUIRE(kvm_has_cap(KVM_CAP_VM_DISABLE_NX_HUGE_PAGES)); - - __TEST_REQUIRE(token == MAGIC_TOKEN, - "This test must be run with the magic token via '-t %d'.\n" - "Running via nx_huge_pages_test.sh, which also handles " - "environment setup, is strongly recommended.", MAGIC_TOKEN); - - run_test(reclaim_period_ms, false, reboot_permissions); - run_test(reclaim_period_ms, true, reboot_permissions); - - return 0; -} - diff --git a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.sh b/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.sh deleted file mode 100755 index caad084b8bfd..000000000000 --- a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.sh +++ /dev/null @@ -1,69 +0,0 @@ -#!/bin/bash -# SPDX-License-Identifier: GPL-2.0-only */ -# -# Wrapper script which performs setup and cleanup for nx_huge_pages_test. -# Makes use of root privileges to set up huge pages and KVM module parameters. -# -# Copyright (C) 2022, Google LLC. - -set -e - -NX_HUGE_PAGES=$(cat /sys/module/kvm/parameters/nx_huge_pages) -NX_HUGE_PAGES_RECOVERY_RATIO=$(cat /sys/module/kvm/parameters/nx_huge_pages_recovery_ratio) -NX_HUGE_PAGES_RECOVERY_PERIOD=$(cat /sys/module/kvm/parameters/nx_huge_pages_recovery_period_ms) -HUGE_PAGES=$(cat /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages) - -# If we're already root, the host might not have sudo. 
-if [ $(whoami) == "root" ]; then - function do_sudo () { - "$@" - } -else - function do_sudo () { - sudo "$@" - } -fi - -set +e - -function sudo_echo () { - echo "$1" | do_sudo tee -a "$2" > /dev/null -} - -NXECUTABLE="$(dirname $0)/nx_huge_pages_test" - -sudo_echo test /dev/null || exit 4 # KSFT_SKIP=4 - -( - set -e - - sudo_echo 1 /sys/module/kvm/parameters/nx_huge_pages - sudo_echo 1 /sys/module/kvm/parameters/nx_huge_pages_recovery_ratio - sudo_echo 100 /sys/module/kvm/parameters/nx_huge_pages_recovery_period_ms - sudo_echo "$(( $HUGE_PAGES + 3 ))" /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages - - # Test with reboot permissions - if [ $(whoami) == "root" ] || sudo setcap cap_sys_boot+ep $NXECUTABLE 2> /dev/null; then - echo Running test with CAP_SYS_BOOT enabled - $NXECUTABLE -t 887563923 -p 100 -r - test $(whoami) == "root" || sudo setcap cap_sys_boot-ep $NXECUTABLE - else - echo setcap failed, skipping nx_huge_pages_test with CAP_SYS_BOOT enabled - fi - - # Test without reboot permissions - if [ $(whoami) != "root" ] ; then - echo Running test with CAP_SYS_BOOT disabled - $NXECUTABLE -t 887563923 -p 100 - else - echo Running as root, skipping nx_huge_pages_test with CAP_SYS_BOOT disabled - fi -) -RET=$? - -sudo_echo "$NX_HUGE_PAGES" /sys/module/kvm/parameters/nx_huge_pages -sudo_echo "$NX_HUGE_PAGES_RECOVERY_RATIO" /sys/module/kvm/parameters/nx_huge_pages_recovery_ratio -sudo_echo "$NX_HUGE_PAGES_RECOVERY_PERIOD" /sys/module/kvm/parameters/nx_huge_pages_recovery_period_ms -sudo_echo "$HUGE_PAGES" /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages - -exit $RET diff --git a/tools/testing/selftests/kvm/x86_64/platform_info_test.c b/tools/testing/selftests/kvm/x86_64/platform_info_test.c deleted file mode 100644 index 9cbf283ebc55..000000000000 --- a/tools/testing/selftests/kvm/x86_64/platform_info_test.c +++ /dev/null @@ -1,78 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Test for x86 KVM_CAP_MSR_PLATFORM_INFO - * - * Copyright (C) 2018, Google LLC. - * - * This work is licensed under the terms of the GNU GPL, version 2. - * - * Verifies expected behavior of controlling guest access to - * MSR_PLATFORM_INFO. 
- */ -#include -#include -#include -#include -#include - -#include "test_util.h" -#include "kvm_util.h" -#include "processor.h" - -#define MSR_PLATFORM_INFO_MAX_TURBO_RATIO 0xff00 - -static void guest_code(void) -{ - uint64_t msr_platform_info; - uint8_t vector; - - GUEST_SYNC(true); - msr_platform_info = rdmsr(MSR_PLATFORM_INFO); - GUEST_ASSERT_EQ(msr_platform_info & MSR_PLATFORM_INFO_MAX_TURBO_RATIO, - MSR_PLATFORM_INFO_MAX_TURBO_RATIO); - - GUEST_SYNC(false); - vector = rdmsr_safe(MSR_PLATFORM_INFO, &msr_platform_info); - GUEST_ASSERT_EQ(vector, GP_VECTOR); - - GUEST_DONE(); -} - -int main(int argc, char *argv[]) -{ - struct kvm_vcpu *vcpu; - struct kvm_vm *vm; - uint64_t msr_platform_info; - struct ucall uc; - - TEST_REQUIRE(kvm_has_cap(KVM_CAP_MSR_PLATFORM_INFO)); - - vm = vm_create_with_one_vcpu(&vcpu, guest_code); - - msr_platform_info = vcpu_get_msr(vcpu, MSR_PLATFORM_INFO); - vcpu_set_msr(vcpu, MSR_PLATFORM_INFO, - msr_platform_info | MSR_PLATFORM_INFO_MAX_TURBO_RATIO); - - for (;;) { - vcpu_run(vcpu); - TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); - - switch (get_ucall(vcpu, &uc)) { - case UCALL_SYNC: - vm_enable_cap(vm, KVM_CAP_MSR_PLATFORM_INFO, uc.args[1]); - break; - case UCALL_DONE: - goto done; - case UCALL_ABORT: - REPORT_GUEST_ASSERT(uc); - default: - TEST_FAIL("Unexpected ucall %lu", uc.cmd); - break; - } - } - -done: - kvm_vm_free(vm); - - return 0; -} diff --git a/tools/testing/selftests/kvm/x86_64/pmu_counters_test.c b/tools/testing/selftests/kvm/x86_64/pmu_counters_test.c deleted file mode 100644 index 698cb36989db..000000000000 --- a/tools/testing/selftests/kvm/x86_64/pmu_counters_test.c +++ /dev/null @@ -1,644 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright (C) 2023, Tencent, Inc. - */ -#include - -#include "pmu.h" -#include "processor.h" - -/* Number of iterations of the loop for the guest measurement payload. */ -#define NUM_LOOPS 10 - -/* Each iteration of the loop retires one branch instruction. */ -#define NUM_BRANCH_INSNS_RETIRED (NUM_LOOPS) - -/* - * Number of instructions in each loop. 1 CLFLUSH/CLFLUSHOPT/NOP, 1 MFENCE, - * 1 LOOP. - */ -#define NUM_INSNS_PER_LOOP 3 - -/* - * Number of "extra" instructions that will be counted, i.e. the number of - * instructions that are needed to set up the loop and then disable the - * counter. 2 MOV, 2 XOR, 1 WRMSR. - */ -#define NUM_EXTRA_INSNS 5 - -/* Total number of instructions retired within the measured section. */ -#define NUM_INSNS_RETIRED (NUM_LOOPS * NUM_INSNS_PER_LOOP + NUM_EXTRA_INSNS) - - -static uint8_t kvm_pmu_version; -static bool kvm_has_perf_caps; - -static struct kvm_vm *pmu_vm_create_with_one_vcpu(struct kvm_vcpu **vcpu, - void *guest_code, - uint8_t pmu_version, - uint64_t perf_capabilities) -{ - struct kvm_vm *vm; - - vm = vm_create_with_one_vcpu(vcpu, guest_code); - sync_global_to_guest(vm, kvm_pmu_version); - - /* - * Set PERF_CAPABILITIES before PMU version as KVM disallows enabling - * features via PERF_CAPABILITIES if the guest doesn't have a vPMU. 
- */ - if (kvm_has_perf_caps) - vcpu_set_msr(*vcpu, MSR_IA32_PERF_CAPABILITIES, perf_capabilities); - - vcpu_set_cpuid_property(*vcpu, X86_PROPERTY_PMU_VERSION, pmu_version); - return vm; -} - -static void run_vcpu(struct kvm_vcpu *vcpu) -{ - struct ucall uc; - - do { - vcpu_run(vcpu); - switch (get_ucall(vcpu, &uc)) { - case UCALL_SYNC: - break; - case UCALL_ABORT: - REPORT_GUEST_ASSERT(uc); - break; - case UCALL_PRINTF: - pr_info("%s", uc.buffer); - break; - case UCALL_DONE: - break; - default: - TEST_FAIL("Unexpected ucall: %lu", uc.cmd); - } - } while (uc.cmd != UCALL_DONE); -} - -static uint8_t guest_get_pmu_version(void) -{ - /* - * Return the effective PMU version, i.e. the minimum between what KVM - * supports and what is enumerated to the guest. The host deliberately - * advertises a PMU version to the guest beyond what is actually - * supported by KVM to verify KVM doesn't freak out and do something - * bizarre with an architecturally valid, but unsupported, version. - */ - return min_t(uint8_t, kvm_pmu_version, this_cpu_property(X86_PROPERTY_PMU_VERSION)); -} - -/* - * If an architectural event is supported and guaranteed to generate at least - * one "hit, assert that its count is non-zero. If an event isn't supported or - * the test can't guarantee the associated action will occur, then all bets are - * off regarding the count, i.e. no checks can be done. - * - * Sanity check that in all cases, the event doesn't count when it's disabled, - * and that KVM correctly emulates the write of an arbitrary value. - */ -static void guest_assert_event_count(uint8_t idx, - struct kvm_x86_pmu_feature event, - uint32_t pmc, uint32_t pmc_msr) -{ - uint64_t count; - - count = _rdpmc(pmc); - if (!this_pmu_has(event)) - goto sanity_checks; - - switch (idx) { - case INTEL_ARCH_INSTRUCTIONS_RETIRED_INDEX: - GUEST_ASSERT_EQ(count, NUM_INSNS_RETIRED); - break; - case INTEL_ARCH_BRANCHES_RETIRED_INDEX: - GUEST_ASSERT_EQ(count, NUM_BRANCH_INSNS_RETIRED); - break; - case INTEL_ARCH_LLC_REFERENCES_INDEX: - case INTEL_ARCH_LLC_MISSES_INDEX: - if (!this_cpu_has(X86_FEATURE_CLFLUSHOPT) && - !this_cpu_has(X86_FEATURE_CLFLUSH)) - break; - fallthrough; - case INTEL_ARCH_CPU_CYCLES_INDEX: - case INTEL_ARCH_REFERENCE_CYCLES_INDEX: - GUEST_ASSERT_NE(count, 0); - break; - case INTEL_ARCH_TOPDOWN_SLOTS_INDEX: - GUEST_ASSERT(count >= NUM_INSNS_RETIRED); - break; - default: - break; - } - -sanity_checks: - __asm__ __volatile__("loop ." : "+c"((int){NUM_LOOPS})); - GUEST_ASSERT_EQ(_rdpmc(pmc), count); - - wrmsr(pmc_msr, 0xdead); - GUEST_ASSERT_EQ(_rdpmc(pmc), 0xdead); -} - -/* - * Enable and disable the PMC in a monolithic asm blob to ensure that the - * compiler can't insert _any_ code into the measured sequence. Note, ECX - * doesn't need to be clobbered as the input value, @pmc_msr, is restored - * before the end of the sequence. - * - * If CLFUSH{,OPT} is supported, flush the cacheline containing (at least) the - * CLFUSH{,OPT} instruction on each loop iteration to force LLC references and - * misses, i.e. to allow testing that those events actually count. - * - * If forced emulation is enabled (and specified), force emulation on a subset - * of the measured code to verify that KVM correctly emulates instructions and - * branches retired events in conjunction with hardware also counting said - * events. 
- */ -#define GUEST_MEASURE_EVENT(_msr, _value, clflush, FEP) \ -do { \ - __asm__ __volatile__("wrmsr\n\t" \ - " mov $" __stringify(NUM_LOOPS) ", %%ecx\n\t" \ - "1:\n\t" \ - clflush "\n\t" \ - "mfence\n\t" \ - FEP "loop 1b\n\t" \ - FEP "mov %%edi, %%ecx\n\t" \ - FEP "xor %%eax, %%eax\n\t" \ - FEP "xor %%edx, %%edx\n\t" \ - "wrmsr\n\t" \ - :: "a"((uint32_t)_value), "d"(_value >> 32), \ - "c"(_msr), "D"(_msr) \ - ); \ -} while (0) - -#define GUEST_TEST_EVENT(_idx, _event, _pmc, _pmc_msr, _ctrl_msr, _value, FEP) \ -do { \ - wrmsr(pmc_msr, 0); \ - \ - if (this_cpu_has(X86_FEATURE_CLFLUSHOPT)) \ - GUEST_MEASURE_EVENT(_ctrl_msr, _value, "clflushopt .", FEP); \ - else if (this_cpu_has(X86_FEATURE_CLFLUSH)) \ - GUEST_MEASURE_EVENT(_ctrl_msr, _value, "clflush .", FEP); \ - else \ - GUEST_MEASURE_EVENT(_ctrl_msr, _value, "nop", FEP); \ - \ - guest_assert_event_count(_idx, _event, _pmc, _pmc_msr); \ -} while (0) - -static void __guest_test_arch_event(uint8_t idx, struct kvm_x86_pmu_feature event, - uint32_t pmc, uint32_t pmc_msr, - uint32_t ctrl_msr, uint64_t ctrl_msr_value) -{ - GUEST_TEST_EVENT(idx, event, pmc, pmc_msr, ctrl_msr, ctrl_msr_value, ""); - - if (is_forced_emulation_enabled) - GUEST_TEST_EVENT(idx, event, pmc, pmc_msr, ctrl_msr, ctrl_msr_value, KVM_FEP); -} - -#define X86_PMU_FEATURE_NULL \ -({ \ - struct kvm_x86_pmu_feature feature = {}; \ - \ - feature; \ -}) - -static bool pmu_is_null_feature(struct kvm_x86_pmu_feature event) -{ - return !(*(u64 *)&event); -} - -static void guest_test_arch_event(uint8_t idx) -{ - const struct { - struct kvm_x86_pmu_feature gp_event; - struct kvm_x86_pmu_feature fixed_event; - } intel_event_to_feature[] = { - [INTEL_ARCH_CPU_CYCLES_INDEX] = { X86_PMU_FEATURE_CPU_CYCLES, X86_PMU_FEATURE_CPU_CYCLES_FIXED }, - [INTEL_ARCH_INSTRUCTIONS_RETIRED_INDEX] = { X86_PMU_FEATURE_INSNS_RETIRED, X86_PMU_FEATURE_INSNS_RETIRED_FIXED }, - /* - * Note, the fixed counter for reference cycles is NOT the same - * as the general purpose architectural event. The fixed counter - * explicitly counts at the same frequency as the TSC, whereas - * the GP event counts at a fixed, but uarch specific, frequency. - * Bundle them here for simplicity. - */ - [INTEL_ARCH_REFERENCE_CYCLES_INDEX] = { X86_PMU_FEATURE_REFERENCE_CYCLES, X86_PMU_FEATURE_REFERENCE_TSC_CYCLES_FIXED }, - [INTEL_ARCH_LLC_REFERENCES_INDEX] = { X86_PMU_FEATURE_LLC_REFERENCES, X86_PMU_FEATURE_NULL }, - [INTEL_ARCH_LLC_MISSES_INDEX] = { X86_PMU_FEATURE_LLC_MISSES, X86_PMU_FEATURE_NULL }, - [INTEL_ARCH_BRANCHES_RETIRED_INDEX] = { X86_PMU_FEATURE_BRANCH_INSNS_RETIRED, X86_PMU_FEATURE_NULL }, - [INTEL_ARCH_BRANCHES_MISPREDICTED_INDEX] = { X86_PMU_FEATURE_BRANCHES_MISPREDICTED, X86_PMU_FEATURE_NULL }, - [INTEL_ARCH_TOPDOWN_SLOTS_INDEX] = { X86_PMU_FEATURE_TOPDOWN_SLOTS, X86_PMU_FEATURE_TOPDOWN_SLOTS_FIXED }, - }; - - uint32_t nr_gp_counters = this_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS); - uint32_t pmu_version = guest_get_pmu_version(); - /* PERF_GLOBAL_CTRL exists only for Architectural PMU Version 2+. */ - bool guest_has_perf_global_ctrl = pmu_version >= 2; - struct kvm_x86_pmu_feature gp_event, fixed_event; - uint32_t base_pmc_msr; - unsigned int i; - - /* The host side shouldn't invoke this without a guest PMU. 
*/ - GUEST_ASSERT(pmu_version); - - if (this_cpu_has(X86_FEATURE_PDCM) && - rdmsr(MSR_IA32_PERF_CAPABILITIES) & PMU_CAP_FW_WRITES) - base_pmc_msr = MSR_IA32_PMC0; - else - base_pmc_msr = MSR_IA32_PERFCTR0; - - gp_event = intel_event_to_feature[idx].gp_event; - GUEST_ASSERT_EQ(idx, gp_event.f.bit); - - GUEST_ASSERT(nr_gp_counters); - - for (i = 0; i < nr_gp_counters; i++) { - uint64_t eventsel = ARCH_PERFMON_EVENTSEL_OS | - ARCH_PERFMON_EVENTSEL_ENABLE | - intel_pmu_arch_events[idx]; - - wrmsr(MSR_P6_EVNTSEL0 + i, 0); - if (guest_has_perf_global_ctrl) - wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, BIT_ULL(i)); - - __guest_test_arch_event(idx, gp_event, i, base_pmc_msr + i, - MSR_P6_EVNTSEL0 + i, eventsel); - } - - if (!guest_has_perf_global_ctrl) - return; - - fixed_event = intel_event_to_feature[idx].fixed_event; - if (pmu_is_null_feature(fixed_event) || !this_pmu_has(fixed_event)) - return; - - i = fixed_event.f.bit; - - wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, FIXED_PMC_CTRL(i, FIXED_PMC_KERNEL)); - - __guest_test_arch_event(idx, fixed_event, i | INTEL_RDPMC_FIXED, - MSR_CORE_PERF_FIXED_CTR0 + i, - MSR_CORE_PERF_GLOBAL_CTRL, - FIXED_PMC_GLOBAL_CTRL_ENABLE(i)); -} - -static void guest_test_arch_events(void) -{ - uint8_t i; - - for (i = 0; i < NR_INTEL_ARCH_EVENTS; i++) - guest_test_arch_event(i); - - GUEST_DONE(); -} - -static void test_arch_events(uint8_t pmu_version, uint64_t perf_capabilities, - uint8_t length, uint8_t unavailable_mask) -{ - struct kvm_vcpu *vcpu; - struct kvm_vm *vm; - - /* Testing arch events requires a vPMU (there are no negative tests). */ - if (!pmu_version) - return; - - vm = pmu_vm_create_with_one_vcpu(&vcpu, guest_test_arch_events, - pmu_version, perf_capabilities); - - vcpu_set_cpuid_property(vcpu, X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH, - length); - vcpu_set_cpuid_property(vcpu, X86_PROPERTY_PMU_EVENTS_MASK, - unavailable_mask); - - run_vcpu(vcpu); - - kvm_vm_free(vm); -} - -/* - * Limit testing to MSRs that are actually defined by Intel (in the SDM). MSRs - * that aren't defined counter MSRs *probably* don't exist, but there's no - * guarantee that currently undefined MSR indices won't be used for something - * other than PMCs in the future. - */ -#define MAX_NR_GP_COUNTERS 8 -#define MAX_NR_FIXED_COUNTERS 3 - -#define GUEST_ASSERT_PMC_MSR_ACCESS(insn, msr, expect_gp, vector) \ -__GUEST_ASSERT(expect_gp ? vector == GP_VECTOR : !vector, \ - "Expected %s on " #insn "(0x%x), got vector %u", \ - expect_gp ? 
"#GP" : "no fault", msr, vector) \ - -#define GUEST_ASSERT_PMC_VALUE(insn, msr, val, expected) \ - __GUEST_ASSERT(val == expected_val, \ - "Expected " #insn "(0x%x) to yield 0x%lx, got 0x%lx", \ - msr, expected_val, val); - -static void guest_test_rdpmc(uint32_t rdpmc_idx, bool expect_success, - uint64_t expected_val) -{ - uint8_t vector; - uint64_t val; - - vector = rdpmc_safe(rdpmc_idx, &val); - GUEST_ASSERT_PMC_MSR_ACCESS(RDPMC, rdpmc_idx, !expect_success, vector); - if (expect_success) - GUEST_ASSERT_PMC_VALUE(RDPMC, rdpmc_idx, val, expected_val); - - if (!is_forced_emulation_enabled) - return; - - vector = rdpmc_safe_fep(rdpmc_idx, &val); - GUEST_ASSERT_PMC_MSR_ACCESS(RDPMC, rdpmc_idx, !expect_success, vector); - if (expect_success) - GUEST_ASSERT_PMC_VALUE(RDPMC, rdpmc_idx, val, expected_val); -} - -static void guest_rd_wr_counters(uint32_t base_msr, uint8_t nr_possible_counters, - uint8_t nr_counters, uint32_t or_mask) -{ - const bool pmu_has_fast_mode = !guest_get_pmu_version(); - uint8_t i; - - for (i = 0; i < nr_possible_counters; i++) { - /* - * TODO: Test a value that validates full-width writes and the - * width of the counters. - */ - const uint64_t test_val = 0xffff; - const uint32_t msr = base_msr + i; - - /* - * Fixed counters are supported if the counter is less than the - * number of enumerated contiguous counters *or* the counter is - * explicitly enumerated in the supported counters mask. - */ - const bool expect_success = i < nr_counters || (or_mask & BIT(i)); - - /* - * KVM drops writes to MSR_P6_PERFCTR[0|1] if the counters are - * unsupported, i.e. doesn't #GP and reads back '0'. - */ - const uint64_t expected_val = expect_success ? test_val : 0; - const bool expect_gp = !expect_success && msr != MSR_P6_PERFCTR0 && - msr != MSR_P6_PERFCTR1; - uint32_t rdpmc_idx; - uint8_t vector; - uint64_t val; - - vector = wrmsr_safe(msr, test_val); - GUEST_ASSERT_PMC_MSR_ACCESS(WRMSR, msr, expect_gp, vector); - - vector = rdmsr_safe(msr, &val); - GUEST_ASSERT_PMC_MSR_ACCESS(RDMSR, msr, expect_gp, vector); - - /* On #GP, the result of RDMSR is undefined. */ - if (!expect_gp) - GUEST_ASSERT_PMC_VALUE(RDMSR, msr, val, expected_val); - - /* - * Redo the read tests with RDPMC, which has different indexing - * semantics and additional capabilities. - */ - rdpmc_idx = i; - if (base_msr == MSR_CORE_PERF_FIXED_CTR0) - rdpmc_idx |= INTEL_RDPMC_FIXED; - - guest_test_rdpmc(rdpmc_idx, expect_success, expected_val); - - /* - * KVM doesn't support non-architectural PMUs, i.e. it should - * impossible to have fast mode RDPMC. Verify that attempting - * to use fast RDPMC always #GPs. - */ - GUEST_ASSERT(!expect_success || !pmu_has_fast_mode); - rdpmc_idx |= INTEL_RDPMC_FAST; - guest_test_rdpmc(rdpmc_idx, false, -1ull); - - vector = wrmsr_safe(msr, 0); - GUEST_ASSERT_PMC_MSR_ACCESS(WRMSR, msr, expect_gp, vector); - } -} - -static void guest_test_gp_counters(void) -{ - uint8_t pmu_version = guest_get_pmu_version(); - uint8_t nr_gp_counters = 0; - uint32_t base_msr; - - if (pmu_version) - nr_gp_counters = this_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS); - - /* - * For v2+ PMUs, PERF_GLOBAL_CTRL's architectural post-RESET value is - * "Sets bits n-1:0 and clears the upper bits", where 'n' is the number - * of GP counters. If there are no GP counters, require KVM to leave - * PERF_GLOBAL_CTRL '0'. This edge case isn't covered by the SDM, but - * follow the spirit of the architecture and only globally enable GP - * counters, of which there are none. 
- */ - if (pmu_version > 1) { - uint64_t global_ctrl = rdmsr(MSR_CORE_PERF_GLOBAL_CTRL); - - if (nr_gp_counters) - GUEST_ASSERT_EQ(global_ctrl, GENMASK_ULL(nr_gp_counters - 1, 0)); - else - GUEST_ASSERT_EQ(global_ctrl, 0); - } - - if (this_cpu_has(X86_FEATURE_PDCM) && - rdmsr(MSR_IA32_PERF_CAPABILITIES) & PMU_CAP_FW_WRITES) - base_msr = MSR_IA32_PMC0; - else - base_msr = MSR_IA32_PERFCTR0; - - guest_rd_wr_counters(base_msr, MAX_NR_GP_COUNTERS, nr_gp_counters, 0); - GUEST_DONE(); -} - -static void test_gp_counters(uint8_t pmu_version, uint64_t perf_capabilities, - uint8_t nr_gp_counters) -{ - struct kvm_vcpu *vcpu; - struct kvm_vm *vm; - - vm = pmu_vm_create_with_one_vcpu(&vcpu, guest_test_gp_counters, - pmu_version, perf_capabilities); - - vcpu_set_cpuid_property(vcpu, X86_PROPERTY_PMU_NR_GP_COUNTERS, - nr_gp_counters); - - run_vcpu(vcpu); - - kvm_vm_free(vm); -} - -static void guest_test_fixed_counters(void) -{ - uint64_t supported_bitmask = 0; - uint8_t nr_fixed_counters = 0; - uint8_t i; - - /* Fixed counters require Architectural vPMU Version 2+. */ - if (guest_get_pmu_version() >= 2) - nr_fixed_counters = this_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS); - - /* - * The supported bitmask for fixed counters was introduced in PMU - * version 5. - */ - if (guest_get_pmu_version() >= 5) - supported_bitmask = this_cpu_property(X86_PROPERTY_PMU_FIXED_COUNTERS_BITMASK); - - guest_rd_wr_counters(MSR_CORE_PERF_FIXED_CTR0, MAX_NR_FIXED_COUNTERS, - nr_fixed_counters, supported_bitmask); - - for (i = 0; i < MAX_NR_FIXED_COUNTERS; i++) { - uint8_t vector; - uint64_t val; - - if (i >= nr_fixed_counters && !(supported_bitmask & BIT_ULL(i))) { - vector = wrmsr_safe(MSR_CORE_PERF_FIXED_CTR_CTRL, - FIXED_PMC_CTRL(i, FIXED_PMC_KERNEL)); - __GUEST_ASSERT(vector == GP_VECTOR, - "Expected #GP for counter %u in FIXED_CTR_CTRL", i); - - vector = wrmsr_safe(MSR_CORE_PERF_GLOBAL_CTRL, - FIXED_PMC_GLOBAL_CTRL_ENABLE(i)); - __GUEST_ASSERT(vector == GP_VECTOR, - "Expected #GP for counter %u in PERF_GLOBAL_CTRL", i); - continue; - } - - wrmsr(MSR_CORE_PERF_FIXED_CTR0 + i, 0); - wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, FIXED_PMC_CTRL(i, FIXED_PMC_KERNEL)); - wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, FIXED_PMC_GLOBAL_CTRL_ENABLE(i)); - __asm__ __volatile__("loop ." : "+c"((int){NUM_LOOPS})); - wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0); - val = rdmsr(MSR_CORE_PERF_FIXED_CTR0 + i); - - GUEST_ASSERT_NE(val, 0); - } - GUEST_DONE(); -} - -static void test_fixed_counters(uint8_t pmu_version, uint64_t perf_capabilities, - uint8_t nr_fixed_counters, - uint32_t supported_bitmask) -{ - struct kvm_vcpu *vcpu; - struct kvm_vm *vm; - - vm = pmu_vm_create_with_one_vcpu(&vcpu, guest_test_fixed_counters, - pmu_version, perf_capabilities); - - vcpu_set_cpuid_property(vcpu, X86_PROPERTY_PMU_FIXED_COUNTERS_BITMASK, - supported_bitmask); - vcpu_set_cpuid_property(vcpu, X86_PROPERTY_PMU_NR_FIXED_COUNTERS, - nr_fixed_counters); - - run_vcpu(vcpu); - - kvm_vm_free(vm); -} - -static void test_intel_counters(void) -{ - uint8_t nr_arch_events = kvm_cpu_property(X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH); - uint8_t nr_fixed_counters = kvm_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS); - uint8_t nr_gp_counters = kvm_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS); - uint8_t pmu_version = kvm_cpu_property(X86_PROPERTY_PMU_VERSION); - unsigned int i; - uint8_t v, j; - uint32_t k; - - const uint64_t perf_caps[] = { - 0, - PMU_CAP_FW_WRITES, - }; - - /* - * Test up to PMU v5, which is the current maximum version defined by - * Intel, i.e. 
is the last version that is guaranteed to be backwards - * compatible with KVM's existing behavior. - */ - uint8_t max_pmu_version = max_t(typeof(pmu_version), pmu_version, 5); - - /* - * Detect the existence of events that aren't supported by selftests. - * This will (obviously) fail any time the kernel adds support for a - * new event, but it's worth paying that price to keep the test fresh. - */ - TEST_ASSERT(nr_arch_events <= NR_INTEL_ARCH_EVENTS, - "New architectural event(s) detected; please update this test (length = %u, mask = %x)", - nr_arch_events, kvm_cpu_property(X86_PROPERTY_PMU_EVENTS_MASK)); - - /* - * Force iterating over known arch events regardless of whether or not - * KVM/hardware supports a given event. - */ - nr_arch_events = max_t(typeof(nr_arch_events), nr_arch_events, NR_INTEL_ARCH_EVENTS); - - for (v = 0; v <= max_pmu_version; v++) { - for (i = 0; i < ARRAY_SIZE(perf_caps); i++) { - if (!kvm_has_perf_caps && perf_caps[i]) - continue; - - pr_info("Testing arch events, PMU version %u, perf_caps = %lx\n", - v, perf_caps[i]); - /* - * To keep the total runtime reasonable, test every - * possible non-zero, non-reserved bitmap combination - * only with the native PMU version and the full bit - * vector length. - */ - if (v == pmu_version) { - for (k = 1; k < (BIT(nr_arch_events) - 1); k++) - test_arch_events(v, perf_caps[i], nr_arch_events, k); - } - /* - * Test single bits for all PMU version and lengths up - * the number of events +1 (to verify KVM doesn't do - * weird things if the guest length is greater than the - * host length). Explicitly test a mask of '0' and all - * ones i.e. all events being available and unavailable. - */ - for (j = 0; j <= nr_arch_events + 1; j++) { - test_arch_events(v, perf_caps[i], j, 0); - test_arch_events(v, perf_caps[i], j, 0xff); - - for (k = 0; k < nr_arch_events; k++) - test_arch_events(v, perf_caps[i], j, BIT(k)); - } - - pr_info("Testing GP counters, PMU version %u, perf_caps = %lx\n", - v, perf_caps[i]); - for (j = 0; j <= nr_gp_counters; j++) - test_gp_counters(v, perf_caps[i], j); - - pr_info("Testing fixed counters, PMU version %u, perf_caps = %lx\n", - v, perf_caps[i]); - for (j = 0; j <= nr_fixed_counters; j++) { - for (k = 0; k <= (BIT(nr_fixed_counters) - 1); k++) - test_fixed_counters(v, perf_caps[i], j, k); - } - } - } -} - -int main(int argc, char *argv[]) -{ - TEST_REQUIRE(kvm_is_pmu_enabled()); - - TEST_REQUIRE(host_cpu_is_intel); - TEST_REQUIRE(kvm_cpu_has_p(X86_PROPERTY_PMU_VERSION)); - TEST_REQUIRE(kvm_cpu_property(X86_PROPERTY_PMU_VERSION) > 0); - - kvm_pmu_version = kvm_cpu_property(X86_PROPERTY_PMU_VERSION); - kvm_has_perf_caps = kvm_cpu_has(X86_FEATURE_PDCM); - - test_intel_counters(); - - return 0; -} diff --git a/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c b/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c deleted file mode 100644 index c15513cd74d1..000000000000 --- a/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c +++ /dev/null @@ -1,876 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Test for x86 KVM_SET_PMU_EVENT_FILTER. - * - * Copyright (C) 2022, Google LLC. - * - * This work is licensed under the terms of the GNU GPL, version 2. - * - * Verifies the expected behavior of allow lists and deny lists for - * virtual PMU events. 
- */ -#include "kvm_util.h" -#include "pmu.h" -#include "processor.h" -#include "test_util.h" - -#define NUM_BRANCHES 42 -#define MAX_TEST_EVENTS 10 - -#define PMU_EVENT_FILTER_INVALID_ACTION (KVM_PMU_EVENT_DENY + 1) -#define PMU_EVENT_FILTER_INVALID_FLAGS (KVM_PMU_EVENT_FLAGS_VALID_MASK << 1) -#define PMU_EVENT_FILTER_INVALID_NEVENTS (KVM_PMU_EVENT_FILTER_MAX_EVENTS + 1) - -struct __kvm_pmu_event_filter { - __u32 action; - __u32 nevents; - __u32 fixed_counter_bitmap; - __u32 flags; - __u32 pad[4]; - __u64 events[KVM_PMU_EVENT_FILTER_MAX_EVENTS]; -}; - -/* - * This event list comprises Intel's known architectural events, plus AMD's - * Branch Instructions Retired for Zen CPUs. Note, AMD and Intel use the - * same encoding for Instructions Retired. - */ -kvm_static_assert(INTEL_ARCH_INSTRUCTIONS_RETIRED == AMD_ZEN_INSTRUCTIONS_RETIRED); - -static const struct __kvm_pmu_event_filter base_event_filter = { - .nevents = ARRAY_SIZE(base_event_filter.events), - .events = { - INTEL_ARCH_CPU_CYCLES, - INTEL_ARCH_INSTRUCTIONS_RETIRED, - INTEL_ARCH_REFERENCE_CYCLES, - INTEL_ARCH_LLC_REFERENCES, - INTEL_ARCH_LLC_MISSES, - INTEL_ARCH_BRANCHES_RETIRED, - INTEL_ARCH_BRANCHES_MISPREDICTED, - INTEL_ARCH_TOPDOWN_SLOTS, - AMD_ZEN_BRANCHES_RETIRED, - }, -}; - -struct { - uint64_t loads; - uint64_t stores; - uint64_t loads_stores; - uint64_t branches_retired; - uint64_t instructions_retired; -} pmc_results; - -/* - * If we encounter a #GP during the guest PMU sanity check, then the guest - * PMU is not functional. Inform the hypervisor via GUEST_SYNC(0). - */ -static void guest_gp_handler(struct ex_regs *regs) -{ - GUEST_SYNC(-EFAULT); -} - -/* - * Check that we can write a new value to the given MSR and read it back. - * The caller should provide a non-empty set of bits that are safe to flip. - * - * Return on success. GUEST_SYNC(0) on error. - */ -static void check_msr(uint32_t msr, uint64_t bits_to_flip) -{ - uint64_t v = rdmsr(msr) ^ bits_to_flip; - - wrmsr(msr, v); - if (rdmsr(msr) != v) - GUEST_SYNC(-EIO); - - v ^= bits_to_flip; - wrmsr(msr, v); - if (rdmsr(msr) != v) - GUEST_SYNC(-EIO); -} - -static void run_and_measure_loop(uint32_t msr_base) -{ - const uint64_t branches_retired = rdmsr(msr_base + 0); - const uint64_t insn_retired = rdmsr(msr_base + 1); - - __asm__ __volatile__("loop ." : "+c"((int){NUM_BRANCHES})); - - pmc_results.branches_retired = rdmsr(msr_base + 0) - branches_retired; - pmc_results.instructions_retired = rdmsr(msr_base + 1) - insn_retired; -} - -static void intel_guest_code(void) -{ - check_msr(MSR_CORE_PERF_GLOBAL_CTRL, 1); - check_msr(MSR_P6_EVNTSEL0, 0xffff); - check_msr(MSR_IA32_PMC0, 0xffff); - GUEST_SYNC(0); - - for (;;) { - wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0); - wrmsr(MSR_P6_EVNTSEL0, ARCH_PERFMON_EVENTSEL_ENABLE | - ARCH_PERFMON_EVENTSEL_OS | INTEL_ARCH_BRANCHES_RETIRED); - wrmsr(MSR_P6_EVNTSEL1, ARCH_PERFMON_EVENTSEL_ENABLE | - ARCH_PERFMON_EVENTSEL_OS | INTEL_ARCH_INSTRUCTIONS_RETIRED); - wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0x3); - - run_and_measure_loop(MSR_IA32_PMC0); - GUEST_SYNC(0); - } -} - -/* - * To avoid needing a check for CPUID.80000001:ECX.PerfCtrExtCore[bit 23], - * this code uses the always-available, legacy K7 PMU MSRs, which alias to - * the first four of the six extended core PMU MSRs. 
- */ -static void amd_guest_code(void) -{ - check_msr(MSR_K7_EVNTSEL0, 0xffff); - check_msr(MSR_K7_PERFCTR0, 0xffff); - GUEST_SYNC(0); - - for (;;) { - wrmsr(MSR_K7_EVNTSEL0, 0); - wrmsr(MSR_K7_EVNTSEL0, ARCH_PERFMON_EVENTSEL_ENABLE | - ARCH_PERFMON_EVENTSEL_OS | AMD_ZEN_BRANCHES_RETIRED); - wrmsr(MSR_K7_EVNTSEL1, ARCH_PERFMON_EVENTSEL_ENABLE | - ARCH_PERFMON_EVENTSEL_OS | AMD_ZEN_INSTRUCTIONS_RETIRED); - - run_and_measure_loop(MSR_K7_PERFCTR0); - GUEST_SYNC(0); - } -} - -/* - * Run the VM to the next GUEST_SYNC(value), and return the value passed - * to the sync. Any other exit from the guest is fatal. - */ -static uint64_t run_vcpu_to_sync(struct kvm_vcpu *vcpu) -{ - struct ucall uc; - - vcpu_run(vcpu); - TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); - get_ucall(vcpu, &uc); - TEST_ASSERT(uc.cmd == UCALL_SYNC, - "Received ucall other than UCALL_SYNC: %lu", uc.cmd); - return uc.args[1]; -} - -static void run_vcpu_and_sync_pmc_results(struct kvm_vcpu *vcpu) -{ - uint64_t r; - - memset(&pmc_results, 0, sizeof(pmc_results)); - sync_global_to_guest(vcpu->vm, pmc_results); - - r = run_vcpu_to_sync(vcpu); - TEST_ASSERT(!r, "Unexpected sync value: 0x%lx", r); - - sync_global_from_guest(vcpu->vm, pmc_results); -} - -/* - * In a nested environment or if the vPMU is disabled, the guest PMU - * might not work as architected (accessing the PMU MSRs may raise - * #GP, or writes could simply be discarded). In those situations, - * there is no point in running these tests. The guest code will perform - * a sanity check and then GUEST_SYNC(success). In the case of failure, - * the behavior of the guest on resumption is undefined. - */ -static bool sanity_check_pmu(struct kvm_vcpu *vcpu) -{ - uint64_t r; - - vm_install_exception_handler(vcpu->vm, GP_VECTOR, guest_gp_handler); - r = run_vcpu_to_sync(vcpu); - vm_install_exception_handler(vcpu->vm, GP_VECTOR, NULL); - - return !r; -} - -/* - * Remove the first occurrence of 'event' (if any) from the filter's - * event list. 
- */ -static void remove_event(struct __kvm_pmu_event_filter *f, uint64_t event) -{ - bool found = false; - int i; - - for (i = 0; i < f->nevents; i++) { - if (found) - f->events[i - 1] = f->events[i]; - else - found = f->events[i] == event; - } - if (found) - f->nevents--; -} - -#define ASSERT_PMC_COUNTING_INSTRUCTIONS() \ -do { \ - uint64_t br = pmc_results.branches_retired; \ - uint64_t ir = pmc_results.instructions_retired; \ - \ - if (br && br != NUM_BRANCHES) \ - pr_info("%s: Branch instructions retired = %lu (expected %u)\n", \ - __func__, br, NUM_BRANCHES); \ - TEST_ASSERT(br, "%s: Branch instructions retired = %lu (expected > 0)", \ - __func__, br); \ - TEST_ASSERT(ir, "%s: Instructions retired = %lu (expected > 0)", \ - __func__, ir); \ -} while (0) - -#define ASSERT_PMC_NOT_COUNTING_INSTRUCTIONS() \ -do { \ - uint64_t br = pmc_results.branches_retired; \ - uint64_t ir = pmc_results.instructions_retired; \ - \ - TEST_ASSERT(!br, "%s: Branch instructions retired = %lu (expected 0)", \ - __func__, br); \ - TEST_ASSERT(!ir, "%s: Instructions retired = %lu (expected 0)", \ - __func__, ir); \ -} while (0) - -static void test_without_filter(struct kvm_vcpu *vcpu) -{ - run_vcpu_and_sync_pmc_results(vcpu); - - ASSERT_PMC_COUNTING_INSTRUCTIONS(); -} - -static void test_with_filter(struct kvm_vcpu *vcpu, - struct __kvm_pmu_event_filter *__f) -{ - struct kvm_pmu_event_filter *f = (void *)__f; - - vm_ioctl(vcpu->vm, KVM_SET_PMU_EVENT_FILTER, f); - run_vcpu_and_sync_pmc_results(vcpu); -} - -static void test_amd_deny_list(struct kvm_vcpu *vcpu) -{ - struct __kvm_pmu_event_filter f = { - .action = KVM_PMU_EVENT_DENY, - .nevents = 1, - .events = { - RAW_EVENT(0x1C2, 0), - }, - }; - - test_with_filter(vcpu, &f); - - ASSERT_PMC_COUNTING_INSTRUCTIONS(); -} - -static void test_member_deny_list(struct kvm_vcpu *vcpu) -{ - struct __kvm_pmu_event_filter f = base_event_filter; - - f.action = KVM_PMU_EVENT_DENY; - test_with_filter(vcpu, &f); - - ASSERT_PMC_NOT_COUNTING_INSTRUCTIONS(); -} - -static void test_member_allow_list(struct kvm_vcpu *vcpu) -{ - struct __kvm_pmu_event_filter f = base_event_filter; - - f.action = KVM_PMU_EVENT_ALLOW; - test_with_filter(vcpu, &f); - - ASSERT_PMC_COUNTING_INSTRUCTIONS(); -} - -static void test_not_member_deny_list(struct kvm_vcpu *vcpu) -{ - struct __kvm_pmu_event_filter f = base_event_filter; - - f.action = KVM_PMU_EVENT_DENY; - - remove_event(&f, INTEL_ARCH_INSTRUCTIONS_RETIRED); - remove_event(&f, INTEL_ARCH_BRANCHES_RETIRED); - remove_event(&f, AMD_ZEN_BRANCHES_RETIRED); - test_with_filter(vcpu, &f); - - ASSERT_PMC_COUNTING_INSTRUCTIONS(); -} - -static void test_not_member_allow_list(struct kvm_vcpu *vcpu) -{ - struct __kvm_pmu_event_filter f = base_event_filter; - - f.action = KVM_PMU_EVENT_ALLOW; - - remove_event(&f, INTEL_ARCH_INSTRUCTIONS_RETIRED); - remove_event(&f, INTEL_ARCH_BRANCHES_RETIRED); - remove_event(&f, AMD_ZEN_BRANCHES_RETIRED); - test_with_filter(vcpu, &f); - - ASSERT_PMC_NOT_COUNTING_INSTRUCTIONS(); -} - -/* - * Verify that setting KVM_PMU_CAP_DISABLE prevents the use of the PMU. - * - * Note that KVM_CAP_PMU_CAPABILITY must be invoked prior to creating VCPUs. 
- */ -static void test_pmu_config_disable(void (*guest_code)(void)) -{ - struct kvm_vcpu *vcpu; - int r; - struct kvm_vm *vm; - - r = kvm_check_cap(KVM_CAP_PMU_CAPABILITY); - if (!(r & KVM_PMU_CAP_DISABLE)) - return; - - vm = vm_create(1); - - vm_enable_cap(vm, KVM_CAP_PMU_CAPABILITY, KVM_PMU_CAP_DISABLE); - - vcpu = vm_vcpu_add(vm, 0, guest_code); - TEST_ASSERT(!sanity_check_pmu(vcpu), - "Guest should not be able to use disabled PMU."); - - kvm_vm_free(vm); -} - -/* - * On Intel, check for a non-zero PMU version, at least one general-purpose - * counter per logical processor, and support for counting the number of branch - * instructions retired. - */ -static bool use_intel_pmu(void) -{ - return host_cpu_is_intel && - kvm_cpu_property(X86_PROPERTY_PMU_VERSION) && - kvm_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS) && - kvm_pmu_has(X86_PMU_FEATURE_BRANCH_INSNS_RETIRED); -} - -/* - * On AMD, all Family 17h+ CPUs (Zen and its successors) use event encoding - * 0xc2,0 for Branch Instructions Retired. - */ -static bool use_amd_pmu(void) -{ - return host_cpu_is_amd && kvm_cpu_family() >= 0x17; -} - -/* - * "MEM_INST_RETIRED.ALL_LOADS", "MEM_INST_RETIRED.ALL_STORES", and - * "MEM_INST_RETIRED.ANY" from https://perfmon-events.intel.com/ - * supported on Intel Xeon processors: - * - Sapphire Rapids, Ice Lake, Cascade Lake, Skylake. - */ -#define MEM_INST_RETIRED 0xD0 -#define MEM_INST_RETIRED_LOAD RAW_EVENT(MEM_INST_RETIRED, 0x81) -#define MEM_INST_RETIRED_STORE RAW_EVENT(MEM_INST_RETIRED, 0x82) -#define MEM_INST_RETIRED_LOAD_STORE RAW_EVENT(MEM_INST_RETIRED, 0x83) - -static bool supports_event_mem_inst_retired(void) -{ - uint32_t eax, ebx, ecx, edx; - - cpuid(1, &eax, &ebx, &ecx, &edx); - if (x86_family(eax) == 0x6) { - switch (x86_model(eax)) { - /* Sapphire Rapids */ - case 0x8F: - /* Ice Lake */ - case 0x6A: - /* Skylake */ - /* Cascade Lake */ - case 0x55: - return true; - } - } - - return false; -} - -/* - * "LS Dispatch", from Processor Programming Reference - * (PPR) for AMD Family 17h Model 01h, Revision B1 Processors, - * Preliminary Processor Programming Reference (PPR) for AMD Family - * 17h Model 31h, Revision B0 Processors, and Preliminary Processor - * Programming Reference (PPR) for AMD Family 19h Model 01h, Revision - * B1 Processors Volume 1 of 2. - */ -#define LS_DISPATCH 0x29 -#define LS_DISPATCH_LOAD RAW_EVENT(LS_DISPATCH, BIT(0)) -#define LS_DISPATCH_STORE RAW_EVENT(LS_DISPATCH, BIT(1)) -#define LS_DISPATCH_LOAD_STORE RAW_EVENT(LS_DISPATCH, BIT(2)) - -#define INCLUDE_MASKED_ENTRY(event_select, mask, match) \ - KVM_PMU_ENCODE_MASKED_ENTRY(event_select, mask, match, false) -#define EXCLUDE_MASKED_ENTRY(event_select, mask, match) \ - KVM_PMU_ENCODE_MASKED_ENTRY(event_select, mask, match, true) - -static void masked_events_guest_test(uint32_t msr_base) -{ - /* - * The actual value of the counters don't determine the outcome of - * the test. Only that they are zero or non-zero. 
- */ - const uint64_t loads = rdmsr(msr_base + 0); - const uint64_t stores = rdmsr(msr_base + 1); - const uint64_t loads_stores = rdmsr(msr_base + 2); - int val; - - - __asm__ __volatile__("movl $0, %[v];" - "movl %[v], %%eax;" - "incl %[v];" - : [v]"+m"(val) :: "eax"); - - pmc_results.loads = rdmsr(msr_base + 0) - loads; - pmc_results.stores = rdmsr(msr_base + 1) - stores; - pmc_results.loads_stores = rdmsr(msr_base + 2) - loads_stores; -} - -static void intel_masked_events_guest_code(void) -{ - for (;;) { - wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0); - - wrmsr(MSR_P6_EVNTSEL0 + 0, ARCH_PERFMON_EVENTSEL_ENABLE | - ARCH_PERFMON_EVENTSEL_OS | MEM_INST_RETIRED_LOAD); - wrmsr(MSR_P6_EVNTSEL0 + 1, ARCH_PERFMON_EVENTSEL_ENABLE | - ARCH_PERFMON_EVENTSEL_OS | MEM_INST_RETIRED_STORE); - wrmsr(MSR_P6_EVNTSEL0 + 2, ARCH_PERFMON_EVENTSEL_ENABLE | - ARCH_PERFMON_EVENTSEL_OS | MEM_INST_RETIRED_LOAD_STORE); - - wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0x7); - - masked_events_guest_test(MSR_IA32_PMC0); - GUEST_SYNC(0); - } -} - -static void amd_masked_events_guest_code(void) -{ - for (;;) { - wrmsr(MSR_K7_EVNTSEL0, 0); - wrmsr(MSR_K7_EVNTSEL1, 0); - wrmsr(MSR_K7_EVNTSEL2, 0); - - wrmsr(MSR_K7_EVNTSEL0, ARCH_PERFMON_EVENTSEL_ENABLE | - ARCH_PERFMON_EVENTSEL_OS | LS_DISPATCH_LOAD); - wrmsr(MSR_K7_EVNTSEL1, ARCH_PERFMON_EVENTSEL_ENABLE | - ARCH_PERFMON_EVENTSEL_OS | LS_DISPATCH_STORE); - wrmsr(MSR_K7_EVNTSEL2, ARCH_PERFMON_EVENTSEL_ENABLE | - ARCH_PERFMON_EVENTSEL_OS | LS_DISPATCH_LOAD_STORE); - - masked_events_guest_test(MSR_K7_PERFCTR0); - GUEST_SYNC(0); - } -} - -static void run_masked_events_test(struct kvm_vcpu *vcpu, - const uint64_t masked_events[], - const int nmasked_events) -{ - struct __kvm_pmu_event_filter f = { - .nevents = nmasked_events, - .action = KVM_PMU_EVENT_ALLOW, - .flags = KVM_PMU_EVENT_FLAG_MASKED_EVENTS, - }; - - memcpy(f.events, masked_events, sizeof(uint64_t) * nmasked_events); - test_with_filter(vcpu, &f); -} - -#define ALLOW_LOADS BIT(0) -#define ALLOW_STORES BIT(1) -#define ALLOW_LOADS_STORES BIT(2) - -struct masked_events_test { - uint64_t intel_events[MAX_TEST_EVENTS]; - uint64_t intel_event_end; - uint64_t amd_events[MAX_TEST_EVENTS]; - uint64_t amd_event_end; - const char *msg; - uint32_t flags; -}; - -/* - * These are the test cases for the masked events tests. - * - * For each test, the guest enables 3 PMU counters (loads, stores, - * loads + stores). The filter is then set in KVM with the masked events - * provided. The test then verifies that the counters agree with which - * ones should be counting and which ones should be filtered. 
- */ -const struct masked_events_test test_cases[] = { - { - .intel_events = { - INCLUDE_MASKED_ENTRY(MEM_INST_RETIRED, 0xFF, 0x81), - }, - .amd_events = { - INCLUDE_MASKED_ENTRY(LS_DISPATCH, 0xFF, BIT(0)), - }, - .msg = "Only allow loads.", - .flags = ALLOW_LOADS, - }, { - .intel_events = { - INCLUDE_MASKED_ENTRY(MEM_INST_RETIRED, 0xFF, 0x82), - }, - .amd_events = { - INCLUDE_MASKED_ENTRY(LS_DISPATCH, 0xFF, BIT(1)), - }, - .msg = "Only allow stores.", - .flags = ALLOW_STORES, - }, { - .intel_events = { - INCLUDE_MASKED_ENTRY(MEM_INST_RETIRED, 0xFF, 0x83), - }, - .amd_events = { - INCLUDE_MASKED_ENTRY(LS_DISPATCH, 0xFF, BIT(2)), - }, - .msg = "Only allow loads + stores.", - .flags = ALLOW_LOADS_STORES, - }, { - .intel_events = { - INCLUDE_MASKED_ENTRY(MEM_INST_RETIRED, 0x7C, 0), - EXCLUDE_MASKED_ENTRY(MEM_INST_RETIRED, 0xFF, 0x83), - }, - .amd_events = { - INCLUDE_MASKED_ENTRY(LS_DISPATCH, ~(BIT(0) | BIT(1)), 0), - }, - .msg = "Only allow loads and stores.", - .flags = ALLOW_LOADS | ALLOW_STORES, - }, { - .intel_events = { - INCLUDE_MASKED_ENTRY(MEM_INST_RETIRED, 0x7C, 0), - EXCLUDE_MASKED_ENTRY(MEM_INST_RETIRED, 0xFF, 0x82), - }, - .amd_events = { - INCLUDE_MASKED_ENTRY(LS_DISPATCH, 0xF8, 0), - EXCLUDE_MASKED_ENTRY(LS_DISPATCH, 0xFF, BIT(1)), - }, - .msg = "Only allow loads and loads + stores.", - .flags = ALLOW_LOADS | ALLOW_LOADS_STORES - }, { - .intel_events = { - INCLUDE_MASKED_ENTRY(MEM_INST_RETIRED, 0xFE, 0x82), - }, - .amd_events = { - INCLUDE_MASKED_ENTRY(LS_DISPATCH, 0xF8, 0), - EXCLUDE_MASKED_ENTRY(LS_DISPATCH, 0xFF, BIT(0)), - }, - .msg = "Only allow stores and loads + stores.", - .flags = ALLOW_STORES | ALLOW_LOADS_STORES - }, { - .intel_events = { - INCLUDE_MASKED_ENTRY(MEM_INST_RETIRED, 0x7C, 0), - }, - .amd_events = { - INCLUDE_MASKED_ENTRY(LS_DISPATCH, 0xF8, 0), - }, - .msg = "Only allow loads, stores, and loads + stores.", - .flags = ALLOW_LOADS | ALLOW_STORES | ALLOW_LOADS_STORES - }, -}; - -static int append_test_events(const struct masked_events_test *test, - uint64_t *events, int nevents) -{ - const uint64_t *evts; - int i; - - evts = use_intel_pmu() ? test->intel_events : test->amd_events; - for (i = 0; i < MAX_TEST_EVENTS; i++) { - if (evts[i] == 0) - break; - - events[nevents + i] = evts[i]; - } - - return nevents + i; -} - -static bool bool_eq(bool a, bool b) -{ - return a == b; -} - -static void run_masked_events_tests(struct kvm_vcpu *vcpu, uint64_t *events, - int nevents) -{ - int ntests = ARRAY_SIZE(test_cases); - int i, n; - - for (i = 0; i < ntests; i++) { - const struct masked_events_test *test = &test_cases[i]; - - /* Do any test case events overflow MAX_TEST_EVENTS? 
*/ - assert(test->intel_event_end == 0); - assert(test->amd_event_end == 0); - - n = append_test_events(test, events, nevents); - - run_masked_events_test(vcpu, events, n); - - TEST_ASSERT(bool_eq(pmc_results.loads, test->flags & ALLOW_LOADS) && - bool_eq(pmc_results.stores, test->flags & ALLOW_STORES) && - bool_eq(pmc_results.loads_stores, - test->flags & ALLOW_LOADS_STORES), - "%s loads: %lu, stores: %lu, loads + stores: %lu", - test->msg, pmc_results.loads, pmc_results.stores, - pmc_results.loads_stores); - } -} - -static void add_dummy_events(uint64_t *events, int nevents) -{ - int i; - - for (i = 0; i < nevents; i++) { - int event_select = i % 0xFF; - bool exclude = ((i % 4) == 0); - - if (event_select == MEM_INST_RETIRED || - event_select == LS_DISPATCH) - event_select++; - - events[i] = KVM_PMU_ENCODE_MASKED_ENTRY(event_select, 0, - 0, exclude); - } -} - -static void test_masked_events(struct kvm_vcpu *vcpu) -{ - int nevents = KVM_PMU_EVENT_FILTER_MAX_EVENTS - MAX_TEST_EVENTS; - uint64_t events[KVM_PMU_EVENT_FILTER_MAX_EVENTS]; - - /* Run the test cases against a sparse PMU event filter. */ - run_masked_events_tests(vcpu, events, 0); - - /* Run the test cases against a dense PMU event filter. */ - add_dummy_events(events, KVM_PMU_EVENT_FILTER_MAX_EVENTS); - run_masked_events_tests(vcpu, events, nevents); -} - -static int set_pmu_event_filter(struct kvm_vcpu *vcpu, - struct __kvm_pmu_event_filter *__f) -{ - struct kvm_pmu_event_filter *f = (void *)__f; - - return __vm_ioctl(vcpu->vm, KVM_SET_PMU_EVENT_FILTER, f); -} - -static int set_pmu_single_event_filter(struct kvm_vcpu *vcpu, uint64_t event, - uint32_t flags, uint32_t action) -{ - struct __kvm_pmu_event_filter f = { - .nevents = 1, - .flags = flags, - .action = action, - .events = { - event, - }, - }; - - return set_pmu_event_filter(vcpu, &f); -} - -static void test_filter_ioctl(struct kvm_vcpu *vcpu) -{ - uint8_t nr_fixed_counters = kvm_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS); - struct __kvm_pmu_event_filter f; - uint64_t e = ~0ul; - int r; - - /* - * Unfortunately having invalid bits set in event data is expected to - * pass when flags == 0 (bits other than eventsel+umask). 
- */ - r = set_pmu_single_event_filter(vcpu, e, 0, KVM_PMU_EVENT_ALLOW); - TEST_ASSERT(r == 0, "Valid PMU Event Filter is failing"); - - r = set_pmu_single_event_filter(vcpu, e, - KVM_PMU_EVENT_FLAG_MASKED_EVENTS, - KVM_PMU_EVENT_ALLOW); - TEST_ASSERT(r != 0, "Invalid PMU Event Filter is expected to fail"); - - e = KVM_PMU_ENCODE_MASKED_ENTRY(0xff, 0xff, 0xff, 0xf); - r = set_pmu_single_event_filter(vcpu, e, - KVM_PMU_EVENT_FLAG_MASKED_EVENTS, - KVM_PMU_EVENT_ALLOW); - TEST_ASSERT(r == 0, "Valid PMU Event Filter is failing"); - - f = base_event_filter; - f.action = PMU_EVENT_FILTER_INVALID_ACTION; - r = set_pmu_event_filter(vcpu, &f); - TEST_ASSERT(r, "Set invalid action is expected to fail"); - - f = base_event_filter; - f.flags = PMU_EVENT_FILTER_INVALID_FLAGS; - r = set_pmu_event_filter(vcpu, &f); - TEST_ASSERT(r, "Set invalid flags is expected to fail"); - - f = base_event_filter; - f.nevents = PMU_EVENT_FILTER_INVALID_NEVENTS; - r = set_pmu_event_filter(vcpu, &f); - TEST_ASSERT(r, "Exceeding the max number of filter events should fail"); - - f = base_event_filter; - f.fixed_counter_bitmap = ~GENMASK_ULL(nr_fixed_counters, 0); - r = set_pmu_event_filter(vcpu, &f); - TEST_ASSERT(!r, "Masking non-existent fixed counters should be allowed"); -} - -static void intel_run_fixed_counter_guest_code(uint8_t idx) -{ - for (;;) { - wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0); - wrmsr(MSR_CORE_PERF_FIXED_CTR0 + idx, 0); - - /* Only OS_EN bit is enabled for fixed counter[idx]. */ - wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, FIXED_PMC_CTRL(idx, FIXED_PMC_KERNEL)); - wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, FIXED_PMC_GLOBAL_CTRL_ENABLE(idx)); - __asm__ __volatile__("loop ." : "+c"((int){NUM_BRANCHES})); - wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0); - - GUEST_SYNC(rdmsr(MSR_CORE_PERF_FIXED_CTR0 + idx)); - } -} - -static uint64_t test_with_fixed_counter_filter(struct kvm_vcpu *vcpu, - uint32_t action, uint32_t bitmap) -{ - struct __kvm_pmu_event_filter f = { - .action = action, - .fixed_counter_bitmap = bitmap, - }; - set_pmu_event_filter(vcpu, &f); - - return run_vcpu_to_sync(vcpu); -} - -static uint64_t test_set_gp_and_fixed_event_filter(struct kvm_vcpu *vcpu, - uint32_t action, - uint32_t bitmap) -{ - struct __kvm_pmu_event_filter f = base_event_filter; - - f.action = action; - f.fixed_counter_bitmap = bitmap; - set_pmu_event_filter(vcpu, &f); - - return run_vcpu_to_sync(vcpu); -} - -static void __test_fixed_counter_bitmap(struct kvm_vcpu *vcpu, uint8_t idx, - uint8_t nr_fixed_counters) -{ - unsigned int i; - uint32_t bitmap; - uint64_t count; - - TEST_ASSERT(nr_fixed_counters < sizeof(bitmap) * 8, - "Invalid nr_fixed_counters"); - - /* - * Check the fixed performance counter can count normally when KVM - * userspace doesn't set any pmu filter. - */ - count = run_vcpu_to_sync(vcpu); - TEST_ASSERT(count, "Unexpected count value: %ld", count); - - for (i = 0; i < BIT(nr_fixed_counters); i++) { - bitmap = BIT(i); - count = test_with_fixed_counter_filter(vcpu, KVM_PMU_EVENT_ALLOW, - bitmap); - TEST_ASSERT_EQ(!!count, !!(bitmap & BIT(idx))); - - count = test_with_fixed_counter_filter(vcpu, KVM_PMU_EVENT_DENY, - bitmap); - TEST_ASSERT_EQ(!!count, !(bitmap & BIT(idx))); - - /* - * Check that fixed_counter_bitmap has higher priority than - * events[] when both are set. 
- */ - count = test_set_gp_and_fixed_event_filter(vcpu, - KVM_PMU_EVENT_ALLOW, - bitmap); - TEST_ASSERT_EQ(!!count, !!(bitmap & BIT(idx))); - - count = test_set_gp_and_fixed_event_filter(vcpu, - KVM_PMU_EVENT_DENY, - bitmap); - TEST_ASSERT_EQ(!!count, !(bitmap & BIT(idx))); - } -} - -static void test_fixed_counter_bitmap(void) -{ - uint8_t nr_fixed_counters = kvm_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS); - struct kvm_vm *vm; - struct kvm_vcpu *vcpu; - uint8_t idx; - - /* - * Check that pmu_event_filter works as expected when it's applied to - * fixed performance counters. - */ - for (idx = 0; idx < nr_fixed_counters; idx++) { - vm = vm_create_with_one_vcpu(&vcpu, - intel_run_fixed_counter_guest_code); - vcpu_args_set(vcpu, 1, idx); - __test_fixed_counter_bitmap(vcpu, idx, nr_fixed_counters); - kvm_vm_free(vm); - } -} - -int main(int argc, char *argv[]) -{ - void (*guest_code)(void); - struct kvm_vcpu *vcpu, *vcpu2 = NULL; - struct kvm_vm *vm; - - TEST_REQUIRE(kvm_is_pmu_enabled()); - TEST_REQUIRE(kvm_has_cap(KVM_CAP_PMU_EVENT_FILTER)); - TEST_REQUIRE(kvm_has_cap(KVM_CAP_PMU_EVENT_MASKED_EVENTS)); - - TEST_REQUIRE(use_intel_pmu() || use_amd_pmu()); - guest_code = use_intel_pmu() ? intel_guest_code : amd_guest_code; - - vm = vm_create_with_one_vcpu(&vcpu, guest_code); - - TEST_REQUIRE(sanity_check_pmu(vcpu)); - - if (use_amd_pmu()) - test_amd_deny_list(vcpu); - - test_without_filter(vcpu); - test_member_deny_list(vcpu); - test_member_allow_list(vcpu); - test_not_member_deny_list(vcpu); - test_not_member_allow_list(vcpu); - - if (use_intel_pmu() && - supports_event_mem_inst_retired() && - kvm_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS) >= 3) - vcpu2 = vm_vcpu_add(vm, 2, intel_masked_events_guest_code); - else if (use_amd_pmu()) - vcpu2 = vm_vcpu_add(vm, 2, amd_masked_events_guest_code); - - if (vcpu2) - test_masked_events(vcpu2); - test_filter_ioctl(vcpu); - - kvm_vm_free(vm); - - test_pmu_config_disable(guest_code); - test_fixed_counter_bitmap(); - - return 0; -} diff --git a/tools/testing/selftests/kvm/x86_64/private_mem_conversions_test.c b/tools/testing/selftests/kvm/x86_64/private_mem_conversions_test.c deleted file mode 100644 index 82a8d88b5338..000000000000 --- a/tools/testing/selftests/kvm/x86_64/private_mem_conversions_test.c +++ /dev/null @@ -1,483 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright (C) 2022, Google LLC. - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -#include -#include -#include - -#define BASE_DATA_SLOT 10 -#define BASE_DATA_GPA ((uint64_t)(1ull << 32)) -#define PER_CPU_DATA_SIZE ((uint64_t)(SZ_2M + PAGE_SIZE)) - -/* Horrific macro so that the line info is captured accurately :-( */ -#define memcmp_g(gpa, pattern, size) \ -do { \ - uint8_t *mem = (uint8_t *)gpa; \ - size_t i; \ - \ - for (i = 0; i < size; i++) \ - __GUEST_ASSERT(mem[i] == pattern, \ - "Guest expected 0x%x at offset %lu (gpa 0x%lx), got 0x%x", \ - pattern, i, gpa + i, mem[i]); \ -} while (0) - -static void memcmp_h(uint8_t *mem, uint64_t gpa, uint8_t pattern, size_t size) -{ - size_t i; - - for (i = 0; i < size; i++) - TEST_ASSERT(mem[i] == pattern, - "Host expected 0x%x at gpa 0x%lx, got 0x%x", - pattern, gpa + i, mem[i]); -} - -/* - * Run memory conversion tests with explicit conversion: - * Execute KVM hypercall to map/unmap gpa range which will cause userspace exit - * to back/unback private memory. 
Subsequent accesses by guest to the gpa range - * will not cause exit to userspace. - * - * Test memory conversion scenarios with following steps: - * 1) Access private memory using private access and verify that memory contents - * are not visible to userspace. - * 2) Convert memory to shared using explicit conversions and ensure that - * userspace is able to access the shared regions. - * 3) Convert memory back to private using explicit conversions and ensure that - * userspace is again not able to access converted private regions. - */ - -#define GUEST_STAGE(o, s) { .offset = o, .size = s } - -enum ucall_syncs { - SYNC_SHARED, - SYNC_PRIVATE, -}; - -static void guest_sync_shared(uint64_t gpa, uint64_t size, - uint8_t current_pattern, uint8_t new_pattern) -{ - GUEST_SYNC5(SYNC_SHARED, gpa, size, current_pattern, new_pattern); -} - -static void guest_sync_private(uint64_t gpa, uint64_t size, uint8_t pattern) -{ - GUEST_SYNC4(SYNC_PRIVATE, gpa, size, pattern); -} - -/* Arbitrary values, KVM doesn't care about the attribute flags. */ -#define MAP_GPA_SET_ATTRIBUTES BIT(0) -#define MAP_GPA_SHARED BIT(1) -#define MAP_GPA_DO_FALLOCATE BIT(2) - -static void guest_map_mem(uint64_t gpa, uint64_t size, bool map_shared, - bool do_fallocate) -{ - uint64_t flags = MAP_GPA_SET_ATTRIBUTES; - - if (map_shared) - flags |= MAP_GPA_SHARED; - if (do_fallocate) - flags |= MAP_GPA_DO_FALLOCATE; - kvm_hypercall_map_gpa_range(gpa, size, flags); -} - -static void guest_map_shared(uint64_t gpa, uint64_t size, bool do_fallocate) -{ - guest_map_mem(gpa, size, true, do_fallocate); -} - -static void guest_map_private(uint64_t gpa, uint64_t size, bool do_fallocate) -{ - guest_map_mem(gpa, size, false, do_fallocate); -} - -struct { - uint64_t offset; - uint64_t size; -} static const test_ranges[] = { - GUEST_STAGE(0, PAGE_SIZE), - GUEST_STAGE(0, SZ_2M), - GUEST_STAGE(PAGE_SIZE, PAGE_SIZE), - GUEST_STAGE(PAGE_SIZE, SZ_2M), - GUEST_STAGE(SZ_2M, PAGE_SIZE), -}; - -static void guest_test_explicit_conversion(uint64_t base_gpa, bool do_fallocate) -{ - const uint8_t def_p = 0xaa; - const uint8_t init_p = 0xcc; - uint64_t j; - int i; - - /* Memory should be shared by default. */ - memset((void *)base_gpa, def_p, PER_CPU_DATA_SIZE); - memcmp_g(base_gpa, def_p, PER_CPU_DATA_SIZE); - guest_sync_shared(base_gpa, PER_CPU_DATA_SIZE, def_p, init_p); - - memcmp_g(base_gpa, init_p, PER_CPU_DATA_SIZE); - - for (i = 0; i < ARRAY_SIZE(test_ranges); i++) { - uint64_t gpa = base_gpa + test_ranges[i].offset; - uint64_t size = test_ranges[i].size; - uint8_t p1 = 0x11; - uint8_t p2 = 0x22; - uint8_t p3 = 0x33; - uint8_t p4 = 0x44; - - /* - * Set the test region to pattern one to differentiate it from - * the data range as a whole (contains the initial pattern). - */ - memset((void *)gpa, p1, size); - - /* - * Convert to private, set and verify the private data, and - * then verify that the rest of the data (map shared) still - * holds the initial pattern, and that the host always sees the - * shared memory (initial pattern). Unlike shared memory, - * punching a hole in private memory is destructive, i.e. - * previous values aren't guaranteed to be preserved. - */ - guest_map_private(gpa, size, do_fallocate); - - if (size > PAGE_SIZE) { - memset((void *)gpa, p2, PAGE_SIZE); - goto skip; - } - - memset((void *)gpa, p2, size); - guest_sync_private(gpa, size, p1); - - /* - * Verify that the private memory was set to pattern two, and - * that shared memory still holds the initial pattern. 
- */ - memcmp_g(gpa, p2, size); - if (gpa > base_gpa) - memcmp_g(base_gpa, init_p, gpa - base_gpa); - if (gpa + size < base_gpa + PER_CPU_DATA_SIZE) - memcmp_g(gpa + size, init_p, - (base_gpa + PER_CPU_DATA_SIZE) - (gpa + size)); - - /* - * Convert odd-number page frames back to shared to verify KVM - * also correctly handles holes in private ranges. - */ - for (j = 0; j < size; j += PAGE_SIZE) { - if ((j >> PAGE_SHIFT) & 1) { - guest_map_shared(gpa + j, PAGE_SIZE, do_fallocate); - guest_sync_shared(gpa + j, PAGE_SIZE, p1, p3); - - memcmp_g(gpa + j, p3, PAGE_SIZE); - } else { - guest_sync_private(gpa + j, PAGE_SIZE, p1); - } - } - -skip: - /* - * Convert the entire region back to shared, explicitly write - * pattern three to fill in the even-number frames before - * asking the host to verify (and write pattern four). - */ - guest_map_shared(gpa, size, do_fallocate); - memset((void *)gpa, p3, size); - guest_sync_shared(gpa, size, p3, p4); - memcmp_g(gpa, p4, size); - - /* Reset the shared memory back to the initial pattern. */ - memset((void *)gpa, init_p, size); - - /* - * Free (via PUNCH_HOLE) *all* private memory so that the next - * iteration starts from a clean slate, e.g. with respect to - * whether or not there are pages/folios in guest_mem. - */ - guest_map_shared(base_gpa, PER_CPU_DATA_SIZE, true); - } -} - -static void guest_punch_hole(uint64_t gpa, uint64_t size) -{ - /* "Mapping" memory shared via fallocate() is done via PUNCH_HOLE. */ - uint64_t flags = MAP_GPA_SHARED | MAP_GPA_DO_FALLOCATE; - - kvm_hypercall_map_gpa_range(gpa, size, flags); -} - -/* - * Test that PUNCH_HOLE actually frees memory by punching holes without doing a - * proper conversion. Freeing (PUNCH_HOLE) should zap SPTEs, and reallocating - * (subsequent fault) should zero memory. - */ -static void guest_test_punch_hole(uint64_t base_gpa, bool precise) -{ - const uint8_t init_p = 0xcc; - int i; - - /* - * Convert the entire range to private, this testcase is all about - * punching holes in guest_memfd, i.e. shared mappings aren't needed. - */ - guest_map_private(base_gpa, PER_CPU_DATA_SIZE, false); - - for (i = 0; i < ARRAY_SIZE(test_ranges); i++) { - uint64_t gpa = base_gpa + test_ranges[i].offset; - uint64_t size = test_ranges[i].size; - - /* - * Free all memory before each iteration, even for the !precise - * case where the memory will be faulted back in. Freeing and - * reallocating should obviously work, and freeing all memory - * minimizes the probability of cross-testcase influence. - */ - guest_punch_hole(base_gpa, PER_CPU_DATA_SIZE); - - /* Fault-in and initialize memory, and verify the pattern. */ - if (precise) { - memset((void *)gpa, init_p, size); - memcmp_g(gpa, init_p, size); - } else { - memset((void *)base_gpa, init_p, PER_CPU_DATA_SIZE); - memcmp_g(base_gpa, init_p, PER_CPU_DATA_SIZE); - } - - /* - * Punch a hole at the target range and verify that reads from - * the guest succeed and return zeroes. - */ - guest_punch_hole(gpa, size); - memcmp_g(gpa, 0, size); - } -} - -static void guest_code(uint64_t base_gpa) -{ - /* - * Run the conversion test twice, with and without doing fallocate() on - * the guest_memfd backing when converting between shared and private. - */ - guest_test_explicit_conversion(base_gpa, false); - guest_test_explicit_conversion(base_gpa, true); - - /* - * Run the PUNCH_HOLE test twice too, once with the entire guest_memfd - * faulted in, once with only the target range faulted in. 
- */ - guest_test_punch_hole(base_gpa, false); - guest_test_punch_hole(base_gpa, true); - GUEST_DONE(); -} - -static void handle_exit_hypercall(struct kvm_vcpu *vcpu) -{ - struct kvm_run *run = vcpu->run; - uint64_t gpa = run->hypercall.args[0]; - uint64_t size = run->hypercall.args[1] * PAGE_SIZE; - bool set_attributes = run->hypercall.args[2] & MAP_GPA_SET_ATTRIBUTES; - bool map_shared = run->hypercall.args[2] & MAP_GPA_SHARED; - bool do_fallocate = run->hypercall.args[2] & MAP_GPA_DO_FALLOCATE; - struct kvm_vm *vm = vcpu->vm; - - TEST_ASSERT(run->hypercall.nr == KVM_HC_MAP_GPA_RANGE, - "Wanted MAP_GPA_RANGE (%u), got '%llu'", - KVM_HC_MAP_GPA_RANGE, run->hypercall.nr); - - if (do_fallocate) - vm_guest_mem_fallocate(vm, gpa, size, map_shared); - - if (set_attributes) - vm_set_memory_attributes(vm, gpa, size, - map_shared ? 0 : KVM_MEMORY_ATTRIBUTE_PRIVATE); - run->hypercall.ret = 0; -} - -static bool run_vcpus; - -static void *__test_mem_conversions(void *__vcpu) -{ - struct kvm_vcpu *vcpu = __vcpu; - struct kvm_run *run = vcpu->run; - struct kvm_vm *vm = vcpu->vm; - struct ucall uc; - - while (!READ_ONCE(run_vcpus)) - ; - - for ( ;; ) { - vcpu_run(vcpu); - - if (run->exit_reason == KVM_EXIT_HYPERCALL) { - handle_exit_hypercall(vcpu); - continue; - } - - TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, - "Wanted KVM_EXIT_IO, got exit reason: %u (%s)", - run->exit_reason, exit_reason_str(run->exit_reason)); - - switch (get_ucall(vcpu, &uc)) { - case UCALL_ABORT: - REPORT_GUEST_ASSERT(uc); - case UCALL_SYNC: { - uint64_t gpa = uc.args[1]; - size_t size = uc.args[2]; - size_t i; - - TEST_ASSERT(uc.args[0] == SYNC_SHARED || - uc.args[0] == SYNC_PRIVATE, - "Unknown sync command '%ld'", uc.args[0]); - - for (i = 0; i < size; i += vm->page_size) { - size_t nr_bytes = min_t(size_t, vm->page_size, size - i); - uint8_t *hva = addr_gpa2hva(vm, gpa + i); - - /* In all cases, the host should observe the shared data. */ - memcmp_h(hva, gpa + i, uc.args[3], nr_bytes); - - /* For shared, write the new pattern to guest memory. */ - if (uc.args[0] == SYNC_SHARED) - memset(hva, uc.args[4], nr_bytes); - } - break; - } - case UCALL_DONE: - return NULL; - default: - TEST_FAIL("Unknown ucall 0x%lx.", uc.cmd); - } - } -} - -static void test_mem_conversions(enum vm_mem_backing_src_type src_type, uint32_t nr_vcpus, - uint32_t nr_memslots) -{ - /* - * Allocate enough memory so that each vCPU's chunk of memory can be - * naturally aligned with respect to the size of the backing store. 
- */ - const size_t alignment = max_t(size_t, SZ_2M, get_backing_src_pagesz(src_type)); - const size_t per_cpu_size = align_up(PER_CPU_DATA_SIZE, alignment); - const size_t memfd_size = per_cpu_size * nr_vcpus; - const size_t slot_size = memfd_size / nr_memslots; - struct kvm_vcpu *vcpus[KVM_MAX_VCPUS]; - pthread_t threads[KVM_MAX_VCPUS]; - struct kvm_vm *vm; - int memfd, i, r; - - const struct vm_shape shape = { - .mode = VM_MODE_DEFAULT, - .type = KVM_X86_SW_PROTECTED_VM, - }; - - TEST_ASSERT(slot_size * nr_memslots == memfd_size, - "The memfd size (0x%lx) needs to be cleanly divisible by the number of memslots (%u)", - memfd_size, nr_memslots); - vm = __vm_create_with_vcpus(shape, nr_vcpus, 0, guest_code, vcpus); - - vm_enable_cap(vm, KVM_CAP_EXIT_HYPERCALL, (1 << KVM_HC_MAP_GPA_RANGE)); - - memfd = vm_create_guest_memfd(vm, memfd_size, 0); - - for (i = 0; i < nr_memslots; i++) - vm_mem_add(vm, src_type, BASE_DATA_GPA + slot_size * i, - BASE_DATA_SLOT + i, slot_size / vm->page_size, - KVM_MEM_GUEST_MEMFD, memfd, slot_size * i); - - for (i = 0; i < nr_vcpus; i++) { - uint64_t gpa = BASE_DATA_GPA + i * per_cpu_size; - - vcpu_args_set(vcpus[i], 1, gpa); - - /* - * Map only what is needed so that an out-of-bounds access - * results #PF => SHUTDOWN instead of data corruption. - */ - virt_map(vm, gpa, gpa, PER_CPU_DATA_SIZE / vm->page_size); - - pthread_create(&threads[i], NULL, __test_mem_conversions, vcpus[i]); - } - - WRITE_ONCE(run_vcpus, true); - - for (i = 0; i < nr_vcpus; i++) - pthread_join(threads[i], NULL); - - kvm_vm_free(vm); - - /* - * Allocate and free memory from the guest_memfd after closing the VM - * fd. The guest_memfd is gifted a reference to its owning VM, i.e. - * should prevent the VM from being fully destroyed until the last - * reference to the guest_memfd is also put. - */ - r = fallocate(memfd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE, 0, memfd_size); - TEST_ASSERT(!r, __KVM_SYSCALL_ERROR("fallocate()", r)); - - r = fallocate(memfd, FALLOC_FL_KEEP_SIZE, 0, memfd_size); - TEST_ASSERT(!r, __KVM_SYSCALL_ERROR("fallocate()", r)); - - close(memfd); -} - -static void usage(const char *cmd) -{ - puts(""); - printf("usage: %s [-h] [-m nr_memslots] [-s mem_type] [-n nr_vcpus]\n", cmd); - puts(""); - backing_src_help("-s"); - puts(""); - puts(" -n: specify the number of vcpus (default: 1)"); - puts(""); - puts(" -m: specify the number of memslots (default: 1)"); - puts(""); -} - -int main(int argc, char *argv[]) -{ - enum vm_mem_backing_src_type src_type = DEFAULT_VM_MEM_SRC; - uint32_t nr_memslots = 1; - uint32_t nr_vcpus = 1; - int opt; - - TEST_REQUIRE(kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SW_PROTECTED_VM)); - - while ((opt = getopt(argc, argv, "hm:s:n:")) != -1) { - switch (opt) { - case 's': - src_type = parse_backing_src_type(optarg); - break; - case 'n': - nr_vcpus = atoi_positive("nr_vcpus", optarg); - break; - case 'm': - nr_memslots = atoi_positive("nr_memslots", optarg); - break; - case 'h': - default: - usage(argv[0]); - exit(0); - } - } - - test_mem_conversions(src_type, nr_vcpus, nr_memslots); - - return 0; -} diff --git a/tools/testing/selftests/kvm/x86_64/private_mem_kvm_exits_test.c b/tools/testing/selftests/kvm/x86_64/private_mem_kvm_exits_test.c deleted file mode 100644 index 13e72fcec8dd..000000000000 --- a/tools/testing/selftests/kvm/x86_64/private_mem_kvm_exits_test.c +++ /dev/null @@ -1,120 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Copyright (C) 2023, Google LLC. 
- */ -#include -#include -#include - -#include "kvm_util.h" -#include "processor.h" -#include "test_util.h" - -/* Arbitrarily selected to avoid overlaps with anything else */ -#define EXITS_TEST_GVA 0xc0000000 -#define EXITS_TEST_GPA EXITS_TEST_GVA -#define EXITS_TEST_NPAGES 1 -#define EXITS_TEST_SIZE (EXITS_TEST_NPAGES * PAGE_SIZE) -#define EXITS_TEST_SLOT 10 - -static uint64_t guest_repeatedly_read(void) -{ - volatile uint64_t value; - - while (true) - value = *((uint64_t *) EXITS_TEST_GVA); - - return value; -} - -static uint32_t run_vcpu_get_exit_reason(struct kvm_vcpu *vcpu) -{ - int r; - - r = _vcpu_run(vcpu); - if (r) { - TEST_ASSERT(errno == EFAULT, KVM_IOCTL_ERROR(KVM_RUN, r)); - TEST_ASSERT_EQ(vcpu->run->exit_reason, KVM_EXIT_MEMORY_FAULT); - } - return vcpu->run->exit_reason; -} - -const struct vm_shape protected_vm_shape = { - .mode = VM_MODE_DEFAULT, - .type = KVM_X86_SW_PROTECTED_VM, -}; - -static void test_private_access_memslot_deleted(void) -{ - struct kvm_vm *vm; - struct kvm_vcpu *vcpu; - pthread_t vm_thread; - void *thread_return; - uint32_t exit_reason; - - vm = vm_create_shape_with_one_vcpu(protected_vm_shape, &vcpu, - guest_repeatedly_read); - - vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, - EXITS_TEST_GPA, EXITS_TEST_SLOT, - EXITS_TEST_NPAGES, - KVM_MEM_GUEST_MEMFD); - - virt_map(vm, EXITS_TEST_GVA, EXITS_TEST_GPA, EXITS_TEST_NPAGES); - - /* Request to access page privately */ - vm_mem_set_private(vm, EXITS_TEST_GPA, EXITS_TEST_SIZE); - - pthread_create(&vm_thread, NULL, - (void *(*)(void *))run_vcpu_get_exit_reason, - (void *)vcpu); - - vm_mem_region_delete(vm, EXITS_TEST_SLOT); - - pthread_join(vm_thread, &thread_return); - exit_reason = (uint32_t)(uint64_t)thread_return; - - TEST_ASSERT_EQ(exit_reason, KVM_EXIT_MEMORY_FAULT); - TEST_ASSERT_EQ(vcpu->run->memory_fault.flags, KVM_MEMORY_EXIT_FLAG_PRIVATE); - TEST_ASSERT_EQ(vcpu->run->memory_fault.gpa, EXITS_TEST_GPA); - TEST_ASSERT_EQ(vcpu->run->memory_fault.size, EXITS_TEST_SIZE); - - kvm_vm_free(vm); -} - -static void test_private_access_memslot_not_private(void) -{ - struct kvm_vm *vm; - struct kvm_vcpu *vcpu; - uint32_t exit_reason; - - vm = vm_create_shape_with_one_vcpu(protected_vm_shape, &vcpu, - guest_repeatedly_read); - - /* Add a non-private memslot (flags = 0) */ - vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, - EXITS_TEST_GPA, EXITS_TEST_SLOT, - EXITS_TEST_NPAGES, 0); - - virt_map(vm, EXITS_TEST_GVA, EXITS_TEST_GPA, EXITS_TEST_NPAGES); - - /* Request to access page privately */ - vm_mem_set_private(vm, EXITS_TEST_GPA, EXITS_TEST_SIZE); - - exit_reason = run_vcpu_get_exit_reason(vcpu); - - TEST_ASSERT_EQ(exit_reason, KVM_EXIT_MEMORY_FAULT); - TEST_ASSERT_EQ(vcpu->run->memory_fault.flags, KVM_MEMORY_EXIT_FLAG_PRIVATE); - TEST_ASSERT_EQ(vcpu->run->memory_fault.gpa, EXITS_TEST_GPA); - TEST_ASSERT_EQ(vcpu->run->memory_fault.size, EXITS_TEST_SIZE); - - kvm_vm_free(vm); -} - -int main(int argc, char *argv[]) -{ - TEST_REQUIRE(kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SW_PROTECTED_VM)); - - test_private_access_memslot_deleted(); - test_private_access_memslot_not_private(); -} diff --git a/tools/testing/selftests/kvm/x86_64/recalc_apic_map_test.c b/tools/testing/selftests/kvm/x86_64/recalc_apic_map_test.c deleted file mode 100644 index cbc92a862ea9..000000000000 --- a/tools/testing/selftests/kvm/x86_64/recalc_apic_map_test.c +++ /dev/null @@ -1,74 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Test edge cases and race conditions in kvm_recalculate_apic_map(). 
- */ - -#include -#include -#include - -#include "processor.h" -#include "test_util.h" -#include "kvm_util.h" -#include "apic.h" - -#define TIMEOUT 5 /* seconds */ - -#define LAPIC_DISABLED 0 -#define LAPIC_X2APIC (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE) -#define MAX_XAPIC_ID 0xff - -static void *race(void *arg) -{ - struct kvm_lapic_state lapic = {}; - struct kvm_vcpu *vcpu = arg; - - while (1) { - /* Trigger kvm_recalculate_apic_map(). */ - vcpu_ioctl(vcpu, KVM_SET_LAPIC, &lapic); - pthread_testcancel(); - } - - return NULL; -} - -int main(void) -{ - struct kvm_vcpu *vcpus[KVM_MAX_VCPUS]; - struct kvm_vcpu *vcpuN; - struct kvm_vm *vm; - pthread_t thread; - time_t t; - int i; - - kvm_static_assert(KVM_MAX_VCPUS > MAX_XAPIC_ID); - - /* - * Create the max number of vCPUs supported by selftests so that KVM - * has decent amount of work to do when recalculating the map, i.e. to - * make the problematic window large enough to hit. - */ - vm = vm_create_with_vcpus(KVM_MAX_VCPUS, NULL, vcpus); - - /* - * Enable x2APIC on all vCPUs so that KVM doesn't bail from the recalc - * due to vCPUs having aliased xAPIC IDs (truncated to 8 bits). - */ - for (i = 0; i < KVM_MAX_VCPUS; i++) - vcpu_set_msr(vcpus[i], MSR_IA32_APICBASE, LAPIC_X2APIC); - - TEST_ASSERT_EQ(pthread_create(&thread, NULL, race, vcpus[0]), 0); - - vcpuN = vcpus[KVM_MAX_VCPUS - 1]; - for (t = time(NULL) + TIMEOUT; time(NULL) < t;) { - vcpu_set_msr(vcpuN, MSR_IA32_APICBASE, LAPIC_X2APIC); - vcpu_set_msr(vcpuN, MSR_IA32_APICBASE, LAPIC_DISABLED); - } - - TEST_ASSERT_EQ(pthread_cancel(thread), 0); - TEST_ASSERT_EQ(pthread_join(thread, NULL), 0); - - kvm_vm_free(vm); - - return 0; -} diff --git a/tools/testing/selftests/kvm/x86_64/set_boot_cpu_id.c b/tools/testing/selftests/kvm/x86_64/set_boot_cpu_id.c deleted file mode 100644 index 49913784bc82..000000000000 --- a/tools/testing/selftests/kvm/x86_64/set_boot_cpu_id.c +++ /dev/null @@ -1,146 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Test that KVM_SET_BOOT_CPU_ID works as intended - * - * Copyright (C) 2020, Red Hat, Inc. 
- */ -#include -#include -#include -#include -#include - -#include "test_util.h" -#include "kvm_util.h" -#include "processor.h" -#include "apic.h" - -static void guest_bsp_vcpu(void *arg) -{ - GUEST_SYNC(1); - - GUEST_ASSERT_NE(get_bsp_flag(), 0); - - GUEST_DONE(); -} - -static void guest_not_bsp_vcpu(void *arg) -{ - GUEST_SYNC(1); - - GUEST_ASSERT_EQ(get_bsp_flag(), 0); - - GUEST_DONE(); -} - -static void test_set_invalid_bsp(struct kvm_vm *vm) -{ - unsigned long max_vcpu_id = vm_check_cap(vm, KVM_CAP_MAX_VCPU_ID); - int r; - - if (max_vcpu_id) { - r = __vm_ioctl(vm, KVM_SET_BOOT_CPU_ID, (void *)(max_vcpu_id + 1)); - TEST_ASSERT(r == -1 && errno == EINVAL, "BSP with ID > MAX should fail"); - } - - r = __vm_ioctl(vm, KVM_SET_BOOT_CPU_ID, (void *)(1L << 32)); - TEST_ASSERT(r == -1 && errno == EINVAL, "BSP with ID[63:32]!=0 should fail"); -} - -static void test_set_bsp_busy(struct kvm_vcpu *vcpu, const char *msg) -{ - int r = __vm_ioctl(vcpu->vm, KVM_SET_BOOT_CPU_ID, - (void *)(unsigned long)vcpu->id); - - TEST_ASSERT(r == -1 && errno == EBUSY, "KVM_SET_BOOT_CPU_ID set %s", msg); -} - -static void run_vcpu(struct kvm_vcpu *vcpu) -{ - struct ucall uc; - int stage; - - for (stage = 0; stage < 2; stage++) { - - vcpu_run(vcpu); - - switch (get_ucall(vcpu, &uc)) { - case UCALL_SYNC: - TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") && - uc.args[1] == stage + 1, - "Stage %d: Unexpected register values vmexit, got %lx", - stage + 1, (ulong)uc.args[1]); - test_set_bsp_busy(vcpu, "while running vm"); - break; - case UCALL_DONE: - TEST_ASSERT(stage == 1, - "Expected GUEST_DONE in stage 2, got stage %d", - stage); - break; - case UCALL_ABORT: - REPORT_GUEST_ASSERT(uc); - default: - TEST_ASSERT(false, "Unexpected exit: %s", - exit_reason_str(vcpu->run->exit_reason)); - } - } -} - -static struct kvm_vm *create_vm(uint32_t nr_vcpus, uint32_t bsp_vcpu_id, - struct kvm_vcpu *vcpus[]) -{ - struct kvm_vm *vm; - uint32_t i; - - vm = vm_create(nr_vcpus); - - test_set_invalid_bsp(vm); - - vm_ioctl(vm, KVM_SET_BOOT_CPU_ID, (void *)(unsigned long)bsp_vcpu_id); - - for (i = 0; i < nr_vcpus; i++) - vcpus[i] = vm_vcpu_add(vm, i, i == bsp_vcpu_id ? guest_bsp_vcpu : - guest_not_bsp_vcpu); - return vm; -} - -static void run_vm_bsp(uint32_t bsp_vcpu_id) -{ - struct kvm_vcpu *vcpus[2]; - struct kvm_vm *vm; - - vm = create_vm(ARRAY_SIZE(vcpus), bsp_vcpu_id, vcpus); - - run_vcpu(vcpus[0]); - run_vcpu(vcpus[1]); - - kvm_vm_free(vm); -} - -static void check_set_bsp_busy(void) -{ - struct kvm_vcpu *vcpus[2]; - struct kvm_vm *vm; - - vm = create_vm(ARRAY_SIZE(vcpus), 0, vcpus); - - test_set_bsp_busy(vcpus[1], "after adding vcpu"); - - run_vcpu(vcpus[0]); - run_vcpu(vcpus[1]); - - test_set_bsp_busy(vcpus[1], "to a terminated vcpu"); - - kvm_vm_free(vm); -} - -int main(int argc, char *argv[]) -{ - TEST_REQUIRE(kvm_has_cap(KVM_CAP_SET_BOOT_CPU_ID)); - - run_vm_bsp(0); - run_vm_bsp(1); - run_vm_bsp(0); - - check_set_bsp_busy(); -} diff --git a/tools/testing/selftests/kvm/x86_64/set_sregs_test.c b/tools/testing/selftests/kvm/x86_64/set_sregs_test.c deleted file mode 100644 index c021c0795a96..000000000000 --- a/tools/testing/selftests/kvm/x86_64/set_sregs_test.c +++ /dev/null @@ -1,141 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * KVM_SET_SREGS tests - * - * Copyright (C) 2018, Google LLC. 
- * - * This is a regression test for the bug fixed by the following commit: - * d3802286fa0f ("kvm: x86: Disallow illegal IA32_APIC_BASE MSR values") - * - * That bug allowed a user-mode program that called the KVM_SET_SREGS - * ioctl to put a VCPU's local APIC into an invalid state. - */ -#include -#include -#include -#include -#include - -#include "test_util.h" - -#include "kvm_util.h" -#include "processor.h" - -#define TEST_INVALID_CR_BIT(vcpu, cr, orig, bit) \ -do { \ - struct kvm_sregs new; \ - int rc; \ - \ - /* Skip the sub-test, the feature/bit is supported. */ \ - if (orig.cr & bit) \ - break; \ - \ - memcpy(&new, &orig, sizeof(sregs)); \ - new.cr |= bit; \ - \ - rc = _vcpu_sregs_set(vcpu, &new); \ - TEST_ASSERT(rc, "KVM allowed invalid " #cr " bit (0x%lx)", bit); \ - \ - /* Sanity check that KVM didn't change anything. */ \ - vcpu_sregs_get(vcpu, &new); \ - TEST_ASSERT(!memcmp(&new, &orig, sizeof(new)), "KVM modified sregs"); \ -} while (0) - -static uint64_t calc_supported_cr4_feature_bits(void) -{ - uint64_t cr4; - - cr4 = X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE | - X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE | X86_CR4_PGE | - X86_CR4_PCE | X86_CR4_OSFXSR | X86_CR4_OSXMMEXCPT; - if (kvm_cpu_has(X86_FEATURE_UMIP)) - cr4 |= X86_CR4_UMIP; - if (kvm_cpu_has(X86_FEATURE_LA57)) - cr4 |= X86_CR4_LA57; - if (kvm_cpu_has(X86_FEATURE_VMX)) - cr4 |= X86_CR4_VMXE; - if (kvm_cpu_has(X86_FEATURE_SMX)) - cr4 |= X86_CR4_SMXE; - if (kvm_cpu_has(X86_FEATURE_FSGSBASE)) - cr4 |= X86_CR4_FSGSBASE; - if (kvm_cpu_has(X86_FEATURE_PCID)) - cr4 |= X86_CR4_PCIDE; - if (kvm_cpu_has(X86_FEATURE_XSAVE)) - cr4 |= X86_CR4_OSXSAVE; - if (kvm_cpu_has(X86_FEATURE_SMEP)) - cr4 |= X86_CR4_SMEP; - if (kvm_cpu_has(X86_FEATURE_SMAP)) - cr4 |= X86_CR4_SMAP; - if (kvm_cpu_has(X86_FEATURE_PKU)) - cr4 |= X86_CR4_PKE; - - return cr4; -} - -int main(int argc, char *argv[]) -{ - struct kvm_sregs sregs; - struct kvm_vcpu *vcpu; - struct kvm_vm *vm; - uint64_t cr4; - int rc, i; - - /* - * Create a dummy VM, specifically to avoid doing KVM_SET_CPUID2, and - * use it to verify all supported CR4 bits can be set prior to defining - * the vCPU model, i.e. without doing KVM_SET_CPUID2. - */ - vm = vm_create_barebones(); - vcpu = __vm_vcpu_add(vm, 0); - - vcpu_sregs_get(vcpu, &sregs); - - sregs.cr0 = 0; - sregs.cr4 |= calc_supported_cr4_feature_bits(); - cr4 = sregs.cr4; - - rc = _vcpu_sregs_set(vcpu, &sregs); - TEST_ASSERT(!rc, "Failed to set supported CR4 bits (0x%lx)", cr4); - - vcpu_sregs_get(vcpu, &sregs); - TEST_ASSERT(sregs.cr4 == cr4, "sregs.CR4 (0x%llx) != CR4 (0x%lx)", - sregs.cr4, cr4); - - /* Verify all unsupported features are rejected by KVM. */ - TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_UMIP); - TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_LA57); - TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_VMXE); - TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_SMXE); - TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_FSGSBASE); - TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_PCIDE); - TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_OSXSAVE); - TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_SMEP); - TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_SMAP); - TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_PKE); - - for (i = 32; i < 64; i++) - TEST_INVALID_CR_BIT(vcpu, cr0, sregs, BIT(i)); - - /* NW without CD is illegal, as is PG without PE. */ - TEST_INVALID_CR_BIT(vcpu, cr0, sregs, X86_CR0_NW); - TEST_INVALID_CR_BIT(vcpu, cr0, sregs, X86_CR0_PG); - - kvm_vm_free(vm); - - /* Create a "real" VM and verify APIC_BASE can be set. 
*/ - vm = vm_create_with_one_vcpu(&vcpu, NULL); - - vcpu_sregs_get(vcpu, &sregs); - sregs.apic_base = 1 << 10; - rc = _vcpu_sregs_set(vcpu, &sregs); - TEST_ASSERT(rc, "Set IA32_APIC_BASE to %llx (invalid)", - sregs.apic_base); - sregs.apic_base = 1 << 11; - rc = _vcpu_sregs_set(vcpu, &sregs); - TEST_ASSERT(!rc, "Couldn't set IA32_APIC_BASE to %llx (valid)", - sregs.apic_base); - - kvm_vm_free(vm); - - return 0; -} diff --git a/tools/testing/selftests/kvm/x86_64/sev_init2_tests.c b/tools/testing/selftests/kvm/x86_64/sev_init2_tests.c deleted file mode 100644 index 3fb967f40c6a..000000000000 --- a/tools/testing/selftests/kvm/x86_64/sev_init2_tests.c +++ /dev/null @@ -1,152 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -#include -#include -#include -#include -#include -#include -#include - -#include "test_util.h" -#include "kvm_util.h" -#include "processor.h" -#include "svm_util.h" -#include "kselftest.h" - -#define SVM_SEV_FEAT_DEBUG_SWAP 32u - -/* - * Some features may have hidden dependencies, or may only work - * for certain VM types. Err on the side of safety and don't - * expect that all supported features can be passed one by one - * to KVM_SEV_INIT2. - * - * (Well, right now there's only one...) - */ -#define KNOWN_FEATURES SVM_SEV_FEAT_DEBUG_SWAP - -int kvm_fd; -u64 supported_vmsa_features; -bool have_sev_es; - -static int __sev_ioctl(int vm_fd, int cmd_id, void *data) -{ - struct kvm_sev_cmd cmd = { - .id = cmd_id, - .data = (uint64_t)data, - .sev_fd = open_sev_dev_path_or_exit(), - }; - int ret; - - ret = ioctl(vm_fd, KVM_MEMORY_ENCRYPT_OP, &cmd); - TEST_ASSERT(ret < 0 || cmd.error == SEV_RET_SUCCESS, - "%d failed: fw error: %d\n", - cmd_id, cmd.error); - - return ret; -} - -static void test_init2(unsigned long vm_type, struct kvm_sev_init *init) -{ - struct kvm_vm *vm; - int ret; - - vm = vm_create_barebones_type(vm_type); - ret = __sev_ioctl(vm->fd, KVM_SEV_INIT2, init); - TEST_ASSERT(ret == 0, - "KVM_SEV_INIT2 return code is %d (expected 0), errno: %d", - ret, errno); - kvm_vm_free(vm); -} - -static void test_init2_invalid(unsigned long vm_type, struct kvm_sev_init *init, const char *msg) -{ - struct kvm_vm *vm; - int ret; - - vm = vm_create_barebones_type(vm_type); - ret = __sev_ioctl(vm->fd, KVM_SEV_INIT2, init); - TEST_ASSERT(ret == -1 && errno == EINVAL, - "KVM_SEV_INIT2 should fail, %s.", - msg); - kvm_vm_free(vm); -} - -void test_vm_types(void) -{ - test_init2(KVM_X86_SEV_VM, &(struct kvm_sev_init){}); - - /* - * TODO: check that unsupported types cannot be created. Probably - * a separate selftest. 
- */ - if (have_sev_es) - test_init2(KVM_X86_SEV_ES_VM, &(struct kvm_sev_init){}); - - test_init2_invalid(0, &(struct kvm_sev_init){}, - "VM type is KVM_X86_DEFAULT_VM"); - if (kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SW_PROTECTED_VM)) - test_init2_invalid(KVM_X86_SW_PROTECTED_VM, &(struct kvm_sev_init){}, - "VM type is KVM_X86_SW_PROTECTED_VM"); -} - -void test_flags(uint32_t vm_type) -{ - int i; - - for (i = 0; i < 32; i++) - test_init2_invalid(vm_type, - &(struct kvm_sev_init){ .flags = BIT(i) }, - "invalid flag"); -} - -void test_features(uint32_t vm_type, uint64_t supported_features) -{ - int i; - - for (i = 0; i < 64; i++) { - if (!(supported_features & BIT_ULL(i))) - test_init2_invalid(vm_type, - &(struct kvm_sev_init){ .vmsa_features = BIT_ULL(i) }, - "unknown feature"); - else if (KNOWN_FEATURES & BIT_ULL(i)) - test_init2(vm_type, - &(struct kvm_sev_init){ .vmsa_features = BIT_ULL(i) }); - } -} - -int main(int argc, char *argv[]) -{ - int kvm_fd = open_kvm_dev_path_or_exit(); - bool have_sev; - - TEST_REQUIRE(__kvm_has_device_attr(kvm_fd, KVM_X86_GRP_SEV, - KVM_X86_SEV_VMSA_FEATURES) == 0); - kvm_device_attr_get(kvm_fd, KVM_X86_GRP_SEV, - KVM_X86_SEV_VMSA_FEATURES, - &supported_vmsa_features); - - have_sev = kvm_cpu_has(X86_FEATURE_SEV); - TEST_ASSERT(have_sev == !!(kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SEV_VM)), - "sev: KVM_CAP_VM_TYPES (%x) does not match cpuid (checking %x)", - kvm_check_cap(KVM_CAP_VM_TYPES), 1 << KVM_X86_SEV_VM); - - TEST_REQUIRE(kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SEV_VM)); - have_sev_es = kvm_cpu_has(X86_FEATURE_SEV_ES); - - TEST_ASSERT(have_sev_es == !!(kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SEV_ES_VM)), - "sev-es: KVM_CAP_VM_TYPES (%x) does not match cpuid (checking %x)", - kvm_check_cap(KVM_CAP_VM_TYPES), 1 << KVM_X86_SEV_ES_VM); - - test_vm_types(); - - test_flags(KVM_X86_SEV_VM); - if (have_sev_es) - test_flags(KVM_X86_SEV_ES_VM); - - test_features(KVM_X86_SEV_VM, 0); - if (have_sev_es) - test_features(KVM_X86_SEV_ES_VM, supported_vmsa_features); - - return 0; -} diff --git a/tools/testing/selftests/kvm/x86_64/sev_migrate_tests.c b/tools/testing/selftests/kvm/x86_64/sev_migrate_tests.c deleted file mode 100644 index 0a6dfba3905b..000000000000 --- a/tools/testing/selftests/kvm/x86_64/sev_migrate_tests.c +++ /dev/null @@ -1,397 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -#include -#include -#include -#include -#include -#include -#include - -#include "test_util.h" -#include "kvm_util.h" -#include "processor.h" -#include "sev.h" -#include "kselftest.h" - -#define NR_MIGRATE_TEST_VCPUS 4 -#define NR_MIGRATE_TEST_VMS 3 -#define NR_LOCK_TESTING_THREADS 3 -#define NR_LOCK_TESTING_ITERATIONS 10000 - -bool have_sev_es; - -static struct kvm_vm *sev_vm_create(bool es) -{ - struct kvm_vm *vm; - int i; - - vm = vm_create_barebones(); - if (!es) - sev_vm_init(vm); - else - sev_es_vm_init(vm); - - for (i = 0; i < NR_MIGRATE_TEST_VCPUS; ++i) - __vm_vcpu_add(vm, i); - - sev_vm_launch(vm, es ? 
SEV_POLICY_ES : 0); - - if (es) - vm_sev_ioctl(vm, KVM_SEV_LAUNCH_UPDATE_VMSA, NULL); - return vm; -} - -static struct kvm_vm *aux_vm_create(bool with_vcpus) -{ - struct kvm_vm *vm; - int i; - - vm = vm_create_barebones(); - if (!with_vcpus) - return vm; - - for (i = 0; i < NR_MIGRATE_TEST_VCPUS; ++i) - __vm_vcpu_add(vm, i); - - return vm; -} - -static int __sev_migrate_from(struct kvm_vm *dst, struct kvm_vm *src) -{ - return __vm_enable_cap(dst, KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM, src->fd); -} - - -static void sev_migrate_from(struct kvm_vm *dst, struct kvm_vm *src) -{ - int ret; - - ret = __sev_migrate_from(dst, src); - TEST_ASSERT(!ret, "Migration failed, ret: %d, errno: %d", ret, errno); -} - -static void test_sev_migrate_from(bool es) -{ - struct kvm_vm *src_vm; - struct kvm_vm *dst_vms[NR_MIGRATE_TEST_VMS]; - int i, ret; - - src_vm = sev_vm_create(es); - for (i = 0; i < NR_MIGRATE_TEST_VMS; ++i) - dst_vms[i] = aux_vm_create(true); - - /* Initial migration from the src to the first dst. */ - sev_migrate_from(dst_vms[0], src_vm); - - for (i = 1; i < NR_MIGRATE_TEST_VMS; i++) - sev_migrate_from(dst_vms[i], dst_vms[i - 1]); - - /* Migrate the guest back to the original VM. */ - ret = __sev_migrate_from(src_vm, dst_vms[NR_MIGRATE_TEST_VMS - 1]); - TEST_ASSERT(ret == -1 && errno == EIO, - "VM that was migrated from should be dead. ret %d, errno: %d", ret, - errno); - - kvm_vm_free(src_vm); - for (i = 0; i < NR_MIGRATE_TEST_VMS; ++i) - kvm_vm_free(dst_vms[i]); -} - -struct locking_thread_input { - struct kvm_vm *vm; - struct kvm_vm *source_vms[NR_LOCK_TESTING_THREADS]; -}; - -static void *locking_test_thread(void *arg) -{ - int i, j; - struct locking_thread_input *input = (struct locking_thread_input *)arg; - - for (i = 0; i < NR_LOCK_TESTING_ITERATIONS; ++i) { - j = i % NR_LOCK_TESTING_THREADS; - __sev_migrate_from(input->vm, input->source_vms[j]); - } - - return NULL; -} - -static void test_sev_migrate_locking(void) -{ - struct locking_thread_input input[NR_LOCK_TESTING_THREADS]; - pthread_t pt[NR_LOCK_TESTING_THREADS]; - int i; - - for (i = 0; i < NR_LOCK_TESTING_THREADS; ++i) { - input[i].vm = sev_vm_create(/* es= */ false); - input[0].source_vms[i] = input[i].vm; - } - for (i = 1; i < NR_LOCK_TESTING_THREADS; ++i) - memcpy(input[i].source_vms, input[0].source_vms, - sizeof(input[i].source_vms)); - - for (i = 0; i < NR_LOCK_TESTING_THREADS; ++i) - pthread_create(&pt[i], NULL, locking_test_thread, &input[i]); - - for (i = 0; i < NR_LOCK_TESTING_THREADS; ++i) - pthread_join(pt[i], NULL); - for (i = 0; i < NR_LOCK_TESTING_THREADS; ++i) - kvm_vm_free(input[i].vm); -} - -static void test_sev_migrate_parameters(void) -{ - struct kvm_vm *sev_vm, *sev_es_vm, *vm_no_vcpu, *vm_no_sev, - *sev_es_vm_no_vmsa; - int ret; - - vm_no_vcpu = vm_create_barebones(); - vm_no_sev = aux_vm_create(true); - ret = __sev_migrate_from(vm_no_vcpu, vm_no_sev); - TEST_ASSERT(ret == -1 && errno == EINVAL, - "Migrations require SEV enabled. ret %d, errno: %d", ret, - errno); - - if (!have_sev_es) - goto out; - - sev_vm = sev_vm_create(/* es= */ false); - sev_es_vm = sev_vm_create(/* es= */ true); - sev_es_vm_no_vmsa = vm_create_barebones(); - sev_es_vm_init(sev_es_vm_no_vmsa); - __vm_vcpu_add(sev_es_vm_no_vmsa, 1); - - ret = __sev_migrate_from(sev_vm, sev_es_vm); - TEST_ASSERT( - ret == -1 && errno == EINVAL, - "Should not be able migrate to SEV enabled VM. 
ret: %d, errno: %d", - ret, errno); - - ret = __sev_migrate_from(sev_es_vm, sev_vm); - TEST_ASSERT( - ret == -1 && errno == EINVAL, - "Should not be able migrate to SEV-ES enabled VM. ret: %d, errno: %d", - ret, errno); - - ret = __sev_migrate_from(vm_no_vcpu, sev_es_vm); - TEST_ASSERT( - ret == -1 && errno == EINVAL, - "SEV-ES migrations require same number of vCPUS. ret: %d, errno: %d", - ret, errno); - - ret = __sev_migrate_from(vm_no_vcpu, sev_es_vm_no_vmsa); - TEST_ASSERT( - ret == -1 && errno == EINVAL, - "SEV-ES migrations require UPDATE_VMSA. ret %d, errno: %d", - ret, errno); - - kvm_vm_free(sev_vm); - kvm_vm_free(sev_es_vm); - kvm_vm_free(sev_es_vm_no_vmsa); -out: - kvm_vm_free(vm_no_vcpu); - kvm_vm_free(vm_no_sev); -} - -static int __sev_mirror_create(struct kvm_vm *dst, struct kvm_vm *src) -{ - return __vm_enable_cap(dst, KVM_CAP_VM_COPY_ENC_CONTEXT_FROM, src->fd); -} - - -static void sev_mirror_create(struct kvm_vm *dst, struct kvm_vm *src) -{ - int ret; - - ret = __sev_mirror_create(dst, src); - TEST_ASSERT(!ret, "Copying context failed, ret: %d, errno: %d", ret, errno); -} - -static void verify_mirror_allowed_cmds(struct kvm_vm *vm) -{ - struct kvm_sev_guest_status status; - int cmd_id; - - for (cmd_id = KVM_SEV_INIT; cmd_id < KVM_SEV_NR_MAX; ++cmd_id) { - int ret; - - /* - * These commands are allowed for mirror VMs, all others are - * not. - */ - switch (cmd_id) { - case KVM_SEV_LAUNCH_UPDATE_VMSA: - case KVM_SEV_GUEST_STATUS: - case KVM_SEV_DBG_DECRYPT: - case KVM_SEV_DBG_ENCRYPT: - continue; - default: - break; - } - - /* - * These commands should be disallowed before the data - * parameter is examined so NULL is OK here. - */ - ret = __vm_sev_ioctl(vm, cmd_id, NULL); - TEST_ASSERT( - ret == -1 && errno == EINVAL, - "Should not be able call command: %d. ret: %d, errno: %d", - cmd_id, ret, errno); - } - - vm_sev_ioctl(vm, KVM_SEV_GUEST_STATUS, &status); -} - -static void test_sev_mirror(bool es) -{ - struct kvm_vm *src_vm, *dst_vm; - int i; - - src_vm = sev_vm_create(es); - dst_vm = aux_vm_create(false); - - sev_mirror_create(dst_vm, src_vm); - - /* Check that we can complete creation of the mirror VM. */ - for (i = 0; i < NR_MIGRATE_TEST_VCPUS; ++i) - __vm_vcpu_add(dst_vm, i); - - if (es) - vm_sev_ioctl(dst_vm, KVM_SEV_LAUNCH_UPDATE_VMSA, NULL); - - verify_mirror_allowed_cmds(dst_vm); - - kvm_vm_free(src_vm); - kvm_vm_free(dst_vm); -} - -static void test_sev_mirror_parameters(void) -{ - struct kvm_vm *sev_vm, *sev_es_vm, *vm_no_vcpu, *vm_with_vcpu; - int ret; - - sev_vm = sev_vm_create(/* es= */ false); - vm_with_vcpu = aux_vm_create(true); - vm_no_vcpu = aux_vm_create(false); - - ret = __sev_mirror_create(sev_vm, sev_vm); - TEST_ASSERT( - ret == -1 && errno == EINVAL, - "Should not be able copy context to self. ret: %d, errno: %d", - ret, errno); - - ret = __sev_mirror_create(vm_no_vcpu, vm_with_vcpu); - TEST_ASSERT(ret == -1 && errno == EINVAL, - "Copy context requires SEV enabled. ret %d, errno: %d", ret, - errno); - - ret = __sev_mirror_create(vm_with_vcpu, sev_vm); - TEST_ASSERT( - ret == -1 && errno == EINVAL, - "SEV copy context requires no vCPUS on the destination. ret: %d, errno: %d", - ret, errno); - - if (!have_sev_es) - goto out; - - sev_es_vm = sev_vm_create(/* es= */ true); - ret = __sev_mirror_create(sev_vm, sev_es_vm); - TEST_ASSERT( - ret == -1 && errno == EINVAL, - "Should not be able copy context to SEV enabled VM. 
ret: %d, errno: %d", - ret, errno); - - ret = __sev_mirror_create(sev_es_vm, sev_vm); - TEST_ASSERT( - ret == -1 && errno == EINVAL, - "Should not be able copy context to SEV-ES enabled VM. ret: %d, errno: %d", - ret, errno); - - kvm_vm_free(sev_es_vm); - -out: - kvm_vm_free(sev_vm); - kvm_vm_free(vm_with_vcpu); - kvm_vm_free(vm_no_vcpu); -} - -static void test_sev_move_copy(void) -{ - struct kvm_vm *dst_vm, *dst2_vm, *dst3_vm, *sev_vm, *mirror_vm, - *dst_mirror_vm, *dst2_mirror_vm, *dst3_mirror_vm; - - sev_vm = sev_vm_create(/* es= */ false); - dst_vm = aux_vm_create(true); - dst2_vm = aux_vm_create(true); - dst3_vm = aux_vm_create(true); - mirror_vm = aux_vm_create(false); - dst_mirror_vm = aux_vm_create(false); - dst2_mirror_vm = aux_vm_create(false); - dst3_mirror_vm = aux_vm_create(false); - - sev_mirror_create(mirror_vm, sev_vm); - - sev_migrate_from(dst_mirror_vm, mirror_vm); - sev_migrate_from(dst_vm, sev_vm); - - sev_migrate_from(dst2_vm, dst_vm); - sev_migrate_from(dst2_mirror_vm, dst_mirror_vm); - - sev_migrate_from(dst3_mirror_vm, dst2_mirror_vm); - sev_migrate_from(dst3_vm, dst2_vm); - - kvm_vm_free(dst_vm); - kvm_vm_free(sev_vm); - kvm_vm_free(dst2_vm); - kvm_vm_free(dst3_vm); - kvm_vm_free(mirror_vm); - kvm_vm_free(dst_mirror_vm); - kvm_vm_free(dst2_mirror_vm); - kvm_vm_free(dst3_mirror_vm); - - /* - * Run similar test be destroy mirrors before mirrored VMs to ensure - * destruction is done safely. - */ - sev_vm = sev_vm_create(/* es= */ false); - dst_vm = aux_vm_create(true); - mirror_vm = aux_vm_create(false); - dst_mirror_vm = aux_vm_create(false); - - sev_mirror_create(mirror_vm, sev_vm); - - sev_migrate_from(dst_mirror_vm, mirror_vm); - sev_migrate_from(dst_vm, sev_vm); - - kvm_vm_free(mirror_vm); - kvm_vm_free(dst_mirror_vm); - kvm_vm_free(dst_vm); - kvm_vm_free(sev_vm); -} - -int main(int argc, char *argv[]) -{ - TEST_REQUIRE(kvm_has_cap(KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM)); - TEST_REQUIRE(kvm_has_cap(KVM_CAP_VM_COPY_ENC_CONTEXT_FROM)); - - TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SEV)); - - have_sev_es = kvm_cpu_has(X86_FEATURE_SEV_ES); - - if (kvm_has_cap(KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM)) { - test_sev_migrate_from(/* es= */ false); - if (have_sev_es) - test_sev_migrate_from(/* es= */ true); - test_sev_migrate_locking(); - test_sev_migrate_parameters(); - if (kvm_has_cap(KVM_CAP_VM_COPY_ENC_CONTEXT_FROM)) - test_sev_move_copy(); - } - if (kvm_has_cap(KVM_CAP_VM_COPY_ENC_CONTEXT_FROM)) { - test_sev_mirror(/* es= */ false); - if (have_sev_es) - test_sev_mirror(/* es= */ true); - test_sev_mirror_parameters(); - } - return 0; -} diff --git a/tools/testing/selftests/kvm/x86_64/sev_smoke_test.c b/tools/testing/selftests/kvm/x86_64/sev_smoke_test.c deleted file mode 100644 index ae77698e6e97..000000000000 --- a/tools/testing/selftests/kvm/x86_64/sev_smoke_test.c +++ /dev/null @@ -1,205 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -#include -#include -#include -#include -#include -#include - -#include "test_util.h" -#include "kvm_util.h" -#include "processor.h" -#include "svm_util.h" -#include "linux/psp-sev.h" -#include "sev.h" - - -#define XFEATURE_MASK_X87_AVX (XFEATURE_MASK_FP | XFEATURE_MASK_SSE | XFEATURE_MASK_YMM) - -static void guest_sev_es_code(void) -{ - /* TODO: Check CPUID after GHCB-based hypercall support is added. */ - GUEST_ASSERT(rdmsr(MSR_AMD64_SEV) & MSR_AMD64_SEV_ENABLED); - GUEST_ASSERT(rdmsr(MSR_AMD64_SEV) & MSR_AMD64_SEV_ES_ENABLED); - - /* - * TODO: Add GHCB and ucall support for SEV-ES guests. 
For now, simply - * force "termination" to signal "done" via the GHCB MSR protocol. - */ - wrmsr(MSR_AMD64_SEV_ES_GHCB, GHCB_MSR_TERM_REQ); - __asm__ __volatile__("rep; vmmcall"); -} - -static void guest_sev_code(void) -{ - GUEST_ASSERT(this_cpu_has(X86_FEATURE_SEV)); - GUEST_ASSERT(rdmsr(MSR_AMD64_SEV) & MSR_AMD64_SEV_ENABLED); - - GUEST_DONE(); -} - -/* Stash state passed via VMSA before any compiled code runs. */ -extern void guest_code_xsave(void); -asm("guest_code_xsave:\n" - "mov $" __stringify(XFEATURE_MASK_X87_AVX) ", %eax\n" - "xor %edx, %edx\n" - "xsave (%rdi)\n" - "jmp guest_sev_es_code"); - -static void compare_xsave(u8 *from_host, u8 *from_guest) -{ - int i; - bool bad = false; - for (i = 0; i < 4095; i++) { - if (from_host[i] != from_guest[i]) { - printf("mismatch at %02hhx | %02hhx %02hhx\n", i, from_host[i], from_guest[i]); - bad = true; - } - } - - if (bad) - abort(); -} - -static void test_sync_vmsa(uint32_t policy) -{ - struct kvm_vcpu *vcpu; - struct kvm_vm *vm; - vm_vaddr_t gva; - void *hva; - - double x87val = M_PI; - struct kvm_xsave __attribute__((aligned(64))) xsave = { 0 }; - - vm = vm_sev_create_with_one_vcpu(KVM_X86_SEV_ES_VM, guest_code_xsave, &vcpu); - gva = vm_vaddr_alloc_shared(vm, PAGE_SIZE, KVM_UTIL_MIN_VADDR, - MEM_REGION_TEST_DATA); - hva = addr_gva2hva(vm, gva); - - vcpu_args_set(vcpu, 1, gva); - - asm("fninit\n" - "vpcmpeqb %%ymm4, %%ymm4, %%ymm4\n" - "fldl %3\n" - "xsave (%2)\n" - "fstp %%st\n" - : "=m"(xsave) - : "A"(XFEATURE_MASK_X87_AVX), "r"(&xsave), "m" (x87val) - : "ymm4", "st", "st(1)", "st(2)", "st(3)", "st(4)", "st(5)", "st(6)", "st(7)"); - vcpu_xsave_set(vcpu, &xsave); - - vm_sev_launch(vm, SEV_POLICY_ES | policy, NULL); - - /* This page is shared, so make it decrypted. */ - memset(hva, 0, 4096); - - vcpu_run(vcpu); - - TEST_ASSERT(vcpu->run->exit_reason == KVM_EXIT_SYSTEM_EVENT, - "Wanted SYSTEM_EVENT, got %s", - exit_reason_str(vcpu->run->exit_reason)); - TEST_ASSERT_EQ(vcpu->run->system_event.type, KVM_SYSTEM_EVENT_SEV_TERM); - TEST_ASSERT_EQ(vcpu->run->system_event.ndata, 1); - TEST_ASSERT_EQ(vcpu->run->system_event.data[0], GHCB_MSR_TERM_REQ); - - compare_xsave((u8 *)&xsave, (u8 *)hva); - - kvm_vm_free(vm); -} - -static void test_sev(void *guest_code, uint64_t policy) -{ - struct kvm_vcpu *vcpu; - struct kvm_vm *vm; - struct ucall uc; - - uint32_t type = policy & SEV_POLICY_ES ? KVM_X86_SEV_ES_VM : KVM_X86_SEV_VM; - - vm = vm_sev_create_with_one_vcpu(type, guest_code, &vcpu); - - /* TODO: Validate the measurement is as expected. */ - vm_sev_launch(vm, policy, NULL); - - for (;;) { - vcpu_run(vcpu); - - if (policy & SEV_POLICY_ES) { - TEST_ASSERT(vcpu->run->exit_reason == KVM_EXIT_SYSTEM_EVENT, - "Wanted SYSTEM_EVENT, got %s", - exit_reason_str(vcpu->run->exit_reason)); - TEST_ASSERT_EQ(vcpu->run->system_event.type, KVM_SYSTEM_EVENT_SEV_TERM); - TEST_ASSERT_EQ(vcpu->run->system_event.ndata, 1); - TEST_ASSERT_EQ(vcpu->run->system_event.data[0], GHCB_MSR_TERM_REQ); - break; - } - - switch (get_ucall(vcpu, &uc)) { - case UCALL_SYNC: - continue; - case UCALL_DONE: - return; - case UCALL_ABORT: - REPORT_GUEST_ASSERT(uc); - default: - TEST_FAIL("Unexpected exit: %s", - exit_reason_str(vcpu->run->exit_reason)); - } - } - - kvm_vm_free(vm); -} - -static void guest_shutdown_code(void) -{ - struct desc_ptr idt; - - /* Clobber the IDT so that #UD is guaranteed to trigger SHUTDOWN. 
*/ - memset(&idt, 0, sizeof(idt)); - __asm__ __volatile__("lidt %0" :: "m"(idt)); - - __asm__ __volatile__("ud2"); -} - -static void test_sev_es_shutdown(void) -{ - struct kvm_vcpu *vcpu; - struct kvm_vm *vm; - - uint32_t type = KVM_X86_SEV_ES_VM; - - vm = vm_sev_create_with_one_vcpu(type, guest_shutdown_code, &vcpu); - - vm_sev_launch(vm, SEV_POLICY_ES, NULL); - - vcpu_run(vcpu); - TEST_ASSERT(vcpu->run->exit_reason == KVM_EXIT_SHUTDOWN, - "Wanted SHUTDOWN, got %s", - exit_reason_str(vcpu->run->exit_reason)); - - kvm_vm_free(vm); -} - -int main(int argc, char *argv[]) -{ - const u64 xf_mask = XFEATURE_MASK_X87_AVX; - - TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SEV)); - - test_sev(guest_sev_code, SEV_POLICY_NO_DBG); - test_sev(guest_sev_code, 0); - - if (kvm_cpu_has(X86_FEATURE_SEV_ES)) { - test_sev(guest_sev_es_code, SEV_POLICY_ES | SEV_POLICY_NO_DBG); - test_sev(guest_sev_es_code, SEV_POLICY_ES); - - test_sev_es_shutdown(); - - if (kvm_has_cap(KVM_CAP_XCRS) && - (xgetbv(0) & kvm_cpu_supported_xcr0() & xf_mask) == xf_mask) { - test_sync_vmsa(0); - test_sync_vmsa(SEV_POLICY_NO_DBG); - } - } - - return 0; -} diff --git a/tools/testing/selftests/kvm/x86_64/smaller_maxphyaddr_emulation_test.c b/tools/testing/selftests/kvm/x86_64/smaller_maxphyaddr_emulation_test.c deleted file mode 100644 index fabeeaddfb3a..000000000000 --- a/tools/testing/selftests/kvm/x86_64/smaller_maxphyaddr_emulation_test.c +++ /dev/null @@ -1,105 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright (C) 2020, Google LLC. - * - * Test that KVM emulates instructions in response to EPT violations when - * allow_smaller_maxphyaddr is enabled and guest.MAXPHYADDR < host.MAXPHYADDR. - */ -#include "flds_emulation.h" - -#include "test_util.h" -#include "kvm_util.h" -#include "vmx.h" - -#define MAXPHYADDR 36 - -#define MEM_REGION_GVA 0x0000123456789000 -#define MEM_REGION_GPA 0x0000000700000000 -#define MEM_REGION_SLOT 10 -#define MEM_REGION_SIZE PAGE_SIZE - -static void guest_code(bool tdp_enabled) -{ - uint64_t error_code; - uint64_t vector; - - vector = kvm_asm_safe_ec(FLDS_MEM_EAX, error_code, "a"(MEM_REGION_GVA)); - - /* - * When TDP is enabled, flds will trigger an emulation failure, exit to - * userspace, and then the selftest host "VMM" skips the instruction. - * - * When TDP is disabled, no instruction emulation is required so flds - * should generate #PF(RSVD). 
- */ - if (tdp_enabled) { - GUEST_ASSERT(!vector); - } else { - GUEST_ASSERT_EQ(vector, PF_VECTOR); - GUEST_ASSERT(error_code & PFERR_RSVD_MASK); - } - - GUEST_DONE(); -} - -int main(int argc, char *argv[]) -{ - struct kvm_vcpu *vcpu; - struct kvm_vm *vm; - struct ucall uc; - uint64_t *pte; - uint64_t *hva; - uint64_t gpa; - int rc; - - TEST_REQUIRE(kvm_has_cap(KVM_CAP_SMALLER_MAXPHYADDR)); - - vm = vm_create_with_one_vcpu(&vcpu, guest_code); - vcpu_args_set(vcpu, 1, kvm_is_tdp_enabled()); - - vcpu_set_cpuid_property(vcpu, X86_PROPERTY_MAX_PHY_ADDR, MAXPHYADDR); - - rc = kvm_check_cap(KVM_CAP_EXIT_ON_EMULATION_FAILURE); - TEST_ASSERT(rc, "KVM_CAP_EXIT_ON_EMULATION_FAILURE is unavailable"); - vm_enable_cap(vm, KVM_CAP_EXIT_ON_EMULATION_FAILURE, 1); - - vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, - MEM_REGION_GPA, MEM_REGION_SLOT, - MEM_REGION_SIZE / PAGE_SIZE, 0); - gpa = vm_phy_pages_alloc(vm, MEM_REGION_SIZE / PAGE_SIZE, - MEM_REGION_GPA, MEM_REGION_SLOT); - TEST_ASSERT(gpa == MEM_REGION_GPA, "Failed vm_phy_pages_alloc"); - virt_map(vm, MEM_REGION_GVA, MEM_REGION_GPA, 1); - hva = addr_gpa2hva(vm, MEM_REGION_GPA); - memset(hva, 0, PAGE_SIZE); - - pte = vm_get_page_table_entry(vm, MEM_REGION_GVA); - *pte |= BIT_ULL(MAXPHYADDR); - - vcpu_run(vcpu); - - /* - * When TDP is enabled, KVM must emulate in response the guest physical - * address that is illegal from the guest's perspective, but is legal - * from hardware's perspeective. This should result in an emulation - * failure exit to userspace since KVM doesn't support emulating flds. - */ - if (kvm_is_tdp_enabled()) { - handle_flds_emulation_failure_exit(vcpu); - vcpu_run(vcpu); - } - - switch (get_ucall(vcpu, &uc)) { - case UCALL_ABORT: - REPORT_GUEST_ASSERT(uc); - break; - case UCALL_DONE: - break; - default: - TEST_FAIL("Unrecognized ucall: %lu", uc.cmd); - } - - kvm_vm_free(vm); - - return 0; -} diff --git a/tools/testing/selftests/kvm/x86_64/smm_test.c b/tools/testing/selftests/kvm/x86_64/smm_test.c deleted file mode 100644 index 55c88d664a94..000000000000 --- a/tools/testing/selftests/kvm/x86_64/smm_test.c +++ /dev/null @@ -1,209 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright (C) 2018, Red Hat, Inc. - * - * Tests for SMM. - */ -#include -#include -#include -#include -#include -#include - -#include "test_util.h" - -#include "kvm_util.h" - -#include "vmx.h" -#include "svm_util.h" - -#define SMRAM_SIZE 65536 -#define SMRAM_MEMSLOT ((1 << 16) | 1) -#define SMRAM_PAGES (SMRAM_SIZE / PAGE_SIZE) -#define SMRAM_GPA 0x1000000 -#define SMRAM_STAGE 0xfe - -#define STR(x) #x -#define XSTR(s) STR(s) - -#define SYNC_PORT 0xe -#define DONE 0xff - -/* - * This is compiled as normal 64-bit code, however, SMI handler is executed - * in real-address mode. To stay simple we're limiting ourselves to a mode - * independent subset of asm here. - * SMI handler always report back fixed stage SMRAM_STAGE. 
- */ -uint8_t smi_handler[] = { - 0xb0, SMRAM_STAGE, /* mov $SMRAM_STAGE, %al */ - 0xe4, SYNC_PORT, /* in $SYNC_PORT, %al */ - 0x0f, 0xaa, /* rsm */ -}; - -static inline void sync_with_host(uint64_t phase) -{ - asm volatile("in $" XSTR(SYNC_PORT)", %%al \n" - : "+a" (phase)); -} - -static void self_smi(void) -{ - x2apic_write_reg(APIC_ICR, - APIC_DEST_SELF | APIC_INT_ASSERT | APIC_DM_SMI); -} - -static void l2_guest_code(void) -{ - sync_with_host(8); - - sync_with_host(10); - - vmcall(); -} - -static void guest_code(void *arg) -{ - #define L2_GUEST_STACK_SIZE 64 - unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; - uint64_t apicbase = rdmsr(MSR_IA32_APICBASE); - struct svm_test_data *svm = arg; - struct vmx_pages *vmx_pages = arg; - - sync_with_host(1); - - wrmsr(MSR_IA32_APICBASE, apicbase | X2APIC_ENABLE); - - sync_with_host(2); - - self_smi(); - - sync_with_host(4); - - if (arg) { - if (this_cpu_has(X86_FEATURE_SVM)) { - generic_svm_setup(svm, l2_guest_code, - &l2_guest_stack[L2_GUEST_STACK_SIZE]); - } else { - GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages)); - GUEST_ASSERT(load_vmcs(vmx_pages)); - prepare_vmcs(vmx_pages, l2_guest_code, - &l2_guest_stack[L2_GUEST_STACK_SIZE]); - } - - sync_with_host(5); - - self_smi(); - - sync_with_host(7); - - if (this_cpu_has(X86_FEATURE_SVM)) { - run_guest(svm->vmcb, svm->vmcb_gpa); - run_guest(svm->vmcb, svm->vmcb_gpa); - } else { - vmlaunch(); - vmresume(); - } - - /* Stages 8-11 are eaten by SMM (SMRAM_STAGE reported instead) */ - sync_with_host(12); - } - - sync_with_host(DONE); -} - -void inject_smi(struct kvm_vcpu *vcpu) -{ - struct kvm_vcpu_events events; - - vcpu_events_get(vcpu, &events); - - events.smi.pending = 1; - events.flags |= KVM_VCPUEVENT_VALID_SMM; - - vcpu_events_set(vcpu, &events); -} - -int main(int argc, char *argv[]) -{ - vm_vaddr_t nested_gva = 0; - - struct kvm_vcpu *vcpu; - struct kvm_regs regs; - struct kvm_vm *vm; - struct kvm_x86_state *state; - int stage, stage_reported; - - TEST_REQUIRE(kvm_has_cap(KVM_CAP_X86_SMM)); - - /* Create VM */ - vm = vm_create_with_one_vcpu(&vcpu, guest_code); - - vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, SMRAM_GPA, - SMRAM_MEMSLOT, SMRAM_PAGES, 0); - TEST_ASSERT(vm_phy_pages_alloc(vm, SMRAM_PAGES, SMRAM_GPA, SMRAM_MEMSLOT) - == SMRAM_GPA, "could not allocate guest physical addresses?"); - - memset(addr_gpa2hva(vm, SMRAM_GPA), 0x0, SMRAM_SIZE); - memcpy(addr_gpa2hva(vm, SMRAM_GPA) + 0x8000, smi_handler, - sizeof(smi_handler)); - - vcpu_set_msr(vcpu, MSR_IA32_SMBASE, SMRAM_GPA); - - if (kvm_has_cap(KVM_CAP_NESTED_STATE)) { - if (kvm_cpu_has(X86_FEATURE_SVM)) - vcpu_alloc_svm(vm, &nested_gva); - else if (kvm_cpu_has(X86_FEATURE_VMX)) - vcpu_alloc_vmx(vm, &nested_gva); - } - - if (!nested_gva) - pr_info("will skip SMM test with VMX enabled\n"); - - vcpu_args_set(vcpu, 1, nested_gva); - - for (stage = 1;; stage++) { - vcpu_run(vcpu); - TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); - - memset(®s, 0, sizeof(regs)); - vcpu_regs_get(vcpu, ®s); - - stage_reported = regs.rax & 0xff; - - if (stage_reported == DONE) - goto done; - - TEST_ASSERT(stage_reported == stage || - stage_reported == SMRAM_STAGE, - "Unexpected stage: #%x, got %x", - stage, stage_reported); - - /* - * Enter SMM during L2 execution and check that we correctly - * return from it. Do not perform save/restore while in SMM yet. - */ - if (stage == 8) { - inject_smi(vcpu); - continue; - } - - /* - * Perform save/restore while the guest is in SMM triggered - * during L2 execution. 
- */ - if (stage == 10) - inject_smi(vcpu); - - state = vcpu_save_state(vcpu); - kvm_vm_release(vm); - - vcpu = vm_recreate_with_one_vcpu(vm); - vcpu_load_state(vcpu, state); - kvm_x86_state_cleanup(state); - } - -done: - kvm_vm_free(vm); -} diff --git a/tools/testing/selftests/kvm/x86_64/state_test.c b/tools/testing/selftests/kvm/x86_64/state_test.c deleted file mode 100644 index 141b7fc0c965..000000000000 --- a/tools/testing/selftests/kvm/x86_64/state_test.c +++ /dev/null @@ -1,323 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * KVM_GET/SET_* tests - * - * Copyright (C) 2018, Red Hat, Inc. - * - * Tests for vCPU state save/restore, including nested guest state. - */ -#include -#include -#include -#include -#include - -#include "test_util.h" - -#include "kvm_util.h" -#include "processor.h" -#include "vmx.h" -#include "svm_util.h" - -#define L2_GUEST_STACK_SIZE 256 - -void svm_l2_guest_code(void) -{ - GUEST_SYNC(4); - /* Exit to L1 */ - vmcall(); - GUEST_SYNC(6); - /* Done, exit to L1 and never come back. */ - vmcall(); -} - -static void svm_l1_guest_code(struct svm_test_data *svm) -{ - unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; - struct vmcb *vmcb = svm->vmcb; - - GUEST_ASSERT(svm->vmcb_gpa); - /* Prepare for L2 execution. */ - generic_svm_setup(svm, svm_l2_guest_code, - &l2_guest_stack[L2_GUEST_STACK_SIZE]); - - GUEST_SYNC(3); - run_guest(vmcb, svm->vmcb_gpa); - GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL); - GUEST_SYNC(5); - vmcb->save.rip += 3; - run_guest(vmcb, svm->vmcb_gpa); - GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL); - GUEST_SYNC(7); -} - -void vmx_l2_guest_code(void) -{ - GUEST_SYNC(6); - - /* Exit to L1 */ - vmcall(); - - /* L1 has now set up a shadow VMCS for us. */ - GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffee); - GUEST_SYNC(10); - GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffee); - GUEST_ASSERT(!vmwrite(GUEST_RIP, 0xc0fffee)); - GUEST_SYNC(11); - GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0fffee); - GUEST_ASSERT(!vmwrite(GUEST_RIP, 0xc0ffffee)); - GUEST_SYNC(12); - - /* Done, exit to L1 and never come back. */ - vmcall(); -} - -static void vmx_l1_guest_code(struct vmx_pages *vmx_pages) -{ - unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; - - GUEST_ASSERT(vmx_pages->vmcs_gpa); - GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages)); - GUEST_SYNC(3); - GUEST_ASSERT(load_vmcs(vmx_pages)); - GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa); - - GUEST_SYNC(4); - GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa); - - prepare_vmcs(vmx_pages, vmx_l2_guest_code, - &l2_guest_stack[L2_GUEST_STACK_SIZE]); - - GUEST_SYNC(5); - GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa); - GUEST_ASSERT(!vmlaunch()); - GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa); - GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL); - - /* Check that the launched state is preserved. 
*/ - GUEST_ASSERT(vmlaunch()); - - GUEST_ASSERT(!vmresume()); - GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL); - - GUEST_SYNC(7); - GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL); - - GUEST_ASSERT(!vmresume()); - GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL); - - vmwrite(GUEST_RIP, vmreadz(GUEST_RIP) + 3); - - vmwrite(SECONDARY_VM_EXEC_CONTROL, SECONDARY_EXEC_SHADOW_VMCS); - vmwrite(VMCS_LINK_POINTER, vmx_pages->shadow_vmcs_gpa); - - GUEST_ASSERT(!vmptrld(vmx_pages->shadow_vmcs_gpa)); - GUEST_ASSERT(vmlaunch()); - GUEST_SYNC(8); - GUEST_ASSERT(vmlaunch()); - GUEST_ASSERT(vmresume()); - - vmwrite(GUEST_RIP, 0xc0ffee); - GUEST_SYNC(9); - GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffee); - - GUEST_ASSERT(!vmptrld(vmx_pages->vmcs_gpa)); - GUEST_ASSERT(!vmresume()); - GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL); - - GUEST_ASSERT(!vmptrld(vmx_pages->shadow_vmcs_gpa)); - GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffffee); - GUEST_ASSERT(vmlaunch()); - GUEST_ASSERT(vmresume()); - GUEST_SYNC(13); - GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffffee); - GUEST_ASSERT(vmlaunch()); - GUEST_ASSERT(vmresume()); -} - -static void __attribute__((__flatten__)) guest_code(void *arg) -{ - GUEST_SYNC(1); - - if (this_cpu_has(X86_FEATURE_XSAVE)) { - uint64_t supported_xcr0 = this_cpu_supported_xcr0(); - uint8_t buffer[4096]; - - memset(buffer, 0xcc, sizeof(buffer)); - - /* - * Modify state for all supported xfeatures to take them out of - * their "init" state, i.e. to make them show up in XSTATE_BV. - * - * Note off-by-default features, e.g. AMX, are out of scope for - * this particular testcase as they have a different ABI. - */ - GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_FP); - asm volatile ("fincstp"); - - GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_SSE); - asm volatile ("vmovdqu %0, %%xmm0" :: "m" (buffer)); - - if (supported_xcr0 & XFEATURE_MASK_YMM) - asm volatile ("vmovdqu %0, %%ymm0" :: "m" (buffer)); - - if (supported_xcr0 & XFEATURE_MASK_AVX512) { - asm volatile ("kmovq %0, %%k1" :: "r" (-1ull)); - asm volatile ("vmovupd %0, %%zmm0" :: "m" (buffer)); - asm volatile ("vmovupd %0, %%zmm16" :: "m" (buffer)); - } - - if (this_cpu_has(X86_FEATURE_MPX)) { - uint64_t bounds[2] = { 10, 0xffffffffull }; - uint64_t output[2] = { }; - - GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_BNDREGS); - GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_BNDCSR); - - /* - * Don't bother trying to get BNDCSR into the INUSE - * state. MSR_IA32_BNDCFGS doesn't count as it isn't - * managed via XSAVE/XRSTOR, and BNDCFGU can only be - * modified by XRSTOR. Stuffing XSTATE_BV in the host - * is simpler than doing XRSTOR here in the guest. - * - * However, temporarily enable MPX in BNDCFGS so that - * BNDMOV actually loads BND1. If MPX isn't *fully* - * enabled, all MPX instructions are treated as NOPs. - * - * Hand encode "bndmov (%rax),%bnd1" as support for MPX - * mnemonics/registers has been removed from gcc and - * clang (and was never fully supported by clang). - */ - wrmsr(MSR_IA32_BNDCFGS, BIT_ULL(0)); - asm volatile (".byte 0x66,0x0f,0x1a,0x08" :: "a" (bounds)); - /* - * Hand encode "bndmov %bnd1, (%rax)" to sanity check - * that BND1 actually got loaded. 
- */ - asm volatile (".byte 0x66,0x0f,0x1b,0x08" :: "a" (output)); - wrmsr(MSR_IA32_BNDCFGS, 0); - - GUEST_ASSERT_EQ(bounds[0], output[0]); - GUEST_ASSERT_EQ(bounds[1], output[1]); - } - if (this_cpu_has(X86_FEATURE_PKU)) { - GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_PKRU); - set_cr4(get_cr4() | X86_CR4_PKE); - GUEST_ASSERT(this_cpu_has(X86_FEATURE_OSPKE)); - - wrpkru(-1u); - } - } - - GUEST_SYNC(2); - - if (arg) { - if (this_cpu_has(X86_FEATURE_SVM)) - svm_l1_guest_code(arg); - else - vmx_l1_guest_code(arg); - } - - GUEST_DONE(); -} - -int main(int argc, char *argv[]) -{ - uint64_t *xstate_bv, saved_xstate_bv; - vm_vaddr_t nested_gva = 0; - struct kvm_cpuid2 empty_cpuid = {}; - struct kvm_regs regs1, regs2; - struct kvm_vcpu *vcpu, *vcpuN; - struct kvm_vm *vm; - struct kvm_x86_state *state; - struct ucall uc; - int stage; - - /* Create VM */ - vm = vm_create_with_one_vcpu(&vcpu, guest_code); - - vcpu_regs_get(vcpu, ®s1); - - if (kvm_has_cap(KVM_CAP_NESTED_STATE)) { - if (kvm_cpu_has(X86_FEATURE_SVM)) - vcpu_alloc_svm(vm, &nested_gva); - else if (kvm_cpu_has(X86_FEATURE_VMX)) - vcpu_alloc_vmx(vm, &nested_gva); - } - - if (!nested_gva) - pr_info("will skip nested state checks\n"); - - vcpu_args_set(vcpu, 1, nested_gva); - - for (stage = 1;; stage++) { - vcpu_run(vcpu); - TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); - - switch (get_ucall(vcpu, &uc)) { - case UCALL_ABORT: - REPORT_GUEST_ASSERT(uc); - /* NOT REACHED */ - case UCALL_SYNC: - break; - case UCALL_DONE: - goto done; - default: - TEST_FAIL("Unknown ucall %lu", uc.cmd); - } - - /* UCALL_SYNC is handled here. */ - TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") && - uc.args[1] == stage, "Stage %d: Unexpected register values vmexit, got %lx", - stage, (ulong)uc.args[1]); - - state = vcpu_save_state(vcpu); - memset(®s1, 0, sizeof(regs1)); - vcpu_regs_get(vcpu, ®s1); - - kvm_vm_release(vm); - - /* Restore state in a new VM. */ - vcpu = vm_recreate_with_one_vcpu(vm); - vcpu_load_state(vcpu, state); - - /* - * Restore XSAVE state in a dummy vCPU, first without doing - * KVM_SET_CPUID2, and then with an empty guest CPUID. Except - * for off-by-default xfeatures, e.g. AMX, KVM is supposed to - * allow KVM_SET_XSAVE regardless of guest CPUID. Manually - * load only XSAVE state, MSRs in particular have a much more - * convoluted ABI. - * - * Load two versions of XSAVE state: one with the actual guest - * XSAVE state, and one with all supported features forced "on" - * in xstate_bv, e.g. to ensure that KVM allows loading all - * supported features, even if something goes awry in saving - * the original snapshot. 
- */ - xstate_bv = (void *)&((uint8_t *)state->xsave->region)[512]; - saved_xstate_bv = *xstate_bv; - - vcpuN = __vm_vcpu_add(vm, vcpu->id + 1); - vcpu_xsave_set(vcpuN, state->xsave); - *xstate_bv = kvm_cpu_supported_xcr0(); - vcpu_xsave_set(vcpuN, state->xsave); - - vcpu_init_cpuid(vcpuN, &empty_cpuid); - vcpu_xsave_set(vcpuN, state->xsave); - *xstate_bv = saved_xstate_bv; - vcpu_xsave_set(vcpuN, state->xsave); - - kvm_x86_state_cleanup(state); - - memset(®s2, 0, sizeof(regs2)); - vcpu_regs_get(vcpu, ®s2); - TEST_ASSERT(!memcmp(®s1, ®s2, sizeof(regs2)), - "Unexpected register values after vcpu_load_state; rdi: %lx rsi: %lx", - (ulong) regs2.rdi, (ulong) regs2.rsi); - } - -done: - kvm_vm_free(vm); -} diff --git a/tools/testing/selftests/kvm/x86_64/svm_int_ctl_test.c b/tools/testing/selftests/kvm/x86_64/svm_int_ctl_test.c deleted file mode 100644 index 916e04248fbb..000000000000 --- a/tools/testing/selftests/kvm/x86_64/svm_int_ctl_test.c +++ /dev/null @@ -1,118 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * svm_int_ctl_test - * - * Copyright (C) 2021, Red Hat, Inc. - * - * Nested SVM testing: test simultaneous use of V_IRQ from L1 and L0. - */ - -#include "test_util.h" -#include "kvm_util.h" -#include "processor.h" -#include "svm_util.h" -#include "apic.h" - -bool vintr_irq_called; -bool intr_irq_called; - -#define VINTR_IRQ_NUMBER 0x20 -#define INTR_IRQ_NUMBER 0x30 - -static void vintr_irq_handler(struct ex_regs *regs) -{ - vintr_irq_called = true; -} - -static void intr_irq_handler(struct ex_regs *regs) -{ - x2apic_write_reg(APIC_EOI, 0x00); - intr_irq_called = true; -} - -static void l2_guest_code(struct svm_test_data *svm) -{ - /* This code raises interrupt INTR_IRQ_NUMBER in the L1's LAPIC, - * and since L1 didn't enable virtual interrupt masking, - * L2 should receive it and not L1. - * - * L2 also has virtual interrupt 'VINTR_IRQ_NUMBER' pending in V_IRQ - * so it should also receive it after the following 'sti'. - */ - x2apic_write_reg(APIC_ICR, - APIC_DEST_SELF | APIC_INT_ASSERT | INTR_IRQ_NUMBER); - - __asm__ __volatile__( - "sti\n" - "nop\n" - ); - - GUEST_ASSERT(vintr_irq_called); - GUEST_ASSERT(intr_irq_called); - - __asm__ __volatile__( - "vmcall\n" - ); -} - -static void l1_guest_code(struct svm_test_data *svm) -{ - #define L2_GUEST_STACK_SIZE 64 - unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; - struct vmcb *vmcb = svm->vmcb; - - x2apic_enable(); - - /* Prepare for L2 execution. 
*/ - generic_svm_setup(svm, l2_guest_code, - &l2_guest_stack[L2_GUEST_STACK_SIZE]); - - /* No virtual interrupt masking */ - vmcb->control.int_ctl &= ~V_INTR_MASKING_MASK; - - /* No intercepts for real and virtual interrupts */ - vmcb->control.intercept &= ~(BIT(INTERCEPT_INTR) | BIT(INTERCEPT_VINTR)); - - /* Make a virtual interrupt VINTR_IRQ_NUMBER pending */ - vmcb->control.int_ctl |= V_IRQ_MASK | (0x1 << V_INTR_PRIO_SHIFT); - vmcb->control.int_vector = VINTR_IRQ_NUMBER; - - run_guest(vmcb, svm->vmcb_gpa); - GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL); - GUEST_DONE(); -} - -int main(int argc, char *argv[]) -{ - struct kvm_vcpu *vcpu; - vm_vaddr_t svm_gva; - struct kvm_vm *vm; - struct ucall uc; - - TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM)); - - vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code); - - vm_install_exception_handler(vm, VINTR_IRQ_NUMBER, vintr_irq_handler); - vm_install_exception_handler(vm, INTR_IRQ_NUMBER, intr_irq_handler); - - vcpu_alloc_svm(vm, &svm_gva); - vcpu_args_set(vcpu, 1, svm_gva); - - vcpu_run(vcpu); - TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); - - switch (get_ucall(vcpu, &uc)) { - case UCALL_ABORT: - REPORT_GUEST_ASSERT(uc); - break; - /* NOT REACHED */ - case UCALL_DONE: - goto done; - default: - TEST_FAIL("Unknown ucall 0x%lx.", uc.cmd); - } -done: - kvm_vm_free(vm); - return 0; -} diff --git a/tools/testing/selftests/kvm/x86_64/svm_nested_shutdown_test.c b/tools/testing/selftests/kvm/x86_64/svm_nested_shutdown_test.c deleted file mode 100644 index 00135cbba35e..000000000000 --- a/tools/testing/selftests/kvm/x86_64/svm_nested_shutdown_test.c +++ /dev/null @@ -1,59 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * svm_nested_shutdown_test - * - * Copyright (C) 2022, Red Hat, Inc. - * - * Nested SVM testing: test that unintercepted shutdown in L2 doesn't crash the host - */ - -#include "test_util.h" -#include "kvm_util.h" -#include "processor.h" -#include "svm_util.h" - -static void l2_guest_code(struct svm_test_data *svm) -{ - __asm__ __volatile__("ud2"); -} - -static void l1_guest_code(struct svm_test_data *svm, struct idt_entry *idt) -{ - #define L2_GUEST_STACK_SIZE 64 - unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; - struct vmcb *vmcb = svm->vmcb; - - generic_svm_setup(svm, l2_guest_code, - &l2_guest_stack[L2_GUEST_STACK_SIZE]); - - vmcb->control.intercept &= ~(BIT(INTERCEPT_SHUTDOWN)); - - idt[6].p = 0; // #UD is intercepted but its injection will cause #NP - idt[11].p = 0; // #NP is not intercepted and will cause another - // #NP that will be converted to #DF - idt[8].p = 0; // #DF will cause #NP which will cause SHUTDOWN - - run_guest(vmcb, svm->vmcb_gpa); - - /* should not reach here */ - GUEST_ASSERT(0); -} - -int main(int argc, char *argv[]) -{ - struct kvm_vcpu *vcpu; - vm_vaddr_t svm_gva; - struct kvm_vm *vm; - - TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM)); - - vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code); - vcpu_alloc_svm(vm, &svm_gva); - - vcpu_args_set(vcpu, 2, svm_gva, vm->arch.idt); - - vcpu_run(vcpu); - TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_SHUTDOWN); - - kvm_vm_free(vm); -} diff --git a/tools/testing/selftests/kvm/x86_64/svm_nested_soft_inject_test.c b/tools/testing/selftests/kvm/x86_64/svm_nested_soft_inject_test.c deleted file mode 100644 index 7b6481d6c0d3..000000000000 --- a/tools/testing/selftests/kvm/x86_64/svm_nested_soft_inject_test.c +++ /dev/null @@ -1,210 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Copyright (C) 2022 Oracle and/or its affiliates. 
- * - * Based on: - * svm_int_ctl_test - * - * Copyright (C) 2021, Red Hat, Inc. - * - */ -#include -#include -#include -#include "apic.h" -#include "kvm_util.h" -#include "processor.h" -#include "svm_util.h" -#include "test_util.h" - -#define INT_NR 0x20 - -static_assert(ATOMIC_INT_LOCK_FREE == 2, "atomic int is not lockless"); - -static unsigned int bp_fired; -static void guest_bp_handler(struct ex_regs *regs) -{ - bp_fired++; -} - -static unsigned int int_fired; -static void l2_guest_code_int(void); - -static void guest_int_handler(struct ex_regs *regs) -{ - int_fired++; - GUEST_ASSERT_EQ(regs->rip, (unsigned long)l2_guest_code_int); -} - -static void l2_guest_code_int(void) -{ - GUEST_ASSERT_EQ(int_fired, 1); - - /* - * Same as the vmmcall() function, but with a ud2 sneaked after the - * vmmcall. The caller injects an exception with the return address - * increased by 2, so the "pop rbp" must be after the ud2 and we cannot - * use vmmcall() directly. - */ - __asm__ __volatile__("push %%rbp; vmmcall; ud2; pop %%rbp" - : : "a"(0xdeadbeef), "c"(0xbeefdead) - : "rbx", "rdx", "rsi", "rdi", "r8", "r9", - "r10", "r11", "r12", "r13", "r14", "r15"); - - GUEST_ASSERT_EQ(bp_fired, 1); - hlt(); -} - -static atomic_int nmi_stage; -#define nmi_stage_get() atomic_load_explicit(&nmi_stage, memory_order_acquire) -#define nmi_stage_inc() atomic_fetch_add_explicit(&nmi_stage, 1, memory_order_acq_rel) -static void guest_nmi_handler(struct ex_regs *regs) -{ - nmi_stage_inc(); - - if (nmi_stage_get() == 1) { - vmmcall(); - GUEST_FAIL("Unexpected resume after VMMCALL"); - } else { - GUEST_ASSERT_EQ(nmi_stage_get(), 3); - GUEST_DONE(); - } -} - -static void l2_guest_code_nmi(void) -{ - ud2(); -} - -static void l1_guest_code(struct svm_test_data *svm, uint64_t is_nmi, uint64_t idt_alt) -{ - #define L2_GUEST_STACK_SIZE 64 - unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; - struct vmcb *vmcb = svm->vmcb; - - if (is_nmi) - x2apic_enable(); - - /* Prepare for L2 execution. */ - generic_svm_setup(svm, - is_nmi ? 
l2_guest_code_nmi : l2_guest_code_int, - &l2_guest_stack[L2_GUEST_STACK_SIZE]); - - vmcb->control.intercept_exceptions |= BIT(PF_VECTOR) | BIT(UD_VECTOR); - vmcb->control.intercept |= BIT(INTERCEPT_NMI) | BIT(INTERCEPT_HLT); - - if (is_nmi) { - vmcb->control.event_inj = SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_NMI; - } else { - vmcb->control.event_inj = INT_NR | SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_SOFT; - /* The return address pushed on stack */ - vmcb->control.next_rip = vmcb->save.rip; - } - - run_guest(vmcb, svm->vmcb_gpa); - __GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL, - "Expected VMMCAL #VMEXIT, got '0x%x', info1 = '0x%lx, info2 = '0x%lx'", - vmcb->control.exit_code, - vmcb->control.exit_info_1, vmcb->control.exit_info_2); - - if (is_nmi) { - clgi(); - x2apic_write_reg(APIC_ICR, APIC_DEST_SELF | APIC_INT_ASSERT | APIC_DM_NMI); - - GUEST_ASSERT_EQ(nmi_stage_get(), 1); - nmi_stage_inc(); - - stgi(); - /* self-NMI happens here */ - while (true) - cpu_relax(); - } - - /* Skip over VMMCALL */ - vmcb->save.rip += 3; - - /* Switch to alternate IDT to cause intervening NPF again */ - vmcb->save.idtr.base = idt_alt; - vmcb->control.clean = 0; /* &= ~BIT(VMCB_DT) would be enough */ - - vmcb->control.event_inj = BP_VECTOR | SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_EXEPT; - /* The return address pushed on stack, skip over UD2 */ - vmcb->control.next_rip = vmcb->save.rip + 2; - - run_guest(vmcb, svm->vmcb_gpa); - __GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_HLT, - "Expected HLT #VMEXIT, got '0x%x', info1 = '0x%lx, info2 = '0x%lx'", - vmcb->control.exit_code, - vmcb->control.exit_info_1, vmcb->control.exit_info_2); - - GUEST_DONE(); -} - -static void run_test(bool is_nmi) -{ - struct kvm_vcpu *vcpu; - struct kvm_vm *vm; - vm_vaddr_t svm_gva; - vm_vaddr_t idt_alt_vm; - struct kvm_guest_debug debug; - - pr_info("Running %s test\n", is_nmi ? 
"NMI" : "soft int"); - - vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code); - - vm_install_exception_handler(vm, NMI_VECTOR, guest_nmi_handler); - vm_install_exception_handler(vm, BP_VECTOR, guest_bp_handler); - vm_install_exception_handler(vm, INT_NR, guest_int_handler); - - vcpu_alloc_svm(vm, &svm_gva); - - if (!is_nmi) { - void *idt, *idt_alt; - - idt_alt_vm = vm_vaddr_alloc_page(vm); - idt_alt = addr_gva2hva(vm, idt_alt_vm); - idt = addr_gva2hva(vm, vm->arch.idt); - memcpy(idt_alt, idt, getpagesize()); - } else { - idt_alt_vm = 0; - } - vcpu_args_set(vcpu, 3, svm_gva, (uint64_t)is_nmi, (uint64_t)idt_alt_vm); - - memset(&debug, 0, sizeof(debug)); - vcpu_guest_debug_set(vcpu, &debug); - - struct ucall uc; - - alarm(2); - vcpu_run(vcpu); - alarm(0); - TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); - - switch (get_ucall(vcpu, &uc)) { - case UCALL_ABORT: - REPORT_GUEST_ASSERT(uc); - break; - /* NOT REACHED */ - case UCALL_DONE: - goto done; - default: - TEST_FAIL("Unknown ucall 0x%lx.", uc.cmd); - } -done: - kvm_vm_free(vm); -} - -int main(int argc, char *argv[]) -{ - TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM)); - - TEST_ASSERT(kvm_cpu_has(X86_FEATURE_NRIPS), - "KVM with nSVM is supposed to unconditionally advertise nRIP Save"); - - atomic_init(&nmi_stage, 0); - - run_test(false); - run_test(true); - - return 0; -} diff --git a/tools/testing/selftests/kvm/x86_64/svm_vmcall_test.c b/tools/testing/selftests/kvm/x86_64/svm_vmcall_test.c deleted file mode 100644 index 8a62cca28cfb..000000000000 --- a/tools/testing/selftests/kvm/x86_64/svm_vmcall_test.c +++ /dev/null @@ -1,70 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * svm_vmcall_test - * - * Copyright (C) 2020, Red Hat, Inc. - * - * Nested SVM testing: VMCALL - */ - -#include "test_util.h" -#include "kvm_util.h" -#include "processor.h" -#include "svm_util.h" - -static void l2_guest_code(struct svm_test_data *svm) -{ - __asm__ __volatile__("vmcall"); -} - -static void l1_guest_code(struct svm_test_data *svm) -{ - #define L2_GUEST_STACK_SIZE 64 - unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; - struct vmcb *vmcb = svm->vmcb; - - /* Prepare for L2 execution. */ - generic_svm_setup(svm, l2_guest_code, - &l2_guest_stack[L2_GUEST_STACK_SIZE]); - - run_guest(vmcb, svm->vmcb_gpa); - - GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL); - GUEST_DONE(); -} - -int main(int argc, char *argv[]) -{ - struct kvm_vcpu *vcpu; - vm_vaddr_t svm_gva; - struct kvm_vm *vm; - - TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM)); - - vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code); - - vcpu_alloc_svm(vm, &svm_gva); - vcpu_args_set(vcpu, 1, svm_gva); - - for (;;) { - struct ucall uc; - - vcpu_run(vcpu); - TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); - - switch (get_ucall(vcpu, &uc)) { - case UCALL_ABORT: - REPORT_GUEST_ASSERT(uc); - /* NOT REACHED */ - case UCALL_SYNC: - break; - case UCALL_DONE: - goto done; - default: - TEST_FAIL("Unknown ucall 0x%lx.", uc.cmd); - } - } -done: - kvm_vm_free(vm); - return 0; -} diff --git a/tools/testing/selftests/kvm/x86_64/sync_regs_test.c b/tools/testing/selftests/kvm/x86_64/sync_regs_test.c deleted file mode 100644 index 8fa3948b0170..000000000000 --- a/tools/testing/selftests/kvm/x86_64/sync_regs_test.c +++ /dev/null @@ -1,411 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Test for x86 KVM_CAP_SYNC_REGS - * - * Copyright (C) 2018, Google LLC. 
- * - * Verifies expected behavior of x86 KVM_CAP_SYNC_REGS functionality, - * including requesting an invalid register set, updates to/from values - * in kvm_run.s.regs when kvm_valid_regs and kvm_dirty_regs are toggled. - */ -#include -#include -#include -#include -#include -#include - -#include "kvm_test_harness.h" -#include "test_util.h" -#include "kvm_util.h" -#include "processor.h" - -#define UCALL_PIO_PORT ((uint16_t)0x1000) - -struct ucall uc_none = { - .cmd = UCALL_NONE, -}; - -/* - * ucall is embedded here to protect against compiler reshuffling registers - * before calling a function. In this test we only need to get KVM_EXIT_IO - * vmexit and preserve RBX, no additional information is needed. - */ -void guest_code(void) -{ - asm volatile("1: in %[port], %%al\n" - "add $0x1, %%rbx\n" - "jmp 1b" - : : [port] "d" (UCALL_PIO_PORT), "D" (&uc_none) - : "rax", "rbx"); -} - -KVM_ONE_VCPU_TEST_SUITE(sync_regs_test); - -static void compare_regs(struct kvm_regs *left, struct kvm_regs *right) -{ -#define REG_COMPARE(reg) \ - TEST_ASSERT(left->reg == right->reg, \ - "Register " #reg \ - " values did not match: 0x%llx, 0x%llx", \ - left->reg, right->reg) - REG_COMPARE(rax); - REG_COMPARE(rbx); - REG_COMPARE(rcx); - REG_COMPARE(rdx); - REG_COMPARE(rsi); - REG_COMPARE(rdi); - REG_COMPARE(rsp); - REG_COMPARE(rbp); - REG_COMPARE(r8); - REG_COMPARE(r9); - REG_COMPARE(r10); - REG_COMPARE(r11); - REG_COMPARE(r12); - REG_COMPARE(r13); - REG_COMPARE(r14); - REG_COMPARE(r15); - REG_COMPARE(rip); - REG_COMPARE(rflags); -#undef REG_COMPARE -} - -static void compare_sregs(struct kvm_sregs *left, struct kvm_sregs *right) -{ -} - -static void compare_vcpu_events(struct kvm_vcpu_events *left, - struct kvm_vcpu_events *right) -{ -} - -#define TEST_SYNC_FIELDS (KVM_SYNC_X86_REGS|KVM_SYNC_X86_SREGS|KVM_SYNC_X86_EVENTS) -#define INVALID_SYNC_FIELD 0x80000000 - -/* - * Set an exception as pending *and* injected while KVM is processing events. - * KVM is supposed to ignore/drop pending exceptions if userspace is also - * requesting that an exception be injected. - */ -static void *race_events_inj_pen(void *arg) -{ - struct kvm_run *run = (struct kvm_run *)arg; - struct kvm_vcpu_events *events = &run->s.regs.events; - - WRITE_ONCE(events->exception.nr, UD_VECTOR); - - for (;;) { - WRITE_ONCE(run->kvm_dirty_regs, KVM_SYNC_X86_EVENTS); - WRITE_ONCE(events->flags, 0); - WRITE_ONCE(events->exception.injected, 1); - WRITE_ONCE(events->exception.pending, 1); - - pthread_testcancel(); - } - - return NULL; -} - -/* - * Set an invalid exception vector while KVM is processing events. KVM is - * supposed to reject any vector >= 32, as well as NMIs (vector 2). - */ -static void *race_events_exc(void *arg) -{ - struct kvm_run *run = (struct kvm_run *)arg; - struct kvm_vcpu_events *events = &run->s.regs.events; - - for (;;) { - WRITE_ONCE(run->kvm_dirty_regs, KVM_SYNC_X86_EVENTS); - WRITE_ONCE(events->flags, 0); - WRITE_ONCE(events->exception.nr, UD_VECTOR); - WRITE_ONCE(events->exception.pending, 1); - WRITE_ONCE(events->exception.nr, 255); - - pthread_testcancel(); - } - - return NULL; -} - -/* - * Toggle CR4.PAE while KVM is processing SREGS, EFER.LME=1 with CR4.PAE=0 is - * illegal, and KVM's MMU heavily relies on vCPU state being valid. 
- */ -static noinline void *race_sregs_cr4(void *arg) -{ - struct kvm_run *run = (struct kvm_run *)arg; - __u64 *cr4 = &run->s.regs.sregs.cr4; - __u64 pae_enabled = *cr4; - __u64 pae_disabled = *cr4 & ~X86_CR4_PAE; - - for (;;) { - WRITE_ONCE(run->kvm_dirty_regs, KVM_SYNC_X86_SREGS); - WRITE_ONCE(*cr4, pae_enabled); - asm volatile(".rept 512\n\t" - "nop\n\t" - ".endr"); - WRITE_ONCE(*cr4, pae_disabled); - - pthread_testcancel(); - } - - return NULL; -} - -static void race_sync_regs(struct kvm_vcpu *vcpu, void *racer) -{ - const time_t TIMEOUT = 2; /* seconds, roughly */ - struct kvm_x86_state *state; - struct kvm_translation tr; - struct kvm_run *run; - pthread_t thread; - time_t t; - - run = vcpu->run; - - run->kvm_valid_regs = KVM_SYNC_X86_SREGS; - vcpu_run(vcpu); - run->kvm_valid_regs = 0; - - /* Save state *before* spawning the thread that mucks with vCPU state. */ - state = vcpu_save_state(vcpu); - - /* - * Selftests run 64-bit guests by default, both EFER.LME and CR4.PAE - * should already be set in guest state. - */ - TEST_ASSERT((run->s.regs.sregs.cr4 & X86_CR4_PAE) && - (run->s.regs.sregs.efer & EFER_LME), - "vCPU should be in long mode, CR4.PAE=%d, EFER.LME=%d", - !!(run->s.regs.sregs.cr4 & X86_CR4_PAE), - !!(run->s.regs.sregs.efer & EFER_LME)); - - TEST_ASSERT_EQ(pthread_create(&thread, NULL, racer, (void *)run), 0); - - for (t = time(NULL) + TIMEOUT; time(NULL) < t;) { - /* - * Reload known good state if the vCPU triple faults, e.g. due - * to the unhandled #GPs being injected. VMX preserves state - * on shutdown, but SVM synthesizes an INIT as the VMCB state - * is architecturally undefined on triple fault. - */ - if (!__vcpu_run(vcpu) && run->exit_reason == KVM_EXIT_SHUTDOWN) - vcpu_load_state(vcpu, state); - - if (racer == race_sregs_cr4) { - tr = (struct kvm_translation) { .linear_address = 0 }; - __vcpu_ioctl(vcpu, KVM_TRANSLATE, &tr); - } - } - - TEST_ASSERT_EQ(pthread_cancel(thread), 0); - TEST_ASSERT_EQ(pthread_join(thread, NULL), 0); - - kvm_x86_state_cleanup(state); -} - -KVM_ONE_VCPU_TEST(sync_regs_test, read_invalid, guest_code) -{ - struct kvm_run *run = vcpu->run; - int rv; - - /* Request reading invalid register set from VCPU. */ - run->kvm_valid_regs = INVALID_SYNC_FIELD; - rv = _vcpu_run(vcpu); - TEST_ASSERT(rv < 0 && errno == EINVAL, - "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d", - rv); - run->kvm_valid_regs = 0; - - run->kvm_valid_regs = INVALID_SYNC_FIELD | TEST_SYNC_FIELDS; - rv = _vcpu_run(vcpu); - TEST_ASSERT(rv < 0 && errno == EINVAL, - "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d", - rv); - run->kvm_valid_regs = 0; -} - -KVM_ONE_VCPU_TEST(sync_regs_test, set_invalid, guest_code) -{ - struct kvm_run *run = vcpu->run; - int rv; - - /* Request setting invalid register set into VCPU. */ - run->kvm_dirty_regs = INVALID_SYNC_FIELD; - rv = _vcpu_run(vcpu); - TEST_ASSERT(rv < 0 && errno == EINVAL, - "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d", - rv); - run->kvm_dirty_regs = 0; - - run->kvm_dirty_regs = INVALID_SYNC_FIELD | TEST_SYNC_FIELDS; - rv = _vcpu_run(vcpu); - TEST_ASSERT(rv < 0 && errno == EINVAL, - "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d", - rv); - run->kvm_dirty_regs = 0; -} - -KVM_ONE_VCPU_TEST(sync_regs_test, req_and_verify_all_valid, guest_code) -{ - struct kvm_run *run = vcpu->run; - struct kvm_vcpu_events events; - struct kvm_sregs sregs; - struct kvm_regs regs; - - /* Request and verify all valid register sets. 
*/ - /* TODO: BUILD TIME CHECK: TEST_ASSERT(KVM_SYNC_X86_NUM_FIELDS != 3); */ - run->kvm_valid_regs = TEST_SYNC_FIELDS; - vcpu_run(vcpu); - TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); - - vcpu_regs_get(vcpu, ®s); - compare_regs(®s, &run->s.regs.regs); - - vcpu_sregs_get(vcpu, &sregs); - compare_sregs(&sregs, &run->s.regs.sregs); - - vcpu_events_get(vcpu, &events); - compare_vcpu_events(&events, &run->s.regs.events); -} - -KVM_ONE_VCPU_TEST(sync_regs_test, set_and_verify_various, guest_code) -{ - struct kvm_run *run = vcpu->run; - struct kvm_vcpu_events events; - struct kvm_sregs sregs; - struct kvm_regs regs; - - /* Run once to get register set */ - run->kvm_valid_regs = TEST_SYNC_FIELDS; - vcpu_run(vcpu); - TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); - - /* Set and verify various register values. */ - run->s.regs.regs.rbx = 0xBAD1DEA; - run->s.regs.sregs.apic_base = 1 << 11; - /* TODO run->s.regs.events.XYZ = ABC; */ - - run->kvm_valid_regs = TEST_SYNC_FIELDS; - run->kvm_dirty_regs = KVM_SYNC_X86_REGS | KVM_SYNC_X86_SREGS; - vcpu_run(vcpu); - TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); - TEST_ASSERT(run->s.regs.regs.rbx == 0xBAD1DEA + 1, - "rbx sync regs value incorrect 0x%llx.", - run->s.regs.regs.rbx); - TEST_ASSERT(run->s.regs.sregs.apic_base == 1 << 11, - "apic_base sync regs value incorrect 0x%llx.", - run->s.regs.sregs.apic_base); - - vcpu_regs_get(vcpu, ®s); - compare_regs(®s, &run->s.regs.regs); - - vcpu_sregs_get(vcpu, &sregs); - compare_sregs(&sregs, &run->s.regs.sregs); - - vcpu_events_get(vcpu, &events); - compare_vcpu_events(&events, &run->s.regs.events); -} - -KVM_ONE_VCPU_TEST(sync_regs_test, clear_kvm_dirty_regs_bits, guest_code) -{ - struct kvm_run *run = vcpu->run; - - /* Clear kvm_dirty_regs bits, verify new s.regs values are - * overwritten with existing guest values. - */ - run->kvm_valid_regs = TEST_SYNC_FIELDS; - run->kvm_dirty_regs = 0; - run->s.regs.regs.rbx = 0xDEADBEEF; - vcpu_run(vcpu); - TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); - TEST_ASSERT(run->s.regs.regs.rbx != 0xDEADBEEF, - "rbx sync regs value incorrect 0x%llx.", - run->s.regs.regs.rbx); -} - -KVM_ONE_VCPU_TEST(sync_regs_test, clear_kvm_valid_and_dirty_regs, guest_code) -{ - struct kvm_run *run = vcpu->run; - struct kvm_regs regs; - - /* Run once to get register set */ - run->kvm_valid_regs = TEST_SYNC_FIELDS; - vcpu_run(vcpu); - TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); - - /* Clear kvm_valid_regs bits and kvm_dirty_bits. - * Verify s.regs values are not overwritten with existing guest values - * and that guest values are not overwritten with kvm_sync_regs values. - */ - run->kvm_valid_regs = 0; - run->kvm_dirty_regs = 0; - run->s.regs.regs.rbx = 0xAAAA; - vcpu_regs_get(vcpu, ®s); - regs.rbx = 0xBAC0; - vcpu_regs_set(vcpu, ®s); - vcpu_run(vcpu); - TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); - TEST_ASSERT(run->s.regs.regs.rbx == 0xAAAA, - "rbx sync regs value incorrect 0x%llx.", - run->s.regs.regs.rbx); - vcpu_regs_get(vcpu, ®s); - TEST_ASSERT(regs.rbx == 0xBAC0 + 1, - "rbx guest value incorrect 0x%llx.", - regs.rbx); -} - -KVM_ONE_VCPU_TEST(sync_regs_test, clear_kvm_valid_regs_bits, guest_code) -{ - struct kvm_run *run = vcpu->run; - struct kvm_regs regs; - - /* Run once to get register set */ - run->kvm_valid_regs = TEST_SYNC_FIELDS; - vcpu_run(vcpu); - TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); - - /* Clear kvm_valid_regs bits. Verify s.regs values are not overwritten - * with existing guest values but that guest values are overwritten - * with kvm_sync_regs values. 
- */ - run->kvm_valid_regs = 0; - run->kvm_dirty_regs = TEST_SYNC_FIELDS; - run->s.regs.regs.rbx = 0xBBBB; - vcpu_run(vcpu); - TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); - TEST_ASSERT(run->s.regs.regs.rbx == 0xBBBB, - "rbx sync regs value incorrect 0x%llx.", - run->s.regs.regs.rbx); - vcpu_regs_get(vcpu, ®s); - TEST_ASSERT(regs.rbx == 0xBBBB + 1, - "rbx guest value incorrect 0x%llx.", - regs.rbx); -} - -KVM_ONE_VCPU_TEST(sync_regs_test, race_cr4, guest_code) -{ - race_sync_regs(vcpu, race_sregs_cr4); -} - -KVM_ONE_VCPU_TEST(sync_regs_test, race_exc, guest_code) -{ - race_sync_regs(vcpu, race_events_exc); -} - -KVM_ONE_VCPU_TEST(sync_regs_test, race_inj_pen, guest_code) -{ - race_sync_regs(vcpu, race_events_inj_pen); -} - -int main(int argc, char *argv[]) -{ - int cap; - - cap = kvm_check_cap(KVM_CAP_SYNC_REGS); - TEST_REQUIRE((cap & TEST_SYNC_FIELDS) == TEST_SYNC_FIELDS); - TEST_REQUIRE(!(cap & INVALID_SYNC_FIELD)); - - return test_harness_run(argc, argv); -} diff --git a/tools/testing/selftests/kvm/x86_64/triple_fault_event_test.c b/tools/testing/selftests/kvm/x86_64/triple_fault_event_test.c deleted file mode 100644 index 56306a19144a..000000000000 --- a/tools/testing/selftests/kvm/x86_64/triple_fault_event_test.c +++ /dev/null @@ -1,124 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -#include "test_util.h" -#include "kvm_util.h" -#include "processor.h" -#include "vmx.h" -#include "svm_util.h" - -#include -#include - -#include "kselftest.h" - -#define ARBITRARY_IO_PORT 0x2000 - -/* The virtual machine object. */ -static struct kvm_vm *vm; - -static void l2_guest_code(void) -{ - asm volatile("inb %%dx, %%al" - : : [port] "d" (ARBITRARY_IO_PORT) : "rax"); -} - -#define L2_GUEST_STACK_SIZE 64 -unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; - -void l1_guest_code_vmx(struct vmx_pages *vmx) -{ - - GUEST_ASSERT(vmx->vmcs_gpa); - GUEST_ASSERT(prepare_for_vmx_operation(vmx)); - GUEST_ASSERT(load_vmcs(vmx)); - - prepare_vmcs(vmx, l2_guest_code, - &l2_guest_stack[L2_GUEST_STACK_SIZE]); - - GUEST_ASSERT(!vmlaunch()); - /* L2 should triple fault after a triple fault event injected. 
*/ - GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_TRIPLE_FAULT); - GUEST_DONE(); -} - -void l1_guest_code_svm(struct svm_test_data *svm) -{ - struct vmcb *vmcb = svm->vmcb; - - generic_svm_setup(svm, l2_guest_code, - &l2_guest_stack[L2_GUEST_STACK_SIZE]); - - /* don't intercept shutdown to test the case of SVM allowing to do so */ - vmcb->control.intercept &= ~(BIT(INTERCEPT_SHUTDOWN)); - - run_guest(vmcb, svm->vmcb_gpa); - - /* should not reach here, L1 should crash */ - GUEST_ASSERT(0); -} - -int main(void) -{ - struct kvm_vcpu *vcpu; - struct kvm_run *run; - struct kvm_vcpu_events events; - struct ucall uc; - - bool has_vmx = kvm_cpu_has(X86_FEATURE_VMX); - bool has_svm = kvm_cpu_has(X86_FEATURE_SVM); - - TEST_REQUIRE(has_vmx || has_svm); - - TEST_REQUIRE(kvm_has_cap(KVM_CAP_X86_TRIPLE_FAULT_EVENT)); - - - if (has_vmx) { - vm_vaddr_t vmx_pages_gva; - - vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code_vmx); - vcpu_alloc_vmx(vm, &vmx_pages_gva); - vcpu_args_set(vcpu, 1, vmx_pages_gva); - } else { - vm_vaddr_t svm_gva; - - vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code_svm); - vcpu_alloc_svm(vm, &svm_gva); - vcpu_args_set(vcpu, 1, svm_gva); - } - - vm_enable_cap(vm, KVM_CAP_X86_TRIPLE_FAULT_EVENT, 1); - run = vcpu->run; - vcpu_run(vcpu); - - TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); - TEST_ASSERT(run->io.port == ARBITRARY_IO_PORT, - "Expected IN from port %d from L2, got port %d", - ARBITRARY_IO_PORT, run->io.port); - vcpu_events_get(vcpu, &events); - events.flags |= KVM_VCPUEVENT_VALID_TRIPLE_FAULT; - events.triple_fault.pending = true; - vcpu_events_set(vcpu, &events); - run->immediate_exit = true; - vcpu_run_complete_io(vcpu); - - vcpu_events_get(vcpu, &events); - TEST_ASSERT(events.flags & KVM_VCPUEVENT_VALID_TRIPLE_FAULT, - "Triple fault event invalid"); - TEST_ASSERT(events.triple_fault.pending, - "No triple fault pending"); - vcpu_run(vcpu); - - - if (has_svm) { - TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_SHUTDOWN); - } else { - switch (get_ucall(vcpu, &uc)) { - case UCALL_DONE: - break; - case UCALL_ABORT: - REPORT_GUEST_ASSERT(uc); - default: - TEST_FAIL("Unexpected ucall: %lu", uc.cmd); - } - } - return 0; -} diff --git a/tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c b/tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c deleted file mode 100644 index 12b0964f4f13..000000000000 --- a/tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c +++ /dev/null @@ -1,161 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Tests for MSR_IA32_TSC and MSR_IA32_TSC_ADJUST. - * - * Copyright (C) 2020, Red Hat, Inc. - */ -#include -#include -#include "kvm_util.h" -#include "processor.h" - -#define UNITY (1ull << 30) -#define HOST_ADJUST (UNITY * 64) -#define GUEST_STEP (UNITY * 4) -#define ROUND(x) ((x + UNITY / 2) & -UNITY) -#define rounded_rdmsr(x) ROUND(rdmsr(x)) -#define rounded_host_rdmsr(x) ROUND(vcpu_get_msr(vcpu, x)) - -static void guest_code(void) -{ - u64 val = 0; - - GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC), val); - GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC_ADJUST), val); - - /* Guest: writes to MSR_IA32_TSC affect both MSRs. */ - val = 1ull * GUEST_STEP; - wrmsr(MSR_IA32_TSC, val); - GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC), val); - GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC_ADJUST), val); - - /* Guest: writes to MSR_IA32_TSC_ADJUST affect both MSRs. 
*/ - GUEST_SYNC(2); - val = 2ull * GUEST_STEP; - wrmsr(MSR_IA32_TSC_ADJUST, val); - GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC), val); - GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC_ADJUST), val); - - /* Host: setting the TSC offset. */ - GUEST_SYNC(3); - GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val); - GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC_ADJUST), val); - - /* - * Guest: writes to MSR_IA32_TSC_ADJUST do not destroy the - * host-side offset and affect both MSRs. - */ - GUEST_SYNC(4); - val = 3ull * GUEST_STEP; - wrmsr(MSR_IA32_TSC_ADJUST, val); - GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val); - GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC_ADJUST), val); - - /* - * Guest: writes to MSR_IA32_TSC affect both MSRs, so the host-side - * offset is now visible in MSR_IA32_TSC_ADJUST. - */ - GUEST_SYNC(5); - val = 4ull * GUEST_STEP; - wrmsr(MSR_IA32_TSC, val); - GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC), val); - GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC_ADJUST), val - HOST_ADJUST); - - GUEST_DONE(); -} - -static void run_vcpu(struct kvm_vcpu *vcpu, int stage) -{ - struct ucall uc; - - vcpu_run(vcpu); - - switch (get_ucall(vcpu, &uc)) { - case UCALL_SYNC: - if (!strcmp((const char *)uc.args[0], "hello") && - uc.args[1] == stage + 1) - ksft_test_result_pass("stage %d passed\n", stage + 1); - else - ksft_test_result_fail( - "stage %d: Unexpected register values vmexit, got %lx", - stage + 1, (ulong)uc.args[1]); - return; - case UCALL_DONE: - ksft_test_result_pass("stage %d passed\n", stage + 1); - return; - case UCALL_ABORT: - REPORT_GUEST_ASSERT(uc); - default: - TEST_ASSERT(false, "Unexpected exit: %s", - exit_reason_str(vcpu->run->exit_reason)); - } -} - -int main(void) -{ - struct kvm_vcpu *vcpu; - struct kvm_vm *vm; - uint64_t val; - - ksft_print_header(); - ksft_set_plan(5); - - vm = vm_create_with_one_vcpu(&vcpu, guest_code); - - val = 0; - TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), val); - TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val); - - /* Guest: writes to MSR_IA32_TSC affect both MSRs. */ - run_vcpu(vcpu, 1); - val = 1ull * GUEST_STEP; - TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), val); - TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val); - - /* Guest: writes to MSR_IA32_TSC_ADJUST affect both MSRs. */ - run_vcpu(vcpu, 2); - val = 2ull * GUEST_STEP; - TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), val); - TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val); - - /* - * Host: writes to MSR_IA32_TSC set the host-side offset - * and therefore do not change MSR_IA32_TSC_ADJUST. - */ - vcpu_set_msr(vcpu, MSR_IA32_TSC, HOST_ADJUST + val); - TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val); - TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val); - run_vcpu(vcpu, 3); - - /* Host: writes to MSR_IA32_TSC_ADJUST do not modify the TSC. */ - vcpu_set_msr(vcpu, MSR_IA32_TSC_ADJUST, UNITY * 123456); - TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val); - TEST_ASSERT_EQ(vcpu_get_msr(vcpu, MSR_IA32_TSC_ADJUST), UNITY * 123456); - - /* Restore previous value. */ - vcpu_set_msr(vcpu, MSR_IA32_TSC_ADJUST, val); - TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val); - TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val); - - /* - * Guest: writes to MSR_IA32_TSC_ADJUST do not destroy the - * host-side offset and affect both MSRs. 
- */ - run_vcpu(vcpu, 4); - val = 3ull * GUEST_STEP; - TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val); - TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val); - - /* - * Guest: writes to MSR_IA32_TSC affect both MSRs, so the host-side - * offset is now visible in MSR_IA32_TSC_ADJUST. - */ - run_vcpu(vcpu, 5); - val = 4ull * GUEST_STEP; - TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), val); - TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val - HOST_ADJUST); - - kvm_vm_free(vm); - - ksft_finished(); /* Print results and exit() accordingly */ -} diff --git a/tools/testing/selftests/kvm/x86_64/tsc_scaling_sync.c b/tools/testing/selftests/kvm/x86_64/tsc_scaling_sync.c deleted file mode 100644 index 59c7304f805e..000000000000 --- a/tools/testing/selftests/kvm/x86_64/tsc_scaling_sync.c +++ /dev/null @@ -1,110 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Copyright © 2021 Amazon.com, Inc. or its affiliates. - */ - -#include "test_util.h" -#include "kvm_util.h" -#include "processor.h" - -#include -#include -#include -#include -#include - -#define NR_TEST_VCPUS 20 - -static struct kvm_vm *vm; -pthread_spinlock_t create_lock; - -#define TEST_TSC_KHZ 2345678UL -#define TEST_TSC_OFFSET 200000000 - -uint64_t tsc_sync; -static void guest_code(void) -{ - uint64_t start_tsc, local_tsc, tmp; - - start_tsc = rdtsc(); - do { - tmp = READ_ONCE(tsc_sync); - local_tsc = rdtsc(); - WRITE_ONCE(tsc_sync, local_tsc); - if (unlikely(local_tsc < tmp)) - GUEST_SYNC_ARGS(0, local_tsc, tmp, 0, 0); - - } while (local_tsc - start_tsc < 5000 * TEST_TSC_KHZ); - - GUEST_DONE(); -} - - -static void *run_vcpu(void *_cpu_nr) -{ - unsigned long vcpu_id = (unsigned long)_cpu_nr; - unsigned long failures = 0; - static bool first_cpu_done; - struct kvm_vcpu *vcpu; - - /* The kernel is fine, but vm_vcpu_add() needs locking */ - pthread_spin_lock(&create_lock); - - vcpu = vm_vcpu_add(vm, vcpu_id, guest_code); - - if (!first_cpu_done) { - first_cpu_done = true; - vcpu_set_msr(vcpu, MSR_IA32_TSC, TEST_TSC_OFFSET); - } - - pthread_spin_unlock(&create_lock); - - for (;;) { - struct ucall uc; - - vcpu_run(vcpu); - TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); - - switch (get_ucall(vcpu, &uc)) { - case UCALL_DONE: - goto out; - - case UCALL_SYNC: - printf("Guest %d sync %lx %lx %ld\n", vcpu->id, - uc.args[2], uc.args[3], uc.args[2] - uc.args[3]); - failures++; - break; - - default: - TEST_FAIL("Unknown ucall %lu", uc.cmd); - } - } - out: - return (void *)failures; -} - -int main(int argc, char *argv[]) -{ - TEST_REQUIRE(kvm_has_cap(KVM_CAP_VM_TSC_CONTROL)); - - vm = vm_create(NR_TEST_VCPUS); - vm_ioctl(vm, KVM_SET_TSC_KHZ, (void *) TEST_TSC_KHZ); - - pthread_spin_init(&create_lock, PTHREAD_PROCESS_PRIVATE); - pthread_t cpu_threads[NR_TEST_VCPUS]; - unsigned long cpu; - for (cpu = 0; cpu < NR_TEST_VCPUS; cpu++) - pthread_create(&cpu_threads[cpu], NULL, run_vcpu, (void *)cpu); - - unsigned long failures = 0; - for (cpu = 0; cpu < NR_TEST_VCPUS; cpu++) { - void *this_cpu_failures; - pthread_join(cpu_threads[cpu], &this_cpu_failures); - failures += (unsigned long)this_cpu_failures; - } - - TEST_ASSERT(!failures, "TSC sync failed"); - pthread_spin_destroy(&create_lock); - kvm_vm_free(vm); - return 0; -} diff --git a/tools/testing/selftests/kvm/x86_64/ucna_injection_test.c b/tools/testing/selftests/kvm/x86_64/ucna_injection_test.c deleted file mode 100644 index 57f157c06b39..000000000000 --- a/tools/testing/selftests/kvm/x86_64/ucna_injection_test.c +++ /dev/null @@ -1,295 +0,0 @@ -// 
SPDX-License-Identifier: GPL-2.0 -/* - * ucna_injection_test - * - * Copyright (C) 2022, Google LLC. - * - * This work is licensed under the terms of the GNU GPL, version 2. - * - * Test that user space can inject UnCorrectable No Action required (UCNA) - * memory errors to the guest. - * - * The test starts one vCPU with the MCG_CMCI_P enabled. It verifies that - * proper UCNA errors can be injected to a vCPU with MCG_CMCI_P and - * corresponding per-bank control register (MCI_CTL2) bit enabled. - * The test also checks that the UCNA errors get recorded in the - * Machine Check bank registers no matter the error signal interrupts get - * delivered into the guest or not. - * - */ -#include -#include -#include -#include - -#include "kvm_util.h" -#include "mce.h" -#include "processor.h" -#include "test_util.h" -#include "apic.h" - -#define SYNC_FIRST_UCNA 9 -#define SYNC_SECOND_UCNA 10 -#define SYNC_GP 11 -#define FIRST_UCNA_ADDR 0xdeadbeef -#define SECOND_UCNA_ADDR 0xcafeb0ba - -/* - * Vector for the CMCI interrupt. - * Value is arbitrary. Any value in 0x20-0xFF should work: - * https://wiki.osdev.org/Interrupt_Vector_Table - */ -#define CMCI_VECTOR 0xa9 - -#define UCNA_BANK 0x7 // IMC0 bank - -#define MCI_CTL2_RESERVED_BIT BIT_ULL(29) - -static uint64_t supported_mcg_caps; - -/* - * Record states about the injected UCNA. - * The variables started with the 'i_' prefixes are recorded in interrupt - * handler. Variables without the 'i_' prefixes are recorded in guest main - * execution thread. - */ -static volatile uint64_t i_ucna_rcvd; -static volatile uint64_t i_ucna_addr; -static volatile uint64_t ucna_addr; -static volatile uint64_t ucna_addr2; - -struct thread_params { - struct kvm_vcpu *vcpu; - uint64_t *p_i_ucna_rcvd; - uint64_t *p_i_ucna_addr; - uint64_t *p_ucna_addr; - uint64_t *p_ucna_addr2; -}; - -static void verify_apic_base_addr(void) -{ - uint64_t msr = rdmsr(MSR_IA32_APICBASE); - uint64_t base = GET_APIC_BASE(msr); - - GUEST_ASSERT(base == APIC_DEFAULT_GPA); -} - -static void ucna_injection_guest_code(void) -{ - uint64_t ctl2; - verify_apic_base_addr(); - xapic_enable(); - - /* Sets up the interrupt vector and enables per-bank CMCI sigaling. */ - xapic_write_reg(APIC_LVTCMCI, CMCI_VECTOR | APIC_DM_FIXED); - ctl2 = rdmsr(MSR_IA32_MCx_CTL2(UCNA_BANK)); - wrmsr(MSR_IA32_MCx_CTL2(UCNA_BANK), ctl2 | MCI_CTL2_CMCI_EN); - - /* Enables interrupt in guest. */ - asm volatile("sti"); - - /* Let user space inject the first UCNA */ - GUEST_SYNC(SYNC_FIRST_UCNA); - - ucna_addr = rdmsr(MSR_IA32_MCx_ADDR(UCNA_BANK)); - - /* Disables the per-bank CMCI signaling. 
*/ - ctl2 = rdmsr(MSR_IA32_MCx_CTL2(UCNA_BANK)); - wrmsr(MSR_IA32_MCx_CTL2(UCNA_BANK), ctl2 & ~MCI_CTL2_CMCI_EN); - - /* Let the user space inject the second UCNA */ - GUEST_SYNC(SYNC_SECOND_UCNA); - - ucna_addr2 = rdmsr(MSR_IA32_MCx_ADDR(UCNA_BANK)); - GUEST_DONE(); -} - -static void cmci_disabled_guest_code(void) -{ - uint64_t ctl2 = rdmsr(MSR_IA32_MCx_CTL2(UCNA_BANK)); - wrmsr(MSR_IA32_MCx_CTL2(UCNA_BANK), ctl2 | MCI_CTL2_CMCI_EN); - - GUEST_DONE(); -} - -static void cmci_enabled_guest_code(void) -{ - uint64_t ctl2 = rdmsr(MSR_IA32_MCx_CTL2(UCNA_BANK)); - wrmsr(MSR_IA32_MCx_CTL2(UCNA_BANK), ctl2 | MCI_CTL2_RESERVED_BIT); - - GUEST_DONE(); -} - -static void guest_cmci_handler(struct ex_regs *regs) -{ - i_ucna_rcvd++; - i_ucna_addr = rdmsr(MSR_IA32_MCx_ADDR(UCNA_BANK)); - xapic_write_reg(APIC_EOI, 0); -} - -static void guest_gp_handler(struct ex_regs *regs) -{ - GUEST_SYNC(SYNC_GP); -} - -static void run_vcpu_expect_gp(struct kvm_vcpu *vcpu) -{ - struct ucall uc; - - vcpu_run(vcpu); - - TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); - TEST_ASSERT(get_ucall(vcpu, &uc) == UCALL_SYNC, - "Expect UCALL_SYNC"); - TEST_ASSERT(uc.args[1] == SYNC_GP, "#GP is expected."); - printf("vCPU received GP in guest.\n"); -} - -static void inject_ucna(struct kvm_vcpu *vcpu, uint64_t addr) { - /* - * A UCNA error is indicated with VAL=1, UC=1, PCC=0, S=0 and AR=0 in - * the IA32_MCi_STATUS register. - * MSCOD=1 (BIT[16] - MscodDataRdErr). - * MCACOD=0x0090 (Memory controller error format, channel 0) - */ - uint64_t status = MCI_STATUS_VAL | MCI_STATUS_UC | MCI_STATUS_EN | - MCI_STATUS_MISCV | MCI_STATUS_ADDRV | 0x10090; - struct kvm_x86_mce mce = {}; - mce.status = status; - mce.mcg_status = 0; - /* - * MCM_ADDR_PHYS indicates the reported address is a physical address. - * Lowest 6 bits is the recoverable address LSB, i.e., the injected MCE - * is at 4KB granularity. 
- */ - mce.misc = (MCM_ADDR_PHYS << 6) | 0xc; - mce.addr = addr; - mce.bank = UCNA_BANK; - - vcpu_ioctl(vcpu, KVM_X86_SET_MCE, &mce); -} - -static void *run_ucna_injection(void *arg) -{ - struct thread_params *params = (struct thread_params *)arg; - struct ucall uc; - int old; - int r; - - r = pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, &old); - TEST_ASSERT(r == 0, - "pthread_setcanceltype failed with errno=%d", - r); - - vcpu_run(params->vcpu); - - TEST_ASSERT_KVM_EXIT_REASON(params->vcpu, KVM_EXIT_IO); - TEST_ASSERT(get_ucall(params->vcpu, &uc) == UCALL_SYNC, - "Expect UCALL_SYNC"); - TEST_ASSERT(uc.args[1] == SYNC_FIRST_UCNA, "Injecting first UCNA."); - - printf("Injecting first UCNA at %#x.\n", FIRST_UCNA_ADDR); - - inject_ucna(params->vcpu, FIRST_UCNA_ADDR); - vcpu_run(params->vcpu); - - TEST_ASSERT_KVM_EXIT_REASON(params->vcpu, KVM_EXIT_IO); - TEST_ASSERT(get_ucall(params->vcpu, &uc) == UCALL_SYNC, - "Expect UCALL_SYNC"); - TEST_ASSERT(uc.args[1] == SYNC_SECOND_UCNA, "Injecting second UCNA."); - - printf("Injecting second UCNA at %#x.\n", SECOND_UCNA_ADDR); - - inject_ucna(params->vcpu, SECOND_UCNA_ADDR); - vcpu_run(params->vcpu); - - TEST_ASSERT_KVM_EXIT_REASON(params->vcpu, KVM_EXIT_IO); - if (get_ucall(params->vcpu, &uc) == UCALL_ABORT) { - TEST_ASSERT(false, "vCPU assertion failure: %s.", - (const char *)uc.args[0]); - } - - return NULL; -} - -static void test_ucna_injection(struct kvm_vcpu *vcpu, struct thread_params *params) -{ - struct kvm_vm *vm = vcpu->vm; - params->vcpu = vcpu; - params->p_i_ucna_rcvd = (uint64_t *)addr_gva2hva(vm, (uint64_t)&i_ucna_rcvd); - params->p_i_ucna_addr = (uint64_t *)addr_gva2hva(vm, (uint64_t)&i_ucna_addr); - params->p_ucna_addr = (uint64_t *)addr_gva2hva(vm, (uint64_t)&ucna_addr); - params->p_ucna_addr2 = (uint64_t *)addr_gva2hva(vm, (uint64_t)&ucna_addr2); - - run_ucna_injection(params); - - TEST_ASSERT(*params->p_i_ucna_rcvd == 1, "Only first UCNA get signaled."); - TEST_ASSERT(*params->p_i_ucna_addr == FIRST_UCNA_ADDR, - "Only first UCNA reported addr get recorded via interrupt."); - TEST_ASSERT(*params->p_ucna_addr == FIRST_UCNA_ADDR, - "First injected UCNAs should get exposed via registers."); - TEST_ASSERT(*params->p_ucna_addr2 == SECOND_UCNA_ADDR, - "Second injected UCNAs should get exposed via registers."); - - printf("Test successful.\n" - "UCNA CMCI interrupts received: %ld\n" - "Last UCNA address received via CMCI: %lx\n" - "First UCNA address in vCPU thread: %lx\n" - "Second UCNA address in vCPU thread: %lx\n", - *params->p_i_ucna_rcvd, *params->p_i_ucna_addr, - *params->p_ucna_addr, *params->p_ucna_addr2); -} - -static void setup_mce_cap(struct kvm_vcpu *vcpu, bool enable_cmci_p) -{ - uint64_t mcg_caps = MCG_CTL_P | MCG_SER_P | MCG_LMCE_P | KVM_MAX_MCE_BANKS; - if (enable_cmci_p) - mcg_caps |= MCG_CMCI_P; - - mcg_caps &= supported_mcg_caps | MCG_CAP_BANKS_MASK; - vcpu_ioctl(vcpu, KVM_X86_SETUP_MCE, &mcg_caps); -} - -static struct kvm_vcpu *create_vcpu_with_mce_cap(struct kvm_vm *vm, uint32_t vcpuid, - bool enable_cmci_p, void *guest_code) -{ - struct kvm_vcpu *vcpu = vm_vcpu_add(vm, vcpuid, guest_code); - setup_mce_cap(vcpu, enable_cmci_p); - return vcpu; -} - -int main(int argc, char *argv[]) -{ - struct thread_params params; - struct kvm_vm *vm; - struct kvm_vcpu *ucna_vcpu; - struct kvm_vcpu *cmcidis_vcpu; - struct kvm_vcpu *cmci_vcpu; - - kvm_check_cap(KVM_CAP_MCE); - - vm = __vm_create(VM_SHAPE_DEFAULT, 3, 0); - - kvm_ioctl(vm->kvm_fd, KVM_X86_GET_MCE_CAP_SUPPORTED, - &supported_mcg_caps); - - if (!(supported_mcg_caps & 
MCG_CMCI_P)) { - print_skip("MCG_CMCI_P is not supported"); - exit(KSFT_SKIP); - } - - ucna_vcpu = create_vcpu_with_mce_cap(vm, 0, true, ucna_injection_guest_code); - cmcidis_vcpu = create_vcpu_with_mce_cap(vm, 1, false, cmci_disabled_guest_code); - cmci_vcpu = create_vcpu_with_mce_cap(vm, 2, true, cmci_enabled_guest_code); - - vm_install_exception_handler(vm, CMCI_VECTOR, guest_cmci_handler); - vm_install_exception_handler(vm, GP_VECTOR, guest_gp_handler); - - virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA); - - test_ucna_injection(ucna_vcpu, ¶ms); - run_vcpu_expect_gp(cmcidis_vcpu); - run_vcpu_expect_gp(cmci_vcpu); - - kvm_vm_free(vm); -} diff --git a/tools/testing/selftests/kvm/x86_64/userspace_io_test.c b/tools/testing/selftests/kvm/x86_64/userspace_io_test.c deleted file mode 100644 index 9481cbcf284f..000000000000 --- a/tools/testing/selftests/kvm/x86_64/userspace_io_test.c +++ /dev/null @@ -1,103 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include -#include -#include -#include -#include - -#include "test_util.h" - -#include "kvm_util.h" -#include "processor.h" - -static void guest_ins_port80(uint8_t *buffer, unsigned int count) -{ - unsigned long end; - - if (count == 2) - end = (unsigned long)buffer + 1; - else - end = (unsigned long)buffer + 8192; - - asm volatile("cld; rep; insb" : "+D"(buffer), "+c"(count) : "d"(0x80) : "memory"); - GUEST_ASSERT_EQ(count, 0); - GUEST_ASSERT_EQ((unsigned long)buffer, end); -} - -static void guest_code(void) -{ - uint8_t buffer[8192]; - int i; - - /* - * Special case tests. main() will adjust RCX 2 => 1 and 3 => 8192 to - * test that KVM doesn't explode when userspace modifies the "count" on - * a userspace I/O exit. KVM isn't required to play nice with the I/O - * itself as KVM doesn't support manipulating the count, it just needs - * to not explode or overflow a buffer. - */ - guest_ins_port80(buffer, 2); - guest_ins_port80(buffer, 3); - - /* Verify KVM fills the buffer correctly when not stuffing RCX. */ - memset(buffer, 0, sizeof(buffer)); - guest_ins_port80(buffer, 8192); - for (i = 0; i < 8192; i++) - __GUEST_ASSERT(buffer[i] == 0xaa, - "Expected '0xaa', got '0x%x' at buffer[%u]", - buffer[i], i); - - GUEST_DONE(); -} - -int main(int argc, char *argv[]) -{ - struct kvm_vcpu *vcpu; - struct kvm_regs regs; - struct kvm_run *run; - struct kvm_vm *vm; - struct ucall uc; - - vm = vm_create_with_one_vcpu(&vcpu, guest_code); - run = vcpu->run; - - memset(®s, 0, sizeof(regs)); - - while (1) { - vcpu_run(vcpu); - TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); - - if (get_ucall(vcpu, &uc)) - break; - - TEST_ASSERT(run->io.port == 0x80, - "Expected I/O at port 0x80, got port 0x%x", run->io.port); - - /* - * Modify the rep string count in RCX: 2 => 1 and 3 => 8192. - * Note, this abuses KVM's batching of rep string I/O to avoid - * getting stuck in an infinite loop. That behavior isn't in - * scope from a testing perspective as it's not ABI in any way, - * i.e. it really is abusing internal KVM knowledge. 
- */ - vcpu_regs_get(vcpu, ®s); - if (regs.rcx == 2) - regs.rcx = 1; - if (regs.rcx == 3) - regs.rcx = 8192; - memset((void *)run + run->io.data_offset, 0xaa, 4096); - vcpu_regs_set(vcpu, ®s); - } - - switch (uc.cmd) { - case UCALL_DONE: - break; - case UCALL_ABORT: - REPORT_GUEST_ASSERT(uc); - default: - TEST_FAIL("Unknown ucall %lu", uc.cmd); - } - - kvm_vm_free(vm); - return 0; -} diff --git a/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c b/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c deleted file mode 100644 index 32b2794b78fe..000000000000 --- a/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c +++ /dev/null @@ -1,769 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright (C) 2020, Google LLC. - * - * Tests for exiting into userspace on registered MSRs - */ -#include - -#include "kvm_test_harness.h" -#include "test_util.h" -#include "kvm_util.h" -#include "vmx.h" - -#define MSR_NON_EXISTENT 0x474f4f00 - -static u64 deny_bits = 0; -struct kvm_msr_filter filter_allow = { - .flags = KVM_MSR_FILTER_DEFAULT_ALLOW, - .ranges = { - { - .flags = KVM_MSR_FILTER_READ | - KVM_MSR_FILTER_WRITE, - .nmsrs = 1, - /* Test an MSR the kernel knows about. */ - .base = MSR_IA32_XSS, - .bitmap = (uint8_t*)&deny_bits, - }, { - .flags = KVM_MSR_FILTER_READ | - KVM_MSR_FILTER_WRITE, - .nmsrs = 1, - /* Test an MSR the kernel doesn't know about. */ - .base = MSR_IA32_FLUSH_CMD, - .bitmap = (uint8_t*)&deny_bits, - }, { - .flags = KVM_MSR_FILTER_READ | - KVM_MSR_FILTER_WRITE, - .nmsrs = 1, - /* Test a fabricated MSR that no one knows about. */ - .base = MSR_NON_EXISTENT, - .bitmap = (uint8_t*)&deny_bits, - }, - }, -}; - -struct kvm_msr_filter filter_fs = { - .flags = KVM_MSR_FILTER_DEFAULT_ALLOW, - .ranges = { - { - .flags = KVM_MSR_FILTER_READ, - .nmsrs = 1, - .base = MSR_FS_BASE, - .bitmap = (uint8_t*)&deny_bits, - }, - }, -}; - -struct kvm_msr_filter filter_gs = { - .flags = KVM_MSR_FILTER_DEFAULT_ALLOW, - .ranges = { - { - .flags = KVM_MSR_FILTER_READ, - .nmsrs = 1, - .base = MSR_GS_BASE, - .bitmap = (uint8_t*)&deny_bits, - }, - }, -}; - -static uint64_t msr_non_existent_data; -static int guest_exception_count; -static u32 msr_reads, msr_writes; - -static u8 bitmap_00000000[KVM_MSR_FILTER_MAX_BITMAP_SIZE]; -static u8 bitmap_00000000_write[KVM_MSR_FILTER_MAX_BITMAP_SIZE]; -static u8 bitmap_40000000[KVM_MSR_FILTER_MAX_BITMAP_SIZE]; -static u8 bitmap_c0000000[KVM_MSR_FILTER_MAX_BITMAP_SIZE]; -static u8 bitmap_c0000000_read[KVM_MSR_FILTER_MAX_BITMAP_SIZE]; -static u8 bitmap_deadbeef[1] = { 0x1 }; - -static void deny_msr(uint8_t *bitmap, u32 msr) -{ - u32 idx = msr & (KVM_MSR_FILTER_MAX_BITMAP_SIZE - 1); - - bitmap[idx / 8] &= ~(1 << (idx % 8)); -} - -static void prepare_bitmaps(void) -{ - memset(bitmap_00000000, 0xff, sizeof(bitmap_00000000)); - memset(bitmap_00000000_write, 0xff, sizeof(bitmap_00000000_write)); - memset(bitmap_40000000, 0xff, sizeof(bitmap_40000000)); - memset(bitmap_c0000000, 0xff, sizeof(bitmap_c0000000)); - memset(bitmap_c0000000_read, 0xff, sizeof(bitmap_c0000000_read)); - - deny_msr(bitmap_00000000_write, MSR_IA32_POWER_CTL); - deny_msr(bitmap_c0000000_read, MSR_SYSCALL_MASK); - deny_msr(bitmap_c0000000_read, MSR_GS_BASE); -} - -struct kvm_msr_filter filter_deny = { - .flags = KVM_MSR_FILTER_DEFAULT_DENY, - .ranges = { - { - .flags = KVM_MSR_FILTER_READ, - .base = 0x00000000, - .nmsrs = KVM_MSR_FILTER_MAX_BITMAP_SIZE * BITS_PER_BYTE, - .bitmap = bitmap_00000000, - }, { - .flags = KVM_MSR_FILTER_WRITE, - .base = 0x00000000, - .nmsrs = 
KVM_MSR_FILTER_MAX_BITMAP_SIZE * BITS_PER_BYTE, - .bitmap = bitmap_00000000_write, - }, { - .flags = KVM_MSR_FILTER_READ | KVM_MSR_FILTER_WRITE, - .base = 0x40000000, - .nmsrs = KVM_MSR_FILTER_MAX_BITMAP_SIZE * BITS_PER_BYTE, - .bitmap = bitmap_40000000, - }, { - .flags = KVM_MSR_FILTER_READ, - .base = 0xc0000000, - .nmsrs = KVM_MSR_FILTER_MAX_BITMAP_SIZE * BITS_PER_BYTE, - .bitmap = bitmap_c0000000_read, - }, { - .flags = KVM_MSR_FILTER_WRITE, - .base = 0xc0000000, - .nmsrs = KVM_MSR_FILTER_MAX_BITMAP_SIZE * BITS_PER_BYTE, - .bitmap = bitmap_c0000000, - }, { - .flags = KVM_MSR_FILTER_WRITE | KVM_MSR_FILTER_READ, - .base = 0xdeadbeef, - .nmsrs = 1, - .bitmap = bitmap_deadbeef, - }, - }, -}; - -struct kvm_msr_filter no_filter_deny = { - .flags = KVM_MSR_FILTER_DEFAULT_ALLOW, -}; - -/* - * Note: Force test_rdmsr() to not be inlined to prevent the labels, - * rdmsr_start and rdmsr_end, from being defined multiple times. - */ -static noinline uint64_t test_rdmsr(uint32_t msr) -{ - uint32_t a, d; - - guest_exception_count = 0; - - __asm__ __volatile__("rdmsr_start: rdmsr; rdmsr_end:" : - "=a"(a), "=d"(d) : "c"(msr) : "memory"); - - return a | ((uint64_t) d << 32); -} - -/* - * Note: Force test_wrmsr() to not be inlined to prevent the labels, - * wrmsr_start and wrmsr_end, from being defined multiple times. - */ -static noinline void test_wrmsr(uint32_t msr, uint64_t value) -{ - uint32_t a = value; - uint32_t d = value >> 32; - - guest_exception_count = 0; - - __asm__ __volatile__("wrmsr_start: wrmsr; wrmsr_end:" :: - "a"(a), "d"(d), "c"(msr) : "memory"); -} - -extern char rdmsr_start, rdmsr_end; -extern char wrmsr_start, wrmsr_end; - -/* - * Note: Force test_em_rdmsr() to not be inlined to prevent the labels, - * rdmsr_start and rdmsr_end, from being defined multiple times. - */ -static noinline uint64_t test_em_rdmsr(uint32_t msr) -{ - uint32_t a, d; - - guest_exception_count = 0; - - __asm__ __volatile__(KVM_FEP "em_rdmsr_start: rdmsr; em_rdmsr_end:" : - "=a"(a), "=d"(d) : "c"(msr) : "memory"); - - return a | ((uint64_t) d << 32); -} - -/* - * Note: Force test_em_wrmsr() to not be inlined to prevent the labels, - * wrmsr_start and wrmsr_end, from being defined multiple times. - */ -static noinline void test_em_wrmsr(uint32_t msr, uint64_t value) -{ - uint32_t a = value; - uint32_t d = value >> 32; - - guest_exception_count = 0; - - __asm__ __volatile__(KVM_FEP "em_wrmsr_start: wrmsr; em_wrmsr_end:" :: - "a"(a), "d"(d), "c"(msr) : "memory"); -} - -extern char em_rdmsr_start, em_rdmsr_end; -extern char em_wrmsr_start, em_wrmsr_end; - -static void guest_code_filter_allow(void) -{ - uint64_t data; - - /* - * Test userspace intercepting rdmsr / wrmsr for MSR_IA32_XSS. - * - * A GP is thrown if anything other than 0 is written to - * MSR_IA32_XSS. - */ - data = test_rdmsr(MSR_IA32_XSS); - GUEST_ASSERT(data == 0); - GUEST_ASSERT(guest_exception_count == 0); - - test_wrmsr(MSR_IA32_XSS, 0); - GUEST_ASSERT(guest_exception_count == 0); - - test_wrmsr(MSR_IA32_XSS, 1); - GUEST_ASSERT(guest_exception_count == 1); - - /* - * Test userspace intercepting rdmsr / wrmsr for MSR_IA32_FLUSH_CMD. - * - * A GP is thrown if MSR_IA32_FLUSH_CMD is read - * from or if a value other than 1 is written to it. 
- */ - test_rdmsr(MSR_IA32_FLUSH_CMD); - GUEST_ASSERT(guest_exception_count == 1); - - test_wrmsr(MSR_IA32_FLUSH_CMD, 0); - GUEST_ASSERT(guest_exception_count == 1); - - test_wrmsr(MSR_IA32_FLUSH_CMD, 1); - GUEST_ASSERT(guest_exception_count == 0); - - /* - * Test userspace intercepting rdmsr / wrmsr for MSR_NON_EXISTENT. - * - * Test that a fabricated MSR can pass through the kernel - * and be handled in userspace. - */ - test_wrmsr(MSR_NON_EXISTENT, 2); - GUEST_ASSERT(guest_exception_count == 0); - - data = test_rdmsr(MSR_NON_EXISTENT); - GUEST_ASSERT(data == 2); - GUEST_ASSERT(guest_exception_count == 0); - - if (is_forced_emulation_enabled) { - /* Let userspace know we aren't done. */ - GUEST_SYNC(0); - - /* - * Now run the same tests with the instruction emulator. - */ - data = test_em_rdmsr(MSR_IA32_XSS); - GUEST_ASSERT(data == 0); - GUEST_ASSERT(guest_exception_count == 0); - test_em_wrmsr(MSR_IA32_XSS, 0); - GUEST_ASSERT(guest_exception_count == 0); - test_em_wrmsr(MSR_IA32_XSS, 1); - GUEST_ASSERT(guest_exception_count == 1); - - test_em_rdmsr(MSR_IA32_FLUSH_CMD); - GUEST_ASSERT(guest_exception_count == 1); - test_em_wrmsr(MSR_IA32_FLUSH_CMD, 0); - GUEST_ASSERT(guest_exception_count == 1); - test_em_wrmsr(MSR_IA32_FLUSH_CMD, 1); - GUEST_ASSERT(guest_exception_count == 0); - - test_em_wrmsr(MSR_NON_EXISTENT, 2); - GUEST_ASSERT(guest_exception_count == 0); - data = test_em_rdmsr(MSR_NON_EXISTENT); - GUEST_ASSERT(data == 2); - GUEST_ASSERT(guest_exception_count == 0); - } - - GUEST_DONE(); -} - -static void guest_msr_calls(bool trapped) -{ - /* This goes into the in-kernel emulation */ - wrmsr(MSR_SYSCALL_MASK, 0); - - if (trapped) { - /* This goes into user space emulation */ - GUEST_ASSERT(rdmsr(MSR_SYSCALL_MASK) == MSR_SYSCALL_MASK); - GUEST_ASSERT(rdmsr(MSR_GS_BASE) == MSR_GS_BASE); - } else { - GUEST_ASSERT(rdmsr(MSR_SYSCALL_MASK) != MSR_SYSCALL_MASK); - GUEST_ASSERT(rdmsr(MSR_GS_BASE) != MSR_GS_BASE); - } - - /* If trapped == true, this goes into user space emulation */ - wrmsr(MSR_IA32_POWER_CTL, 0x1234); - - /* This goes into the in-kernel emulation */ - rdmsr(MSR_IA32_POWER_CTL); - - /* Invalid MSR, should always be handled by user space exit */ - GUEST_ASSERT(rdmsr(0xdeadbeef) == 0xdeadbeef); - wrmsr(0xdeadbeef, 0x1234); -} - -static void guest_code_filter_deny(void) -{ - guest_msr_calls(true); - - /* - * Disable msr filtering, so that the kernel - * handles everything in the next round - */ - GUEST_SYNC(0); - - guest_msr_calls(false); - - GUEST_DONE(); -} - -static void guest_code_permission_bitmap(void) -{ - uint64_t data; - - data = test_rdmsr(MSR_FS_BASE); - GUEST_ASSERT(data == MSR_FS_BASE); - data = test_rdmsr(MSR_GS_BASE); - GUEST_ASSERT(data != MSR_GS_BASE); - - /* Let userspace know to switch the filter */ - GUEST_SYNC(0); - - data = test_rdmsr(MSR_FS_BASE); - GUEST_ASSERT(data != MSR_FS_BASE); - data = test_rdmsr(MSR_GS_BASE); - GUEST_ASSERT(data == MSR_GS_BASE); - - GUEST_DONE(); -} - -static void __guest_gp_handler(struct ex_regs *regs, - char *r_start, char *r_end, - char *w_start, char *w_end) -{ - if (regs->rip == (uintptr_t)r_start) { - regs->rip = (uintptr_t)r_end; - regs->rax = 0; - regs->rdx = 0; - } else if (regs->rip == (uintptr_t)w_start) { - regs->rip = (uintptr_t)w_end; - } else { - GUEST_ASSERT(!"RIP is at an unknown location!"); - } - - ++guest_exception_count; -} - -static void guest_gp_handler(struct ex_regs *regs) -{ - __guest_gp_handler(regs, &rdmsr_start, &rdmsr_end, - &wrmsr_start, &wrmsr_end); -} - -static void guest_fep_gp_handler(struct 
ex_regs *regs) -{ - __guest_gp_handler(regs, &em_rdmsr_start, &em_rdmsr_end, - &em_wrmsr_start, &em_wrmsr_end); -} - -static void check_for_guest_assert(struct kvm_vcpu *vcpu) -{ - struct ucall uc; - - if (vcpu->run->exit_reason == KVM_EXIT_IO && - get_ucall(vcpu, &uc) == UCALL_ABORT) { - REPORT_GUEST_ASSERT(uc); - } -} - -static void process_rdmsr(struct kvm_vcpu *vcpu, uint32_t msr_index) -{ - struct kvm_run *run = vcpu->run; - - check_for_guest_assert(vcpu); - - TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_X86_RDMSR); - TEST_ASSERT(run->msr.index == msr_index, - "Unexpected msr (0x%04x), expected 0x%04x", - run->msr.index, msr_index); - - switch (run->msr.index) { - case MSR_IA32_XSS: - run->msr.data = 0; - break; - case MSR_IA32_FLUSH_CMD: - run->msr.error = 1; - break; - case MSR_NON_EXISTENT: - run->msr.data = msr_non_existent_data; - break; - case MSR_FS_BASE: - run->msr.data = MSR_FS_BASE; - break; - case MSR_GS_BASE: - run->msr.data = MSR_GS_BASE; - break; - default: - TEST_ASSERT(false, "Unexpected MSR: 0x%04x", run->msr.index); - } -} - -static void process_wrmsr(struct kvm_vcpu *vcpu, uint32_t msr_index) -{ - struct kvm_run *run = vcpu->run; - - check_for_guest_assert(vcpu); - - TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_X86_WRMSR); - TEST_ASSERT(run->msr.index == msr_index, - "Unexpected msr (0x%04x), expected 0x%04x", - run->msr.index, msr_index); - - switch (run->msr.index) { - case MSR_IA32_XSS: - if (run->msr.data != 0) - run->msr.error = 1; - break; - case MSR_IA32_FLUSH_CMD: - if (run->msr.data != 1) - run->msr.error = 1; - break; - case MSR_NON_EXISTENT: - msr_non_existent_data = run->msr.data; - break; - default: - TEST_ASSERT(false, "Unexpected MSR: 0x%04x", run->msr.index); - } -} - -static void process_ucall_done(struct kvm_vcpu *vcpu) -{ - struct ucall uc; - - check_for_guest_assert(vcpu); - - TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); - - TEST_ASSERT(get_ucall(vcpu, &uc) == UCALL_DONE, - "Unexpected ucall command: %lu, expected UCALL_DONE (%d)", - uc.cmd, UCALL_DONE); -} - -static uint64_t process_ucall(struct kvm_vcpu *vcpu) -{ - struct ucall uc = {}; - - check_for_guest_assert(vcpu); - - TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); - - switch (get_ucall(vcpu, &uc)) { - case UCALL_SYNC: - break; - case UCALL_ABORT: - check_for_guest_assert(vcpu); - break; - case UCALL_DONE: - process_ucall_done(vcpu); - break; - default: - TEST_ASSERT(false, "Unexpected ucall"); - } - - return uc.cmd; -} - -static void run_guest_then_process_rdmsr(struct kvm_vcpu *vcpu, - uint32_t msr_index) -{ - vcpu_run(vcpu); - process_rdmsr(vcpu, msr_index); -} - -static void run_guest_then_process_wrmsr(struct kvm_vcpu *vcpu, - uint32_t msr_index) -{ - vcpu_run(vcpu); - process_wrmsr(vcpu, msr_index); -} - -static uint64_t run_guest_then_process_ucall(struct kvm_vcpu *vcpu) -{ - vcpu_run(vcpu); - return process_ucall(vcpu); -} - -static void run_guest_then_process_ucall_done(struct kvm_vcpu *vcpu) -{ - vcpu_run(vcpu); - process_ucall_done(vcpu); -} - -KVM_ONE_VCPU_TEST_SUITE(user_msr); - -KVM_ONE_VCPU_TEST(user_msr, msr_filter_allow, guest_code_filter_allow) -{ - struct kvm_vm *vm = vcpu->vm; - uint64_t cmd; - int rc; - - rc = kvm_check_cap(KVM_CAP_X86_USER_SPACE_MSR); - TEST_ASSERT(rc, "KVM_CAP_X86_USER_SPACE_MSR is available"); - vm_enable_cap(vm, KVM_CAP_X86_USER_SPACE_MSR, KVM_MSR_EXIT_REASON_FILTER); - - rc = kvm_check_cap(KVM_CAP_X86_MSR_FILTER); - TEST_ASSERT(rc, "KVM_CAP_X86_MSR_FILTER is available"); - - vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter_allow); - - 
vm_install_exception_handler(vm, GP_VECTOR, guest_gp_handler); - - /* Process guest code userspace exits. */ - run_guest_then_process_rdmsr(vcpu, MSR_IA32_XSS); - run_guest_then_process_wrmsr(vcpu, MSR_IA32_XSS); - run_guest_then_process_wrmsr(vcpu, MSR_IA32_XSS); - - run_guest_then_process_rdmsr(vcpu, MSR_IA32_FLUSH_CMD); - run_guest_then_process_wrmsr(vcpu, MSR_IA32_FLUSH_CMD); - run_guest_then_process_wrmsr(vcpu, MSR_IA32_FLUSH_CMD); - - run_guest_then_process_wrmsr(vcpu, MSR_NON_EXISTENT); - run_guest_then_process_rdmsr(vcpu, MSR_NON_EXISTENT); - - vcpu_run(vcpu); - cmd = process_ucall(vcpu); - - if (is_forced_emulation_enabled) { - TEST_ASSERT_EQ(cmd, UCALL_SYNC); - vm_install_exception_handler(vm, GP_VECTOR, guest_fep_gp_handler); - - /* Process emulated rdmsr and wrmsr instructions. */ - run_guest_then_process_rdmsr(vcpu, MSR_IA32_XSS); - run_guest_then_process_wrmsr(vcpu, MSR_IA32_XSS); - run_guest_then_process_wrmsr(vcpu, MSR_IA32_XSS); - - run_guest_then_process_rdmsr(vcpu, MSR_IA32_FLUSH_CMD); - run_guest_then_process_wrmsr(vcpu, MSR_IA32_FLUSH_CMD); - run_guest_then_process_wrmsr(vcpu, MSR_IA32_FLUSH_CMD); - - run_guest_then_process_wrmsr(vcpu, MSR_NON_EXISTENT); - run_guest_then_process_rdmsr(vcpu, MSR_NON_EXISTENT); - - /* Confirm the guest completed without issues. */ - run_guest_then_process_ucall_done(vcpu); - } else { - TEST_ASSERT_EQ(cmd, UCALL_DONE); - printf("To run the instruction emulated tests set the module parameter 'kvm.force_emulation_prefix=1'\n"); - } -} - -static int handle_ucall(struct kvm_vcpu *vcpu) -{ - struct ucall uc; - - switch (get_ucall(vcpu, &uc)) { - case UCALL_ABORT: - REPORT_GUEST_ASSERT(uc); - break; - case UCALL_SYNC: - vm_ioctl(vcpu->vm, KVM_X86_SET_MSR_FILTER, &no_filter_deny); - break; - case UCALL_DONE: - return 1; - default: - TEST_FAIL("Unknown ucall %lu", uc.cmd); - } - - return 0; -} - -static void handle_rdmsr(struct kvm_run *run) -{ - run->msr.data = run->msr.index; - msr_reads++; - - if (run->msr.index == MSR_SYSCALL_MASK || - run->msr.index == MSR_GS_BASE) { - TEST_ASSERT(run->msr.reason == KVM_MSR_EXIT_REASON_FILTER, - "MSR read trap w/o access fault"); - } - - if (run->msr.index == 0xdeadbeef) { - TEST_ASSERT(run->msr.reason == KVM_MSR_EXIT_REASON_UNKNOWN, - "MSR deadbeef read trap w/o inval fault"); - } -} - -static void handle_wrmsr(struct kvm_run *run) -{ - /* ignore */ - msr_writes++; - - if (run->msr.index == MSR_IA32_POWER_CTL) { - TEST_ASSERT(run->msr.data == 0x1234, - "MSR data for MSR_IA32_POWER_CTL incorrect"); - TEST_ASSERT(run->msr.reason == KVM_MSR_EXIT_REASON_FILTER, - "MSR_IA32_POWER_CTL trap w/o access fault"); - } - - if (run->msr.index == 0xdeadbeef) { - TEST_ASSERT(run->msr.data == 0x1234, - "MSR data for deadbeef incorrect"); - TEST_ASSERT(run->msr.reason == KVM_MSR_EXIT_REASON_UNKNOWN, - "deadbeef trap w/o inval fault"); - } -} - -KVM_ONE_VCPU_TEST(user_msr, msr_filter_deny, guest_code_filter_deny) -{ - struct kvm_vm *vm = vcpu->vm; - struct kvm_run *run = vcpu->run; - int rc; - - rc = kvm_check_cap(KVM_CAP_X86_USER_SPACE_MSR); - TEST_ASSERT(rc, "KVM_CAP_X86_USER_SPACE_MSR is available"); - vm_enable_cap(vm, KVM_CAP_X86_USER_SPACE_MSR, KVM_MSR_EXIT_REASON_INVAL | - KVM_MSR_EXIT_REASON_UNKNOWN | - KVM_MSR_EXIT_REASON_FILTER); - - rc = kvm_check_cap(KVM_CAP_X86_MSR_FILTER); - TEST_ASSERT(rc, "KVM_CAP_X86_MSR_FILTER is available"); - - prepare_bitmaps(); - vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter_deny); - - while (1) { - vcpu_run(vcpu); - - switch (run->exit_reason) { - case KVM_EXIT_X86_RDMSR: - 
handle_rdmsr(run); - break; - case KVM_EXIT_X86_WRMSR: - handle_wrmsr(run); - break; - case KVM_EXIT_IO: - if (handle_ucall(vcpu)) - goto done; - break; - } - - } - -done: - TEST_ASSERT(msr_reads == 4, "Handled 4 rdmsr in user space"); - TEST_ASSERT(msr_writes == 3, "Handled 3 wrmsr in user space"); -} - -KVM_ONE_VCPU_TEST(user_msr, msr_permission_bitmap, guest_code_permission_bitmap) -{ - struct kvm_vm *vm = vcpu->vm; - int rc; - - rc = kvm_check_cap(KVM_CAP_X86_USER_SPACE_MSR); - TEST_ASSERT(rc, "KVM_CAP_X86_USER_SPACE_MSR is available"); - vm_enable_cap(vm, KVM_CAP_X86_USER_SPACE_MSR, KVM_MSR_EXIT_REASON_FILTER); - - rc = kvm_check_cap(KVM_CAP_X86_MSR_FILTER); - TEST_ASSERT(rc, "KVM_CAP_X86_MSR_FILTER is available"); - - vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter_fs); - run_guest_then_process_rdmsr(vcpu, MSR_FS_BASE); - TEST_ASSERT(run_guest_then_process_ucall(vcpu) == UCALL_SYNC, - "Expected ucall state to be UCALL_SYNC."); - vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter_gs); - run_guest_then_process_rdmsr(vcpu, MSR_GS_BASE); - run_guest_then_process_ucall_done(vcpu); -} - -#define test_user_exit_msr_ioctl(vm, cmd, arg, flag, valid_mask) \ -({ \ - int r = __vm_ioctl(vm, cmd, arg); \ - \ - if (flag & valid_mask) \ - TEST_ASSERT(!r, __KVM_IOCTL_ERROR(#cmd, r)); \ - else \ - TEST_ASSERT(r == -1 && errno == EINVAL, \ - "Wanted EINVAL for %s with flag = 0x%llx, got rc: %i errno: %i (%s)", \ - #cmd, flag, r, errno, strerror(errno)); \ -}) - -static void run_user_space_msr_flag_test(struct kvm_vm *vm) -{ - struct kvm_enable_cap cap = { .cap = KVM_CAP_X86_USER_SPACE_MSR }; - int nflags = sizeof(cap.args[0]) * BITS_PER_BYTE; - int rc; - int i; - - rc = kvm_check_cap(KVM_CAP_X86_USER_SPACE_MSR); - TEST_ASSERT(rc, "KVM_CAP_X86_USER_SPACE_MSR is available"); - - for (i = 0; i < nflags; i++) { - cap.args[0] = BIT_ULL(i); - test_user_exit_msr_ioctl(vm, KVM_ENABLE_CAP, &cap, - BIT_ULL(i), KVM_MSR_EXIT_REASON_VALID_MASK); - } -} - -static void run_msr_filter_flag_test(struct kvm_vm *vm) -{ - u64 deny_bits = 0; - struct kvm_msr_filter filter = { - .flags = KVM_MSR_FILTER_DEFAULT_ALLOW, - .ranges = { - { - .flags = KVM_MSR_FILTER_READ, - .nmsrs = 1, - .base = 0, - .bitmap = (uint8_t *)&deny_bits, - }, - }, - }; - int nflags; - int rc; - int i; - - rc = kvm_check_cap(KVM_CAP_X86_MSR_FILTER); - TEST_ASSERT(rc, "KVM_CAP_X86_MSR_FILTER is available"); - - nflags = sizeof(filter.flags) * BITS_PER_BYTE; - for (i = 0; i < nflags; i++) { - filter.flags = BIT_ULL(i); - test_user_exit_msr_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter, - BIT_ULL(i), KVM_MSR_FILTER_VALID_MASK); - } - - filter.flags = KVM_MSR_FILTER_DEFAULT_ALLOW; - nflags = sizeof(filter.ranges[0].flags) * BITS_PER_BYTE; - for (i = 0; i < nflags; i++) { - filter.ranges[0].flags = BIT_ULL(i); - test_user_exit_msr_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter, - BIT_ULL(i), KVM_MSR_FILTER_RANGE_VALID_MASK); - } -} - -/* Test that attempts to write to the unused bits in a flag fails. */ -KVM_ONE_VCPU_TEST(user_msr, user_exit_msr_flags, NULL) -{ - struct kvm_vm *vm = vcpu->vm; - - /* Test flags for KVM_CAP_X86_USER_SPACE_MSR. */ - run_user_space_msr_flag_test(vm); - - /* Test flags and range flags for KVM_X86_SET_MSR_FILTER. 
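/*
 * Illustrative sketch, not part of the removed test: how a single
 * KVM_X86_SET_MSR_FILTER range like the ones used above is typically built.
 * Bit (msr - base) of .bitmap guards MSR @msr for the access types named in
 * .flags, .nmsrs bounds how many bits are consulted, and leaving the bit
 * clear denies the access (assumption inferred from the deny_bits usage
 * nearby).  The helper name and single-byte bitmap are made up for the
 * example; uint8_t/uint32_t assume <stdint.h>.  A denied RDMSR then exits to
 * userspace as KVM_EXIT_X86_RDMSR with reason KVM_MSR_EXIT_REASON_FILTER,
 * which is what handle_rdmsr() above checks.
 */
static void deny_one_msr_read(struct kvm_vm *vm, uint32_t msr)
{
	static uint8_t bitmap;		/* one MSR -> one bit, left clear = deny */
	struct kvm_msr_filter filter = {
		.flags = KVM_MSR_FILTER_DEFAULT_ALLOW,
		.ranges = {
			{
				.flags  = KVM_MSR_FILTER_READ,
				.base   = msr,
				.nmsrs  = 1,
				.bitmap = &bitmap,
			},
		},
	};

	vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter);
}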
*/ - run_msr_filter_flag_test(vm); -} - -int main(int argc, char *argv[]) -{ - return test_harness_run(argc, argv); -} diff --git a/tools/testing/selftests/kvm/x86_64/vmx_apic_access_test.c b/tools/testing/selftests/kvm/x86_64/vmx_apic_access_test.c deleted file mode 100644 index a81a24761aac..000000000000 --- a/tools/testing/selftests/kvm/x86_64/vmx_apic_access_test.c +++ /dev/null @@ -1,124 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * vmx_apic_access_test - * - * Copyright (C) 2020, Google LLC. - * - * This work is licensed under the terms of the GNU GPL, version 2. - * - * The first subtest simply checks to see that an L2 guest can be - * launched with a valid APIC-access address that is backed by a - * page of L1 physical memory. - * - * The second subtest sets the APIC-access address to a (valid) L1 - * physical address that is not backed by memory. KVM can't handle - * this situation, so resuming L2 should result in a KVM exit for - * internal error (emulation). This is not an architectural - * requirement. It is just a shortcoming of KVM. The internal error - * is unfortunate, but it's better than what used to happen! - */ - -#include "test_util.h" -#include "kvm_util.h" -#include "processor.h" -#include "vmx.h" - -#include -#include - -#include "kselftest.h" - -static void l2_guest_code(void) -{ - /* Exit to L1 */ - __asm__ __volatile__("vmcall"); -} - -static void l1_guest_code(struct vmx_pages *vmx_pages, unsigned long high_gpa) -{ -#define L2_GUEST_STACK_SIZE 64 - unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; - uint32_t control; - - GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages)); - GUEST_ASSERT(load_vmcs(vmx_pages)); - - /* Prepare the VMCS for L2 execution. */ - prepare_vmcs(vmx_pages, l2_guest_code, - &l2_guest_stack[L2_GUEST_STACK_SIZE]); - control = vmreadz(CPU_BASED_VM_EXEC_CONTROL); - control |= CPU_BASED_ACTIVATE_SECONDARY_CONTROLS; - vmwrite(CPU_BASED_VM_EXEC_CONTROL, control); - control = vmreadz(SECONDARY_VM_EXEC_CONTROL); - control |= SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; - vmwrite(SECONDARY_VM_EXEC_CONTROL, control); - vmwrite(APIC_ACCESS_ADDR, vmx_pages->apic_access_gpa); - - /* Try to launch L2 with the memory-backed APIC-access address. */ - GUEST_SYNC(vmreadz(APIC_ACCESS_ADDR)); - GUEST_ASSERT(!vmlaunch()); - GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL); - - vmwrite(APIC_ACCESS_ADDR, high_gpa); - - /* Try to resume L2 with the unbacked APIC-access address. 
*/ - GUEST_SYNC(vmreadz(APIC_ACCESS_ADDR)); - GUEST_ASSERT(!vmresume()); - GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL); - - GUEST_DONE(); -} - -int main(int argc, char *argv[]) -{ - unsigned long apic_access_addr = ~0ul; - vm_vaddr_t vmx_pages_gva; - unsigned long high_gpa; - struct vmx_pages *vmx; - bool done = false; - - struct kvm_vcpu *vcpu; - struct kvm_vm *vm; - - TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX)); - - vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code); - - high_gpa = (vm->max_gfn - 1) << vm->page_shift; - - vmx = vcpu_alloc_vmx(vm, &vmx_pages_gva); - prepare_virtualize_apic_accesses(vmx, vm); - vcpu_args_set(vcpu, 2, vmx_pages_gva, high_gpa); - - while (!done) { - volatile struct kvm_run *run = vcpu->run; - struct ucall uc; - - vcpu_run(vcpu); - if (apic_access_addr == high_gpa) { - TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_INTERNAL_ERROR); - TEST_ASSERT(run->internal.suberror == - KVM_INTERNAL_ERROR_EMULATION, - "Got internal suberror other than KVM_INTERNAL_ERROR_EMULATION: %u", - run->internal.suberror); - break; - } - TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); - - switch (get_ucall(vcpu, &uc)) { - case UCALL_ABORT: - REPORT_GUEST_ASSERT(uc); - /* NOT REACHED */ - case UCALL_SYNC: - apic_access_addr = uc.args[1]; - break; - case UCALL_DONE: - done = true; - break; - default: - TEST_ASSERT(false, "Unknown ucall %lu", uc.cmd); - } - } - kvm_vm_free(vm); - return 0; -} diff --git a/tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c b/tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c deleted file mode 100644 index dad988351493..000000000000 --- a/tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c +++ /dev/null @@ -1,80 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * vmx_close_while_nested - * - * Copyright (C) 2019, Red Hat, Inc. - * - * Verify that nothing bad happens if a KVM user exits with open - * file descriptors while executing a nested guest. - */ - -#include "test_util.h" -#include "kvm_util.h" -#include "processor.h" -#include "vmx.h" - -#include -#include - -#include "kselftest.h" - -enum { - PORT_L0_EXIT = 0x2000, -}; - -static void l2_guest_code(void) -{ - /* Exit to L0 */ - asm volatile("inb %%dx, %%al" - : : [port] "d" (PORT_L0_EXIT) : "rax"); -} - -static void l1_guest_code(struct vmx_pages *vmx_pages) -{ -#define L2_GUEST_STACK_SIZE 64 - unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; - - GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages)); - GUEST_ASSERT(load_vmcs(vmx_pages)); - - /* Prepare the VMCS for L2 execution. */ - prepare_vmcs(vmx_pages, l2_guest_code, - &l2_guest_stack[L2_GUEST_STACK_SIZE]); - - GUEST_ASSERT(!vmlaunch()); - GUEST_ASSERT(0); -} - -int main(int argc, char *argv[]) -{ - vm_vaddr_t vmx_pages_gva; - struct kvm_vcpu *vcpu; - struct kvm_vm *vm; - - TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX)); - - vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code); - - /* Allocate VMX pages and shared descriptors (vmx_pages). 
*/ - vcpu_alloc_vmx(vm, &vmx_pages_gva); - vcpu_args_set(vcpu, 1, vmx_pages_gva); - - for (;;) { - volatile struct kvm_run *run = vcpu->run; - struct ucall uc; - - vcpu_run(vcpu); - TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); - - if (run->io.port == PORT_L0_EXIT) - break; - - switch (get_ucall(vcpu, &uc)) { - case UCALL_ABORT: - REPORT_GUEST_ASSERT(uc); - /* NOT REACHED */ - default: - TEST_FAIL("Unknown ucall %lu", uc.cmd); - } - } -} diff --git a/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c b/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c deleted file mode 100644 index fa512d033205..000000000000 --- a/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c +++ /dev/null @@ -1,179 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * KVM dirty page logging test - * - * Copyright (C) 2018, Red Hat, Inc. - */ -#include -#include -#include -#include - -#include "test_util.h" -#include "kvm_util.h" -#include "processor.h" -#include "vmx.h" - -/* The memory slot index to track dirty pages */ -#define TEST_MEM_SLOT_INDEX 1 -#define TEST_MEM_PAGES 3 - -/* L1 guest test virtual memory offset */ -#define GUEST_TEST_MEM 0xc0000000 - -/* L2 guest test virtual memory offset */ -#define NESTED_TEST_MEM1 0xc0001000 -#define NESTED_TEST_MEM2 0xc0002000 - -static void l2_guest_code(u64 *a, u64 *b) -{ - READ_ONCE(*a); - WRITE_ONCE(*a, 1); - GUEST_SYNC(true); - GUEST_SYNC(false); - - WRITE_ONCE(*b, 1); - GUEST_SYNC(true); - WRITE_ONCE(*b, 1); - GUEST_SYNC(true); - GUEST_SYNC(false); - - /* Exit to L1 and never come back. */ - vmcall(); -} - -static void l2_guest_code_ept_enabled(void) -{ - l2_guest_code((u64 *)NESTED_TEST_MEM1, (u64 *)NESTED_TEST_MEM2); -} - -static void l2_guest_code_ept_disabled(void) -{ - /* Access the same L1 GPAs as l2_guest_code_ept_enabled() */ - l2_guest_code((u64 *)GUEST_TEST_MEM, (u64 *)GUEST_TEST_MEM); -} - -void l1_guest_code(struct vmx_pages *vmx) -{ -#define L2_GUEST_STACK_SIZE 64 - unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; - void *l2_rip; - - GUEST_ASSERT(vmx->vmcs_gpa); - GUEST_ASSERT(prepare_for_vmx_operation(vmx)); - GUEST_ASSERT(load_vmcs(vmx)); - - if (vmx->eptp_gpa) - l2_rip = l2_guest_code_ept_enabled; - else - l2_rip = l2_guest_code_ept_disabled; - - prepare_vmcs(vmx, l2_rip, &l2_guest_stack[L2_GUEST_STACK_SIZE]); - - GUEST_SYNC(false); - GUEST_ASSERT(!vmlaunch()); - GUEST_SYNC(false); - GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL); - GUEST_DONE(); -} - -static void test_vmx_dirty_log(bool enable_ept) -{ - vm_vaddr_t vmx_pages_gva = 0; - struct vmx_pages *vmx; - unsigned long *bmap; - uint64_t *host_test_mem; - - struct kvm_vcpu *vcpu; - struct kvm_vm *vm; - struct ucall uc; - bool done = false; - - pr_info("Nested EPT: %s\n", enable_ept ? "enabled" : "disabled"); - - /* Create VM */ - vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code); - vmx = vcpu_alloc_vmx(vm, &vmx_pages_gva); - vcpu_args_set(vcpu, 1, vmx_pages_gva); - - /* Add an extra memory slot for testing dirty logging */ - vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, - GUEST_TEST_MEM, - TEST_MEM_SLOT_INDEX, - TEST_MEM_PAGES, - KVM_MEM_LOG_DIRTY_PAGES); - - /* - * Add an identity map for GVA range [0xc0000000, 0xc0002000). This - * affects both L1 and L2. However... - */ - virt_map(vm, GUEST_TEST_MEM, GUEST_TEST_MEM, TEST_MEM_PAGES); - - /* - * ... pages in the L2 GPA range [0xc0001000, 0xc0003000) will map to - * 0xc0000000. - * - * Note that prepare_eptp should be called only L1's GPA map is done, - * meaning after the last call to virt_map. 
- * - * When EPT is disabled, the L2 guest code will still access the same L1 - * GPAs as the EPT enabled case. - */ - if (enable_ept) { - prepare_eptp(vmx, vm, 0); - nested_map_memslot(vmx, vm, 0); - nested_map(vmx, vm, NESTED_TEST_MEM1, GUEST_TEST_MEM, 4096); - nested_map(vmx, vm, NESTED_TEST_MEM2, GUEST_TEST_MEM, 4096); - } - - bmap = bitmap_zalloc(TEST_MEM_PAGES); - host_test_mem = addr_gpa2hva(vm, GUEST_TEST_MEM); - - while (!done) { - memset(host_test_mem, 0xaa, TEST_MEM_PAGES * 4096); - vcpu_run(vcpu); - TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); - - switch (get_ucall(vcpu, &uc)) { - case UCALL_ABORT: - REPORT_GUEST_ASSERT(uc); - /* NOT REACHED */ - case UCALL_SYNC: - /* - * The nested guest wrote at offset 0x1000 in the memslot, but the - * dirty bitmap must be filled in according to L1 GPA, not L2. - */ - kvm_vm_get_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap); - if (uc.args[1]) { - TEST_ASSERT(test_bit(0, bmap), "Page 0 incorrectly reported clean"); - TEST_ASSERT(host_test_mem[0] == 1, "Page 0 not written by guest"); - } else { - TEST_ASSERT(!test_bit(0, bmap), "Page 0 incorrectly reported dirty"); - TEST_ASSERT(host_test_mem[0] == 0xaaaaaaaaaaaaaaaaULL, "Page 0 written by guest"); - } - - TEST_ASSERT(!test_bit(1, bmap), "Page 1 incorrectly reported dirty"); - TEST_ASSERT(host_test_mem[4096 / 8] == 0xaaaaaaaaaaaaaaaaULL, "Page 1 written by guest"); - TEST_ASSERT(!test_bit(2, bmap), "Page 2 incorrectly reported dirty"); - TEST_ASSERT(host_test_mem[8192 / 8] == 0xaaaaaaaaaaaaaaaaULL, "Page 2 written by guest"); - break; - case UCALL_DONE: - done = true; - break; - default: - TEST_FAIL("Unknown ucall %lu", uc.cmd); - } - } -} - -int main(int argc, char *argv[]) -{ - TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX)); - - test_vmx_dirty_log(/*enable_ept=*/false); - - if (kvm_cpu_has_ept()) - test_vmx_dirty_log(/*enable_ept=*/true); - - return 0; -} diff --git a/tools/testing/selftests/kvm/x86_64/vmx_exception_with_invalid_guest_state.c b/tools/testing/selftests/kvm/x86_64/vmx_exception_with_invalid_guest_state.c deleted file mode 100644 index 3fd6eceab46f..000000000000 --- a/tools/testing/selftests/kvm/x86_64/vmx_exception_with_invalid_guest_state.c +++ /dev/null @@ -1,142 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -#include "test_util.h" -#include "kvm_util.h" -#include "processor.h" - -#include -#include -#include -#include - -#include "kselftest.h" - -static void guest_ud_handler(struct ex_regs *regs) -{ - /* Loop on the ud2 until guest state is made invalid. */ -} - -static void guest_code(void) -{ - asm volatile("ud2"); -} - -static void __run_vcpu_with_invalid_state(struct kvm_vcpu *vcpu) -{ - struct kvm_run *run = vcpu->run; - - vcpu_run(vcpu); - - TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_INTERNAL_ERROR); - TEST_ASSERT(run->emulation_failure.suberror == KVM_INTERNAL_ERROR_EMULATION, - "Expected emulation failure, got %d", - run->emulation_failure.suberror); -} - -static void run_vcpu_with_invalid_state(struct kvm_vcpu *vcpu) -{ - /* - * Always run twice to verify KVM handles the case where _KVM_ queues - * an exception with invalid state and then exits to userspace, i.e. - * that KVM doesn't explode if userspace ignores the initial error. 
- */ - __run_vcpu_with_invalid_state(vcpu); - __run_vcpu_with_invalid_state(vcpu); -} - -static void set_timer(void) -{ - struct itimerval timer; - - timer.it_value.tv_sec = 0; - timer.it_value.tv_usec = 200; - timer.it_interval = timer.it_value; - TEST_ASSERT_EQ(setitimer(ITIMER_REAL, &timer, NULL), 0); -} - -static void set_or_clear_invalid_guest_state(struct kvm_vcpu *vcpu, bool set) -{ - static struct kvm_sregs sregs; - - if (!sregs.cr0) - vcpu_sregs_get(vcpu, &sregs); - sregs.tr.unusable = !!set; - vcpu_sregs_set(vcpu, &sregs); -} - -static void set_invalid_guest_state(struct kvm_vcpu *vcpu) -{ - set_or_clear_invalid_guest_state(vcpu, true); -} - -static void clear_invalid_guest_state(struct kvm_vcpu *vcpu) -{ - set_or_clear_invalid_guest_state(vcpu, false); -} - -static struct kvm_vcpu *get_set_sigalrm_vcpu(struct kvm_vcpu *__vcpu) -{ - static struct kvm_vcpu *vcpu = NULL; - - if (__vcpu) - vcpu = __vcpu; - return vcpu; -} - -static void sigalrm_handler(int sig) -{ - struct kvm_vcpu *vcpu = get_set_sigalrm_vcpu(NULL); - struct kvm_vcpu_events events; - - TEST_ASSERT(sig == SIGALRM, "Unexpected signal = %d", sig); - - vcpu_events_get(vcpu, &events); - - /* - * If an exception is pending, attempt KVM_RUN with invalid guest, - * otherwise rearm the timer and keep doing so until the timer fires - * between KVM queueing an exception and re-entering the guest. - */ - if (events.exception.pending) { - set_invalid_guest_state(vcpu); - run_vcpu_with_invalid_state(vcpu); - } else { - set_timer(); - } -} - -int main(int argc, char *argv[]) -{ - struct kvm_vcpu *vcpu; - struct kvm_vm *vm; - - TEST_REQUIRE(host_cpu_is_intel); - TEST_REQUIRE(!vm_is_unrestricted_guest(NULL)); - - vm = vm_create_with_one_vcpu(&vcpu, guest_code); - get_set_sigalrm_vcpu(vcpu); - - vm_install_exception_handler(vm, UD_VECTOR, guest_ud_handler); - - /* - * Stuff invalid guest state for L2 by making TR unusuable. The next - * KVM_RUN should induce a TRIPLE_FAULT in L2 as KVM doesn't support - * emulating invalid guest state for L2. - */ - set_invalid_guest_state(vcpu); - run_vcpu_with_invalid_state(vcpu); - - /* - * Verify KVM also handles the case where userspace gains control while - * an exception is pending and stuffs invalid state. Run with valid - * guest state and a timer firing every 200us, and attempt to enter the - * guest with invalid state when the handler interrupts KVM with an - * exception pending. - */ - clear_invalid_guest_state(vcpu); - TEST_ASSERT(signal(SIGALRM, sigalrm_handler) != SIG_ERR, - "Failed to register SIGALRM handler, errno = %d (%s)", - errno, strerror(errno)); - - set_timer(); - run_vcpu_with_invalid_state(vcpu); -} diff --git a/tools/testing/selftests/kvm/x86_64/vmx_invalid_nested_guest_state.c b/tools/testing/selftests/kvm/x86_64/vmx_invalid_nested_guest_state.c deleted file mode 100644 index a100ee5f0009..000000000000 --- a/tools/testing/selftests/kvm/x86_64/vmx_invalid_nested_guest_state.c +++ /dev/null @@ -1,103 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -#include "test_util.h" -#include "kvm_util.h" -#include "processor.h" -#include "vmx.h" - -#include -#include - -#include "kselftest.h" - -#define ARBITRARY_IO_PORT 0x2000 - -static struct kvm_vm *vm; - -static void l2_guest_code(void) -{ - /* - * Generate an exit to L0 userspace, i.e. main(), via I/O to an - * arbitrary port. 
- */ - asm volatile("inb %%dx, %%al" - : : [port] "d" (ARBITRARY_IO_PORT) : "rax"); -} - -static void l1_guest_code(struct vmx_pages *vmx_pages) -{ -#define L2_GUEST_STACK_SIZE 64 - unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; - - GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages)); - GUEST_ASSERT(load_vmcs(vmx_pages)); - - /* Prepare the VMCS for L2 execution. */ - prepare_vmcs(vmx_pages, l2_guest_code, - &l2_guest_stack[L2_GUEST_STACK_SIZE]); - - /* - * L2 must be run without unrestricted guest, verify that the selftests - * library hasn't enabled it. Because KVM selftests jump directly to - * 64-bit mode, unrestricted guest support isn't required. - */ - GUEST_ASSERT(!(vmreadz(CPU_BASED_VM_EXEC_CONTROL) & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) || - !(vmreadz(SECONDARY_VM_EXEC_CONTROL) & SECONDARY_EXEC_UNRESTRICTED_GUEST)); - - GUEST_ASSERT(!vmlaunch()); - - /* L2 should triple fault after main() stuffs invalid guest state. */ - GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_TRIPLE_FAULT); - GUEST_DONE(); -} - -int main(int argc, char *argv[]) -{ - vm_vaddr_t vmx_pages_gva; - struct kvm_sregs sregs; - struct kvm_vcpu *vcpu; - struct kvm_run *run; - struct ucall uc; - - TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX)); - - vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code); - - /* Allocate VMX pages and shared descriptors (vmx_pages). */ - vcpu_alloc_vmx(vm, &vmx_pages_gva); - vcpu_args_set(vcpu, 1, vmx_pages_gva); - - vcpu_run(vcpu); - - run = vcpu->run; - - /* - * The first exit to L0 userspace should be an I/O access from L2. - * Running L1 should launch L2 without triggering an exit to userspace. - */ - TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); - - TEST_ASSERT(run->io.port == ARBITRARY_IO_PORT, - "Expected IN from port %d from L2, got port %d", - ARBITRARY_IO_PORT, run->io.port); - - /* - * Stuff invalid guest state for L2 by making TR unusuable. The next - * KVM_RUN should induce a TRIPLE_FAULT in L2 as KVM doesn't support - * emulating invalid guest state for L2. - */ - memset(&sregs, 0, sizeof(sregs)); - vcpu_sregs_get(vcpu, &sregs); - sregs.tr.unusable = 1; - vcpu_sregs_set(vcpu, &sregs); - - vcpu_run(vcpu); - - switch (get_ucall(vcpu, &uc)) { - case UCALL_DONE: - break; - case UCALL_ABORT: - REPORT_GUEST_ASSERT(uc); - default: - TEST_FAIL("Unexpected ucall: %lu", uc.cmd); - } -} diff --git a/tools/testing/selftests/kvm/x86_64/vmx_msrs_test.c b/tools/testing/selftests/kvm/x86_64/vmx_msrs_test.c deleted file mode 100644 index 90720b6205f4..000000000000 --- a/tools/testing/selftests/kvm/x86_64/vmx_msrs_test.c +++ /dev/null @@ -1,131 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * VMX control MSR test - * - * Copyright (C) 2022 Google LLC. - * - * Tests for KVM ownership of bits in the VMX entry/exit control MSRs. Checks - * that KVM will set owned bits where appropriate, and will not if - * KVM_X86_QUIRK_TWEAK_VMX_CTRL_MSRS is disabled. 
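/*
 * Background sketch, not taken from the removed file: the IA32_VMX_*_CTLS
 * capability MSRs exercised below encode their constraints as allowed-0
 * settings in bits 31:0 (a set bit means the control must be 1) and
 * allowed-1 settings in bits 63:32 (a clear bit means the control must be
 * 0).  A hypervisor typically folds a desired control value through both
 * masks as shown here; the helper name is illustrative and
 * uint32_t/uint64_t assume <stdint.h>.
 */
static inline uint32_t vmx_adjust_ctls(uint64_t cap_msr, uint32_t wanted)
{
	uint32_t must_be_one = (uint32_t)cap_msr;		/* allowed-0 settings */
	uint32_t may_be_one  = (uint32_t)(cap_msr >> 32);	/* allowed-1 settings */

	return (wanted | must_be_one) & may_be_one;
}

/*
 * Example use (illustrative):
 *   vmwrite(PIN_BASED_VM_EXEC_CONTROL,
 *	     vmx_adjust_ctls(rdmsr(MSR_IA32_VMX_TRUE_PINBASED_CTLS), wanted));
 */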
- */ -#include -#include "kvm_util.h" -#include "vmx.h" - -static void vmx_fixed1_msr_test(struct kvm_vcpu *vcpu, uint32_t msr_index, - uint64_t mask) -{ - uint64_t val = vcpu_get_msr(vcpu, msr_index); - uint64_t bit; - - mask &= val; - - for_each_set_bit(bit, &mask, 64) { - vcpu_set_msr(vcpu, msr_index, val & ~BIT_ULL(bit)); - vcpu_set_msr(vcpu, msr_index, val); - } -} - -static void vmx_fixed0_msr_test(struct kvm_vcpu *vcpu, uint32_t msr_index, - uint64_t mask) -{ - uint64_t val = vcpu_get_msr(vcpu, msr_index); - uint64_t bit; - - mask = ~mask | val; - - for_each_clear_bit(bit, &mask, 64) { - vcpu_set_msr(vcpu, msr_index, val | BIT_ULL(bit)); - vcpu_set_msr(vcpu, msr_index, val); - } -} - -static void vmx_fixed0and1_msr_test(struct kvm_vcpu *vcpu, uint32_t msr_index) -{ - vmx_fixed0_msr_test(vcpu, msr_index, GENMASK_ULL(31, 0)); - vmx_fixed1_msr_test(vcpu, msr_index, GENMASK_ULL(63, 32)); -} - -static void vmx_save_restore_msrs_test(struct kvm_vcpu *vcpu) -{ - vcpu_set_msr(vcpu, MSR_IA32_VMX_VMCS_ENUM, 0); - vcpu_set_msr(vcpu, MSR_IA32_VMX_VMCS_ENUM, -1ull); - - vmx_fixed1_msr_test(vcpu, MSR_IA32_VMX_BASIC, - BIT_ULL(49) | BIT_ULL(54) | BIT_ULL(55)); - - vmx_fixed1_msr_test(vcpu, MSR_IA32_VMX_MISC, - BIT_ULL(5) | GENMASK_ULL(8, 6) | BIT_ULL(14) | - BIT_ULL(15) | BIT_ULL(28) | BIT_ULL(29) | BIT_ULL(30)); - - vmx_fixed0and1_msr_test(vcpu, MSR_IA32_VMX_PROCBASED_CTLS2); - vmx_fixed1_msr_test(vcpu, MSR_IA32_VMX_EPT_VPID_CAP, -1ull); - vmx_fixed0and1_msr_test(vcpu, MSR_IA32_VMX_TRUE_PINBASED_CTLS); - vmx_fixed0and1_msr_test(vcpu, MSR_IA32_VMX_TRUE_PROCBASED_CTLS); - vmx_fixed0and1_msr_test(vcpu, MSR_IA32_VMX_TRUE_EXIT_CTLS); - vmx_fixed0and1_msr_test(vcpu, MSR_IA32_VMX_TRUE_ENTRY_CTLS); - vmx_fixed1_msr_test(vcpu, MSR_IA32_VMX_VMFUNC, -1ull); -} - -static void __ia32_feature_control_msr_test(struct kvm_vcpu *vcpu, - uint64_t msr_bit, - struct kvm_x86_cpu_feature feature) -{ - uint64_t val; - - vcpu_clear_cpuid_feature(vcpu, feature); - - val = vcpu_get_msr(vcpu, MSR_IA32_FEAT_CTL); - vcpu_set_msr(vcpu, MSR_IA32_FEAT_CTL, val | msr_bit | FEAT_CTL_LOCKED); - vcpu_set_msr(vcpu, MSR_IA32_FEAT_CTL, (val & ~msr_bit) | FEAT_CTL_LOCKED); - vcpu_set_msr(vcpu, MSR_IA32_FEAT_CTL, val | msr_bit | FEAT_CTL_LOCKED); - vcpu_set_msr(vcpu, MSR_IA32_FEAT_CTL, (val & ~msr_bit) | FEAT_CTL_LOCKED); - vcpu_set_msr(vcpu, MSR_IA32_FEAT_CTL, val); - - if (!kvm_cpu_has(feature)) - return; - - vcpu_set_cpuid_feature(vcpu, feature); -} - -static void ia32_feature_control_msr_test(struct kvm_vcpu *vcpu) -{ - uint64_t supported_bits = FEAT_CTL_LOCKED | - FEAT_CTL_VMX_ENABLED_INSIDE_SMX | - FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX | - FEAT_CTL_SGX_LC_ENABLED | - FEAT_CTL_SGX_ENABLED | - FEAT_CTL_LMCE_ENABLED; - int bit, r; - - __ia32_feature_control_msr_test(vcpu, FEAT_CTL_VMX_ENABLED_INSIDE_SMX, X86_FEATURE_SMX); - __ia32_feature_control_msr_test(vcpu, FEAT_CTL_VMX_ENABLED_INSIDE_SMX, X86_FEATURE_VMX); - __ia32_feature_control_msr_test(vcpu, FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX, X86_FEATURE_VMX); - __ia32_feature_control_msr_test(vcpu, FEAT_CTL_SGX_LC_ENABLED, X86_FEATURE_SGX_LC); - __ia32_feature_control_msr_test(vcpu, FEAT_CTL_SGX_LC_ENABLED, X86_FEATURE_SGX); - __ia32_feature_control_msr_test(vcpu, FEAT_CTL_SGX_ENABLED, X86_FEATURE_SGX); - __ia32_feature_control_msr_test(vcpu, FEAT_CTL_LMCE_ENABLED, X86_FEATURE_MCE); - - for_each_clear_bit(bit, &supported_bits, 64) { - r = _vcpu_set_msr(vcpu, MSR_IA32_FEAT_CTL, BIT(bit)); - TEST_ASSERT(r == 0, - "Setting reserved bit %d in IA32_FEATURE_CONTROL should fail", bit); - } -} - -int 
main(void) -{ - struct kvm_vcpu *vcpu; - struct kvm_vm *vm; - - TEST_REQUIRE(kvm_has_cap(KVM_CAP_DISABLE_QUIRKS2)); - TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX)); - - /* No need to actually do KVM_RUN, thus no guest code. */ - vm = vm_create_with_one_vcpu(&vcpu, NULL); - - vmx_save_restore_msrs_test(vcpu); - ia32_feature_control_msr_test(vcpu); - - kvm_vm_free(vm); -} diff --git a/tools/testing/selftests/kvm/x86_64/vmx_nested_tsc_scaling_test.c b/tools/testing/selftests/kvm/x86_64/vmx_nested_tsc_scaling_test.c deleted file mode 100644 index 1759fa5cb3f2..000000000000 --- a/tools/testing/selftests/kvm/x86_64/vmx_nested_tsc_scaling_test.c +++ /dev/null @@ -1,206 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * vmx_nested_tsc_scaling_test - * - * Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * This test case verifies that nested TSC scaling behaves as expected when - * both L1 and L2 are scaled using different ratios. For this test we scale - * L1 down and scale L2 up. - */ - -#include - -#include "kvm_util.h" -#include "vmx.h" -#include "kselftest.h" - -/* L2 is scaled up (from L1's perspective) by this factor */ -#define L2_SCALE_FACTOR 4ULL - -#define TSC_OFFSET_L2 ((uint64_t) -33125236320908) -#define TSC_MULTIPLIER_L2 (L2_SCALE_FACTOR << 48) - -#define L2_GUEST_STACK_SIZE 64 - -enum { USLEEP, UCHECK_L1, UCHECK_L2 }; -#define GUEST_SLEEP(sec) ucall(UCALL_SYNC, 2, USLEEP, sec) -#define GUEST_CHECK(level, freq) ucall(UCALL_SYNC, 2, level, freq) - - -/* - * This function checks whether the "actual" TSC frequency of a guest matches - * its expected frequency. In order to account for delays in taking the TSC - * measurements, a difference of 1% between the actual and the expected value - * is tolerated. - */ -static void compare_tsc_freq(uint64_t actual, uint64_t expected) -{ - uint64_t tolerance, thresh_low, thresh_high; - - tolerance = expected / 100; - thresh_low = expected - tolerance; - thresh_high = expected + tolerance; - - TEST_ASSERT(thresh_low < actual, - "TSC freq is expected to be between %"PRIu64" and %"PRIu64 - " but it actually is %"PRIu64, - thresh_low, thresh_high, actual); - TEST_ASSERT(thresh_high > actual, - "TSC freq is expected to be between %"PRIu64" and %"PRIu64 - " but it actually is %"PRIu64, - thresh_low, thresh_high, actual); -} - -static void check_tsc_freq(int level) -{ - uint64_t tsc_start, tsc_end, tsc_freq; - - /* - * Reading the TSC twice with about a second's difference should give - * us an approximation of the TSC frequency from the guest's - * perspective. Now, this won't be completely accurate, but it should - * be good enough for the purposes of this test. 
- */ - tsc_start = rdmsr(MSR_IA32_TSC); - GUEST_SLEEP(1); - tsc_end = rdmsr(MSR_IA32_TSC); - - tsc_freq = tsc_end - tsc_start; - - GUEST_CHECK(level, tsc_freq); -} - -static void l2_guest_code(void) -{ - check_tsc_freq(UCHECK_L2); - - /* exit to L1 */ - __asm__ __volatile__("vmcall"); -} - -static void l1_guest_code(struct vmx_pages *vmx_pages) -{ - unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; - uint32_t control; - - /* check that L1's frequency looks alright before launching L2 */ - check_tsc_freq(UCHECK_L1); - - GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages)); - GUEST_ASSERT(load_vmcs(vmx_pages)); - - /* prepare the VMCS for L2 execution */ - prepare_vmcs(vmx_pages, l2_guest_code, &l2_guest_stack[L2_GUEST_STACK_SIZE]); - - /* enable TSC offsetting and TSC scaling for L2 */ - control = vmreadz(CPU_BASED_VM_EXEC_CONTROL); - control |= CPU_BASED_USE_MSR_BITMAPS | CPU_BASED_USE_TSC_OFFSETTING; - vmwrite(CPU_BASED_VM_EXEC_CONTROL, control); - - control = vmreadz(SECONDARY_VM_EXEC_CONTROL); - control |= SECONDARY_EXEC_TSC_SCALING; - vmwrite(SECONDARY_VM_EXEC_CONTROL, control); - - vmwrite(TSC_OFFSET, TSC_OFFSET_L2); - vmwrite(TSC_MULTIPLIER, TSC_MULTIPLIER_L2); - vmwrite(TSC_MULTIPLIER_HIGH, TSC_MULTIPLIER_L2 >> 32); - - /* launch L2 */ - GUEST_ASSERT(!vmlaunch()); - GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL); - - /* check that L1's frequency still looks good */ - check_tsc_freq(UCHECK_L1); - - GUEST_DONE(); -} - -int main(int argc, char *argv[]) -{ - struct kvm_vcpu *vcpu; - struct kvm_vm *vm; - vm_vaddr_t vmx_pages_gva; - - uint64_t tsc_start, tsc_end; - uint64_t tsc_khz; - uint64_t l1_scale_factor; - uint64_t l0_tsc_freq = 0; - uint64_t l1_tsc_freq = 0; - uint64_t l2_tsc_freq = 0; - - TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX)); - TEST_REQUIRE(kvm_has_cap(KVM_CAP_TSC_CONTROL)); - TEST_REQUIRE(sys_clocksource_is_based_on_tsc()); - - /* - * We set L1's scale factor to be a random number from 2 to 10. - * Ideally we would do the same for L2's factor but that one is - * referenced by both main() and l1_guest_code() and using a global - * variable does not work. 
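/*
 * Side note with a sketch, not from the removed test: the VMX TSC multiplier
 * programmed above (TSC_MULTIPLIER_L2) is a fixed-point value with 48
 * fractional bits, so L2's observed TSC rate is L1's rate scaled by
 * multiplier / 2^48, with TSC_OFFSET added afterwards.  The helper below is
 * illustrative only and relies on the GCC/Clang unsigned __int128 extension.
 */
static inline uint64_t scale_tsc(uint64_t tsc, uint64_t multiplier)
{
	/* 1.0 is encoded as 1ull << 48, e.g. L2_SCALE_FACTOR << 48 above. */
	return (uint64_t)(((unsigned __int128)tsc * multiplier) >> 48);
}

/*
 * Relations the test expects to hold (to within the 1% tolerance):
 *   l1_tsc_freq ~= l0_tsc_freq / l1_scale_factor           (KVM_SET_TSC_KHZ)
 *   l2_tsc_freq ~= scale_tsc(l1_tsc_freq, TSC_MULTIPLIER_L2)
 *               == l1_tsc_freq * L2_SCALE_FACTOR
 */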
- */ - srand(time(NULL)); - l1_scale_factor = (rand() % 9) + 2; - printf("L1's scale down factor is: %"PRIu64"\n", l1_scale_factor); - printf("L2's scale up factor is: %llu\n", L2_SCALE_FACTOR); - - tsc_start = rdtsc(); - sleep(1); - tsc_end = rdtsc(); - - l0_tsc_freq = tsc_end - tsc_start; - printf("real TSC frequency is around: %"PRIu64"\n", l0_tsc_freq); - - vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code); - vcpu_alloc_vmx(vm, &vmx_pages_gva); - vcpu_args_set(vcpu, 1, vmx_pages_gva); - - tsc_khz = __vcpu_ioctl(vcpu, KVM_GET_TSC_KHZ, NULL); - TEST_ASSERT(tsc_khz != -1, "vcpu ioctl KVM_GET_TSC_KHZ failed"); - - /* scale down L1's TSC frequency */ - vcpu_ioctl(vcpu, KVM_SET_TSC_KHZ, (void *) (tsc_khz / l1_scale_factor)); - - for (;;) { - struct ucall uc; - - vcpu_run(vcpu); - TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); - - switch (get_ucall(vcpu, &uc)) { - case UCALL_ABORT: - REPORT_GUEST_ASSERT(uc); - case UCALL_SYNC: - switch (uc.args[0]) { - case USLEEP: - sleep(uc.args[1]); - break; - case UCHECK_L1: - l1_tsc_freq = uc.args[1]; - printf("L1's TSC frequency is around: %"PRIu64 - "\n", l1_tsc_freq); - - compare_tsc_freq(l1_tsc_freq, - l0_tsc_freq / l1_scale_factor); - break; - case UCHECK_L2: - l2_tsc_freq = uc.args[1]; - printf("L2's TSC frequency is around: %"PRIu64 - "\n", l2_tsc_freq); - - compare_tsc_freq(l2_tsc_freq, - l1_tsc_freq * L2_SCALE_FACTOR); - break; - } - break; - case UCALL_DONE: - goto done; - default: - TEST_FAIL("Unknown ucall %lu", uc.cmd); - } - } - -done: - kvm_vm_free(vm); - return 0; -} diff --git a/tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c b/tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c deleted file mode 100644 index a1f5ff45d518..000000000000 --- a/tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c +++ /dev/null @@ -1,247 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Test for VMX-pmu perf capability msr - * - * Copyright (C) 2021 Intel Corporation - * - * Test to check the effect of various CPUID settings on - * MSR_IA32_PERF_CAPABILITIES MSR, and check that what - * we write with KVM_SET_MSR is _not_ modified by the guest - * and check it can be retrieved with KVM_GET_MSR, also test - * the invalid LBR formats are rejected. - */ -#include - -#include - -#include "kvm_test_harness.h" -#include "kvm_util.h" -#include "vmx.h" - -static union perf_capabilities { - struct { - u64 lbr_format:6; - u64 pebs_trap:1; - u64 pebs_arch_reg:1; - u64 pebs_format:4; - u64 smm_freeze:1; - u64 full_width_write:1; - u64 pebs_baseline:1; - u64 perf_metrics:1; - u64 pebs_output_pt_available:1; - u64 anythread_deprecated:1; - }; - u64 capabilities; -} host_cap; - -/* - * The LBR format and most PEBS features are immutable, all other features are - * fungible (if supported by the host and KVM). 
- */ -static const union perf_capabilities immutable_caps = { - .lbr_format = -1, - .pebs_trap = 1, - .pebs_arch_reg = 1, - .pebs_format = -1, - .pebs_baseline = 1, -}; - -static const union perf_capabilities format_caps = { - .lbr_format = -1, - .pebs_format = -1, -}; - -static void guest_test_perf_capabilities_gp(uint64_t val) -{ - uint8_t vector = wrmsr_safe(MSR_IA32_PERF_CAPABILITIES, val); - - __GUEST_ASSERT(vector == GP_VECTOR, - "Expected #GP for value '0x%lx', got vector '0x%x'", - val, vector); -} - -static void guest_code(uint64_t current_val) -{ - int i; - - guest_test_perf_capabilities_gp(current_val); - guest_test_perf_capabilities_gp(0); - - for (i = 0; i < 64; i++) - guest_test_perf_capabilities_gp(current_val ^ BIT_ULL(i)); - - GUEST_DONE(); -} - -KVM_ONE_VCPU_TEST_SUITE(vmx_pmu_caps); - -/* - * Verify that guest WRMSRs to PERF_CAPABILITIES #GP regardless of the value - * written, that the guest always sees the userspace controlled value, and that - * PERF_CAPABILITIES is immutable after KVM_RUN. - */ -KVM_ONE_VCPU_TEST(vmx_pmu_caps, guest_wrmsr_perf_capabilities, guest_code) -{ - struct ucall uc; - int r, i; - - vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, host_cap.capabilities); - - vcpu_args_set(vcpu, 1, host_cap.capabilities); - vcpu_run(vcpu); - - switch (get_ucall(vcpu, &uc)) { - case UCALL_ABORT: - REPORT_GUEST_ASSERT(uc); - break; - case UCALL_DONE: - break; - default: - TEST_FAIL("Unexpected ucall: %lu", uc.cmd); - } - - TEST_ASSERT_EQ(vcpu_get_msr(vcpu, MSR_IA32_PERF_CAPABILITIES), - host_cap.capabilities); - - vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, host_cap.capabilities); - - r = _vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, 0); - TEST_ASSERT(!r, "Post-KVM_RUN write '0' didn't fail"); - - for (i = 0; i < 64; i++) { - r = _vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, - host_cap.capabilities ^ BIT_ULL(i)); - TEST_ASSERT(!r, "Post-KVM_RUN write '0x%llx'didn't fail", - host_cap.capabilities ^ BIT_ULL(i)); - } -} - -/* - * Verify KVM allows writing PERF_CAPABILITIES with all KVM-supported features - * enabled, as well as '0' (to disable all features). - */ -KVM_ONE_VCPU_TEST(vmx_pmu_caps, basic_perf_capabilities, guest_code) -{ - vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, 0); - vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, host_cap.capabilities); -} - -KVM_ONE_VCPU_TEST(vmx_pmu_caps, fungible_perf_capabilities, guest_code) -{ - const uint64_t fungible_caps = host_cap.capabilities & ~immutable_caps.capabilities; - int bit; - - for_each_set_bit(bit, &fungible_caps, 64) { - vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, BIT_ULL(bit)); - vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, - host_cap.capabilities & ~BIT_ULL(bit)); - } - vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, host_cap.capabilities); -} - -/* - * Verify KVM rejects attempts to set unsupported and/or immutable features in - * PERF_CAPABILITIES. Note, LBR format and PEBS format need to be validated - * separately as they are multi-bit values, e.g. toggling or setting a single - * bit can generate a false positive without dedicated safeguards. 
- */ -KVM_ONE_VCPU_TEST(vmx_pmu_caps, immutable_perf_capabilities, guest_code) -{ - const uint64_t reserved_caps = (~host_cap.capabilities | - immutable_caps.capabilities) & - ~format_caps.capabilities; - union perf_capabilities val = host_cap; - int r, bit; - - for_each_set_bit(bit, &reserved_caps, 64) { - r = _vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, - host_cap.capabilities ^ BIT_ULL(bit)); - TEST_ASSERT(!r, "%s immutable feature 0x%llx (bit %d) didn't fail", - host_cap.capabilities & BIT_ULL(bit) ? "Setting" : "Clearing", - BIT_ULL(bit), bit); - } - - /* - * KVM only supports the host's native LBR format, as well as '0' (to - * disable LBR support). Verify KVM rejects all other LBR formats. - */ - for (val.lbr_format = 1; val.lbr_format; val.lbr_format++) { - if (val.lbr_format == host_cap.lbr_format) - continue; - - r = _vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, val.capabilities); - TEST_ASSERT(!r, "Bad LBR FMT = 0x%x didn't fail, host = 0x%x", - val.lbr_format, host_cap.lbr_format); - } - - /* Ditto for the PEBS format. */ - for (val.pebs_format = 1; val.pebs_format; val.pebs_format++) { - if (val.pebs_format == host_cap.pebs_format) - continue; - - r = _vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, val.capabilities); - TEST_ASSERT(!r, "Bad PEBS FMT = 0x%x didn't fail, host = 0x%x", - val.pebs_format, host_cap.pebs_format); - } -} - -/* - * Test that LBR MSRs are writable when LBRs are enabled, and then verify that - * disabling the vPMU via CPUID also disables LBR support. Set bits 2:0 of - * LBR_TOS as those bits are writable across all uarch implementations (arch - * LBRs will need to poke a different MSR). - */ -KVM_ONE_VCPU_TEST(vmx_pmu_caps, lbr_perf_capabilities, guest_code) -{ - int r; - - if (!host_cap.lbr_format) - return; - - vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, host_cap.capabilities); - vcpu_set_msr(vcpu, MSR_LBR_TOS, 7); - - vcpu_clear_cpuid_entry(vcpu, X86_PROPERTY_PMU_VERSION.function); - - r = _vcpu_set_msr(vcpu, MSR_LBR_TOS, 7); - TEST_ASSERT(!r, "Writing LBR_TOS should fail after disabling vPMU"); -} - -KVM_ONE_VCPU_TEST(vmx_pmu_caps, perf_capabilities_unsupported, guest_code) -{ - uint64_t val; - int i, r; - - vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, host_cap.capabilities); - val = vcpu_get_msr(vcpu, MSR_IA32_PERF_CAPABILITIES); - TEST_ASSERT_EQ(val, host_cap.capabilities); - - vcpu_clear_cpuid_feature(vcpu, X86_FEATURE_PDCM); - - val = vcpu_get_msr(vcpu, MSR_IA32_PERF_CAPABILITIES); - TEST_ASSERT_EQ(val, 0); - - vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, 0); - - for (i = 0; i < 64; i++) { - r = _vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, BIT_ULL(i)); - TEST_ASSERT(!r, "Setting PERF_CAPABILITIES bit %d (= 0x%llx) should fail without PDCM", - i, BIT_ULL(i)); - } -} - -int main(int argc, char *argv[]) -{ - TEST_REQUIRE(kvm_is_pmu_enabled()); - TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_PDCM)); - - TEST_REQUIRE(kvm_cpu_has_p(X86_PROPERTY_PMU_VERSION)); - TEST_REQUIRE(kvm_cpu_property(X86_PROPERTY_PMU_VERSION) > 0); - - host_cap.capabilities = kvm_get_feature_msr(MSR_IA32_PERF_CAPABILITIES); - - TEST_ASSERT(host_cap.full_width_write, - "Full-width writes should always be supported"); - - return test_harness_run(argc, argv); -} diff --git a/tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c b/tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c deleted file mode 100644 index 00dd2ac07a61..000000000000 --- a/tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c +++ /dev/null @@ -1,245 +0,0 @@ -// 
SPDX-License-Identifier: GPL-2.0-only -/* - * VMX-preemption timer test - * - * Copyright (C) 2020, Google, LLC. - * - * Test to ensure the VM-Enter after migration doesn't - * incorrectly restarts the timer with the full timer - * value instead of partially decayed timer value - * - */ -#include -#include -#include -#include -#include - -#include "test_util.h" - -#include "kvm_util.h" -#include "processor.h" -#include "vmx.h" - -#define PREEMPTION_TIMER_VALUE 100000000ull -#define PREEMPTION_TIMER_VALUE_THRESHOLD1 80000000ull - -u32 vmx_pt_rate; -bool l2_save_restore_done; -static u64 l2_vmx_pt_start; -volatile u64 l2_vmx_pt_finish; - -union vmx_basic basic; -union vmx_ctrl_msr ctrl_pin_rev; -union vmx_ctrl_msr ctrl_exit_rev; - -void l2_guest_code(void) -{ - u64 vmx_pt_delta; - - vmcall(); - l2_vmx_pt_start = (rdtsc() >> vmx_pt_rate) << vmx_pt_rate; - - /* - * Wait until the 1st threshold has passed - */ - do { - l2_vmx_pt_finish = rdtsc(); - vmx_pt_delta = (l2_vmx_pt_finish - l2_vmx_pt_start) >> - vmx_pt_rate; - } while (vmx_pt_delta < PREEMPTION_TIMER_VALUE_THRESHOLD1); - - /* - * Force L2 through Save and Restore cycle - */ - GUEST_SYNC(1); - - l2_save_restore_done = 1; - - /* - * Now wait for the preemption timer to fire and - * exit to L1 - */ - while ((l2_vmx_pt_finish = rdtsc())) - ; -} - -void l1_guest_code(struct vmx_pages *vmx_pages) -{ -#define L2_GUEST_STACK_SIZE 64 - unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; - u64 l1_vmx_pt_start; - u64 l1_vmx_pt_finish; - u64 l1_tsc_deadline, l2_tsc_deadline; - - GUEST_ASSERT(vmx_pages->vmcs_gpa); - GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages)); - GUEST_ASSERT(load_vmcs(vmx_pages)); - GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa); - - prepare_vmcs(vmx_pages, l2_guest_code, - &l2_guest_stack[L2_GUEST_STACK_SIZE]); - - /* - * Check for Preemption timer support - */ - basic.val = rdmsr(MSR_IA32_VMX_BASIC); - ctrl_pin_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_PINBASED_CTLS - : MSR_IA32_VMX_PINBASED_CTLS); - ctrl_exit_rev.val = rdmsr(basic.ctrl ? 
MSR_IA32_VMX_TRUE_EXIT_CTLS - : MSR_IA32_VMX_EXIT_CTLS); - - if (!(ctrl_pin_rev.clr & PIN_BASED_VMX_PREEMPTION_TIMER) || - !(ctrl_exit_rev.clr & VM_EXIT_SAVE_VMX_PREEMPTION_TIMER)) - return; - - GUEST_ASSERT(!vmlaunch()); - GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL); - vmwrite(GUEST_RIP, vmreadz(GUEST_RIP) + vmreadz(VM_EXIT_INSTRUCTION_LEN)); - - /* - * Turn on PIN control and resume the guest - */ - GUEST_ASSERT(!vmwrite(PIN_BASED_VM_EXEC_CONTROL, - vmreadz(PIN_BASED_VM_EXEC_CONTROL) | - PIN_BASED_VMX_PREEMPTION_TIMER)); - - GUEST_ASSERT(!vmwrite(VMX_PREEMPTION_TIMER_VALUE, - PREEMPTION_TIMER_VALUE)); - - vmx_pt_rate = rdmsr(MSR_IA32_VMX_MISC) & 0x1F; - - l2_save_restore_done = 0; - - l1_vmx_pt_start = (rdtsc() >> vmx_pt_rate) << vmx_pt_rate; - - GUEST_ASSERT(!vmresume()); - - l1_vmx_pt_finish = rdtsc(); - - /* - * Ensure exit from L2 happens after L2 goes through - * save and restore - */ - GUEST_ASSERT(l2_save_restore_done); - - /* - * Ensure the exit from L2 is due to preemption timer expiry - */ - GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_PREEMPTION_TIMER); - - l1_tsc_deadline = l1_vmx_pt_start + - (PREEMPTION_TIMER_VALUE << vmx_pt_rate); - - l2_tsc_deadline = l2_vmx_pt_start + - (PREEMPTION_TIMER_VALUE << vmx_pt_rate); - - /* - * Sync with the host and pass the l1|l2 pt_expiry_finish times and - * tsc deadlines so that host can verify they are as expected - */ - GUEST_SYNC_ARGS(2, l1_vmx_pt_finish, l1_tsc_deadline, - l2_vmx_pt_finish, l2_tsc_deadline); -} - -void guest_code(struct vmx_pages *vmx_pages) -{ - if (vmx_pages) - l1_guest_code(vmx_pages); - - GUEST_DONE(); -} - -int main(int argc, char *argv[]) -{ - vm_vaddr_t vmx_pages_gva = 0; - - struct kvm_regs regs1, regs2; - struct kvm_vm *vm; - struct kvm_vcpu *vcpu; - struct kvm_x86_state *state; - struct ucall uc; - int stage; - - /* - * AMD currently does not implement any VMX features, so for now we - * just early out. - */ - TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX)); - - TEST_REQUIRE(kvm_has_cap(KVM_CAP_NESTED_STATE)); - - /* Create VM */ - vm = vm_create_with_one_vcpu(&vcpu, guest_code); - - vcpu_regs_get(vcpu, ®s1); - - vcpu_alloc_vmx(vm, &vmx_pages_gva); - vcpu_args_set(vcpu, 1, vmx_pages_gva); - - for (stage = 1;; stage++) { - vcpu_run(vcpu); - TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); - - switch (get_ucall(vcpu, &uc)) { - case UCALL_ABORT: - REPORT_GUEST_ASSERT(uc); - /* NOT REACHED */ - case UCALL_SYNC: - break; - case UCALL_DONE: - goto done; - default: - TEST_FAIL("Unknown ucall %lu", uc.cmd); - } - - /* UCALL_SYNC is handled here. */ - TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") && - uc.args[1] == stage, "Stage %d: Unexpected register values vmexit, got %lx", - stage, (ulong)uc.args[1]); - /* - * If this stage 2 then we should verify the vmx pt expiry - * is as expected. - * From L1's perspective verify Preemption timer hasn't - * expired too early. - * From L2's perspective verify Preemption timer hasn't - * expired too late. 
- */ - if (stage == 2) { - - pr_info("Stage %d: L1 PT expiry TSC (%lu) , L1 TSC deadline (%lu)\n", - stage, uc.args[2], uc.args[3]); - - pr_info("Stage %d: L2 PT expiry TSC (%lu) , L2 TSC deadline (%lu)\n", - stage, uc.args[4], uc.args[5]); - - TEST_ASSERT(uc.args[2] >= uc.args[3], - "Stage %d: L1 PT expiry TSC (%lu) < L1 TSC deadline (%lu)", - stage, uc.args[2], uc.args[3]); - - TEST_ASSERT(uc.args[4] < uc.args[5], - "Stage %d: L2 PT expiry TSC (%lu) > L2 TSC deadline (%lu)", - stage, uc.args[4], uc.args[5]); - } - - state = vcpu_save_state(vcpu); - memset(®s1, 0, sizeof(regs1)); - vcpu_regs_get(vcpu, ®s1); - - kvm_vm_release(vm); - - /* Restore state in a new VM. */ - vcpu = vm_recreate_with_one_vcpu(vm); - vcpu_load_state(vcpu, state); - kvm_x86_state_cleanup(state); - - memset(®s2, 0, sizeof(regs2)); - vcpu_regs_get(vcpu, ®s2); - TEST_ASSERT(!memcmp(®s1, ®s2, sizeof(regs2)), - "Unexpected register values after vcpu_load_state; rdi: %lx rsi: %lx", - (ulong) regs2.rdi, (ulong) regs2.rsi); - } - -done: - kvm_vm_free(vm); -} diff --git a/tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c b/tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c deleted file mode 100644 index 67a62a5a8895..000000000000 --- a/tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c +++ /dev/null @@ -1,304 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * vmx_set_nested_state_test - * - * Copyright (C) 2019, Google LLC. - * - * This test verifies the integrity of calling the ioctl KVM_SET_NESTED_STATE. - */ - -#include "test_util.h" -#include "kvm_util.h" -#include "processor.h" -#include "vmx.h" - -#include -#include -#include -#include -#include - -/* - * Mirror of VMCS12_REVISION in arch/x86/kvm/vmx/vmcs12.h. If that value - * changes this should be updated. - */ -#define VMCS12_REVISION 0x11e57ed0 - -bool have_evmcs; - -void test_nested_state(struct kvm_vcpu *vcpu, struct kvm_nested_state *state) -{ - vcpu_nested_state_set(vcpu, state); -} - -void test_nested_state_expect_errno(struct kvm_vcpu *vcpu, - struct kvm_nested_state *state, - int expected_errno) -{ - int rv; - - rv = __vcpu_nested_state_set(vcpu, state); - TEST_ASSERT(rv == -1 && errno == expected_errno, - "Expected %s (%d) from vcpu_nested_state_set but got rv: %i errno: %s (%d)", - strerror(expected_errno), expected_errno, rv, strerror(errno), - errno); -} - -void test_nested_state_expect_einval(struct kvm_vcpu *vcpu, - struct kvm_nested_state *state) -{ - test_nested_state_expect_errno(vcpu, state, EINVAL); -} - -void test_nested_state_expect_efault(struct kvm_vcpu *vcpu, - struct kvm_nested_state *state) -{ - test_nested_state_expect_errno(vcpu, state, EFAULT); -} - -void set_revision_id_for_vmcs12(struct kvm_nested_state *state, - u32 vmcs12_revision) -{ - /* Set revision_id in vmcs12 to vmcs12_revision. 
*/ - memcpy(&state->data, &vmcs12_revision, sizeof(u32)); -} - -void set_default_state(struct kvm_nested_state *state) -{ - memset(state, 0, sizeof(*state)); - state->flags = KVM_STATE_NESTED_RUN_PENDING | - KVM_STATE_NESTED_GUEST_MODE; - state->format = 0; - state->size = sizeof(*state); -} - -void set_default_vmx_state(struct kvm_nested_state *state, int size) -{ - memset(state, 0, size); - if (have_evmcs) - state->flags = KVM_STATE_NESTED_EVMCS; - state->format = 0; - state->size = size; - state->hdr.vmx.vmxon_pa = 0x1000; - state->hdr.vmx.vmcs12_pa = 0x2000; - state->hdr.vmx.smm.flags = 0; - set_revision_id_for_vmcs12(state, VMCS12_REVISION); -} - -void test_vmx_nested_state(struct kvm_vcpu *vcpu) -{ - /* Add a page for VMCS12. */ - const int state_sz = sizeof(struct kvm_nested_state) + getpagesize(); - struct kvm_nested_state *state = - (struct kvm_nested_state *)malloc(state_sz); - - /* The format must be set to 0. 0 for VMX, 1 for SVM. */ - set_default_vmx_state(state, state_sz); - state->format = 1; - test_nested_state_expect_einval(vcpu, state); - - /* - * We cannot virtualize anything if the guest does not have VMX - * enabled. - */ - set_default_vmx_state(state, state_sz); - test_nested_state_expect_einval(vcpu, state); - - /* - * We cannot virtualize anything if the guest does not have VMX - * enabled. We expect KVM_SET_NESTED_STATE to return 0 if vmxon_pa - * is set to -1ull, but the flags must be zero. - */ - set_default_vmx_state(state, state_sz); - state->hdr.vmx.vmxon_pa = -1ull; - test_nested_state_expect_einval(vcpu, state); - - state->hdr.vmx.vmcs12_pa = -1ull; - state->flags = KVM_STATE_NESTED_EVMCS; - test_nested_state_expect_einval(vcpu, state); - - state->flags = 0; - test_nested_state(vcpu, state); - - /* Enable VMX in the guest CPUID. */ - vcpu_set_cpuid_feature(vcpu, X86_FEATURE_VMX); - - /* - * Setting vmxon_pa == -1ull and vmcs_pa == -1ull exits early without - * setting the nested state. When the eVMCS flag is not set, the - * expected return value is '0'. - */ - set_default_vmx_state(state, state_sz); - state->flags = 0; - state->hdr.vmx.vmxon_pa = -1ull; - state->hdr.vmx.vmcs12_pa = -1ull; - test_nested_state(vcpu, state); - - /* - * When eVMCS is supported, the eVMCS flag can only be set if the - * enlightened VMCS capability has been enabled. - */ - if (have_evmcs) { - state->flags = KVM_STATE_NESTED_EVMCS; - test_nested_state_expect_einval(vcpu, state); - vcpu_enable_evmcs(vcpu); - test_nested_state(vcpu, state); - } - - /* It is invalid to have vmxon_pa == -1ull and SMM flags non-zero. */ - state->hdr.vmx.smm.flags = 1; - test_nested_state_expect_einval(vcpu, state); - - /* Invalid flags are rejected. */ - set_default_vmx_state(state, state_sz); - state->hdr.vmx.flags = ~0; - test_nested_state_expect_einval(vcpu, state); - - /* It is invalid to have vmxon_pa == -1ull and vmcs_pa != -1ull. */ - set_default_vmx_state(state, state_sz); - state->hdr.vmx.vmxon_pa = -1ull; - state->flags = 0; - test_nested_state_expect_einval(vcpu, state); - - /* It is invalid to have vmxon_pa set to a non-page aligned address. */ - set_default_vmx_state(state, state_sz); - state->hdr.vmx.vmxon_pa = 1; - test_nested_state_expect_einval(vcpu, state); - - /* - * It is invalid to have KVM_STATE_NESTED_SMM_GUEST_MODE and - * KVM_STATE_NESTED_GUEST_MODE set together. 
- */ - set_default_vmx_state(state, state_sz); - state->flags = KVM_STATE_NESTED_GUEST_MODE | - KVM_STATE_NESTED_RUN_PENDING; - state->hdr.vmx.smm.flags = KVM_STATE_NESTED_SMM_GUEST_MODE; - test_nested_state_expect_einval(vcpu, state); - - /* - * It is invalid to have any of the SMM flags set besides: - * KVM_STATE_NESTED_SMM_GUEST_MODE - * KVM_STATE_NESTED_SMM_VMXON - */ - set_default_vmx_state(state, state_sz); - state->hdr.vmx.smm.flags = ~(KVM_STATE_NESTED_SMM_GUEST_MODE | - KVM_STATE_NESTED_SMM_VMXON); - test_nested_state_expect_einval(vcpu, state); - - /* Outside SMM, SMM flags must be zero. */ - set_default_vmx_state(state, state_sz); - state->flags = 0; - state->hdr.vmx.smm.flags = KVM_STATE_NESTED_SMM_GUEST_MODE; - test_nested_state_expect_einval(vcpu, state); - - /* - * Size must be large enough to fit kvm_nested_state and vmcs12 - * if VMCS12 physical address is set - */ - set_default_vmx_state(state, state_sz); - state->size = sizeof(*state); - state->flags = 0; - test_nested_state_expect_einval(vcpu, state); - - set_default_vmx_state(state, state_sz); - state->size = sizeof(*state); - state->flags = 0; - state->hdr.vmx.vmcs12_pa = -1; - test_nested_state(vcpu, state); - - /* - * KVM_SET_NESTED_STATE succeeds with invalid VMCS - * contents but L2 not running. - */ - set_default_vmx_state(state, state_sz); - state->flags = 0; - test_nested_state(vcpu, state); - - /* Invalid flags are rejected, even if no VMCS loaded. */ - set_default_vmx_state(state, state_sz); - state->size = sizeof(*state); - state->flags = 0; - state->hdr.vmx.vmcs12_pa = -1; - state->hdr.vmx.flags = ~0; - test_nested_state_expect_einval(vcpu, state); - - /* vmxon_pa cannot be the same address as vmcs_pa. */ - set_default_vmx_state(state, state_sz); - state->hdr.vmx.vmxon_pa = 0; - state->hdr.vmx.vmcs12_pa = 0; - test_nested_state_expect_einval(vcpu, state); - - /* - * Test that if we leave nesting the state reflects that when we get - * it again. - */ - set_default_vmx_state(state, state_sz); - state->hdr.vmx.vmxon_pa = -1ull; - state->hdr.vmx.vmcs12_pa = -1ull; - state->flags = 0; - test_nested_state(vcpu, state); - vcpu_nested_state_get(vcpu, state); - TEST_ASSERT(state->size >= sizeof(*state) && state->size <= state_sz, - "Size must be between %ld and %d. The size returned was %d.", - sizeof(*state), state_sz, state->size); - TEST_ASSERT(state->hdr.vmx.vmxon_pa == -1ull, "vmxon_pa must be -1ull."); - TEST_ASSERT(state->hdr.vmx.vmcs12_pa == -1ull, "vmcs_pa must be -1ull."); - - free(state); -} - -int main(int argc, char *argv[]) -{ - struct kvm_vm *vm; - struct kvm_nested_state state; - struct kvm_vcpu *vcpu; - - have_evmcs = kvm_check_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS); - - TEST_REQUIRE(kvm_has_cap(KVM_CAP_NESTED_STATE)); - - /* - * AMD currently does not implement set_nested_state, so for now we - * just early out. - */ - TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX)); - - vm = vm_create_with_one_vcpu(&vcpu, NULL); - - /* - * First run tests with VMX disabled to check error handling. - */ - vcpu_clear_cpuid_feature(vcpu, X86_FEATURE_VMX); - - /* Passing a NULL kvm_nested_state causes a EFAULT. */ - test_nested_state_expect_efault(vcpu, NULL); - - /* 'size' cannot be smaller than sizeof(kvm_nested_state). */ - set_default_state(&state); - state.size = 0; - test_nested_state_expect_einval(vcpu, &state); - - /* - * Setting the flags 0xf fails the flags check. 
The only flags that - * can be used are: - * KVM_STATE_NESTED_GUEST_MODE - * KVM_STATE_NESTED_RUN_PENDING - * KVM_STATE_NESTED_EVMCS - */ - set_default_state(&state); - state.flags = 0xf; - test_nested_state_expect_einval(vcpu, &state); - - /* - * If KVM_STATE_NESTED_RUN_PENDING is set then - * KVM_STATE_NESTED_GUEST_MODE has to be set as well. - */ - set_default_state(&state); - state.flags = KVM_STATE_NESTED_RUN_PENDING; - test_nested_state_expect_einval(vcpu, &state); - - test_vmx_nested_state(vcpu); - - kvm_vm_free(vm); - return 0; -} diff --git a/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c b/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c deleted file mode 100644 index 2ceb5c78c442..000000000000 --- a/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c +++ /dev/null @@ -1,156 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * vmx_tsc_adjust_test - * - * Copyright (C) 2018, Google LLC. - * - * IA32_TSC_ADJUST test - * - * According to the SDM, "if an execution of WRMSR to the - * IA32_TIME_STAMP_COUNTER MSR adds (or subtracts) value X from the TSC, - * the logical processor also adds (or subtracts) value X from the - * IA32_TSC_ADJUST MSR. - * - * Note that when L1 doesn't intercept writes to IA32_TSC, a - * WRMSR(IA32_TSC) from L2 sets L1's TSC value, not L2's perceived TSC - * value. - * - * This test verifies that this unusual case is handled correctly. - */ - -#include "test_util.h" -#include "kvm_util.h" -#include "processor.h" -#include "vmx.h" - -#include -#include - -#include "kselftest.h" - -#ifndef MSR_IA32_TSC_ADJUST -#define MSR_IA32_TSC_ADJUST 0x3b -#endif - -#define TSC_ADJUST_VALUE (1ll << 32) -#define TSC_OFFSET_VALUE -(1ll << 48) - -enum { - PORT_ABORT = 0x1000, - PORT_REPORT, - PORT_DONE, -}; - -enum { - VMXON_PAGE = 0, - VMCS_PAGE, - MSR_BITMAP_PAGE, - - NUM_VMX_PAGES, -}; - -/* The virtual machine object. */ -static struct kvm_vm *vm; - -static void check_ia32_tsc_adjust(int64_t max) -{ - int64_t adjust; - - adjust = rdmsr(MSR_IA32_TSC_ADJUST); - GUEST_SYNC(adjust); - GUEST_ASSERT(adjust <= max); -} - -static void l2_guest_code(void) -{ - uint64_t l1_tsc = rdtsc() - TSC_OFFSET_VALUE; - - wrmsr(MSR_IA32_TSC, l1_tsc - TSC_ADJUST_VALUE); - check_ia32_tsc_adjust(-2 * TSC_ADJUST_VALUE); - - /* Exit to L1 */ - __asm__ __volatile__("vmcall"); -} - -static void l1_guest_code(struct vmx_pages *vmx_pages) -{ -#define L2_GUEST_STACK_SIZE 64 - unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; - uint32_t control; - uintptr_t save_cr3; - - GUEST_ASSERT(rdtsc() < TSC_ADJUST_VALUE); - wrmsr(MSR_IA32_TSC, rdtsc() - TSC_ADJUST_VALUE); - check_ia32_tsc_adjust(-1 * TSC_ADJUST_VALUE); - - GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages)); - GUEST_ASSERT(load_vmcs(vmx_pages)); - - /* Prepare the VMCS for L2 execution. */ - prepare_vmcs(vmx_pages, l2_guest_code, - &l2_guest_stack[L2_GUEST_STACK_SIZE]); - control = vmreadz(CPU_BASED_VM_EXEC_CONTROL); - control |= CPU_BASED_USE_MSR_BITMAPS | CPU_BASED_USE_TSC_OFFSETTING; - vmwrite(CPU_BASED_VM_EXEC_CONTROL, control); - vmwrite(TSC_OFFSET, TSC_OFFSET_VALUE); - - /* Jump into L2. First, test failure to load guest CR3. 
*/ - save_cr3 = vmreadz(GUEST_CR3); - vmwrite(GUEST_CR3, -1ull); - GUEST_ASSERT(!vmlaunch()); - GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == - (EXIT_REASON_FAILED_VMENTRY | EXIT_REASON_INVALID_STATE)); - check_ia32_tsc_adjust(-1 * TSC_ADJUST_VALUE); - vmwrite(GUEST_CR3, save_cr3); - - GUEST_ASSERT(!vmlaunch()); - GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL); - - check_ia32_tsc_adjust(-2 * TSC_ADJUST_VALUE); - - GUEST_DONE(); -} - -static void report(int64_t val) -{ - pr_info("IA32_TSC_ADJUST is %ld (%lld * TSC_ADJUST_VALUE + %lld).\n", - val, val / TSC_ADJUST_VALUE, val % TSC_ADJUST_VALUE); -} - -int main(int argc, char *argv[]) -{ - vm_vaddr_t vmx_pages_gva; - struct kvm_vcpu *vcpu; - - TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX)); - - vm = vm_create_with_one_vcpu(&vcpu, (void *) l1_guest_code); - - /* Allocate VMX pages and shared descriptors (vmx_pages). */ - vcpu_alloc_vmx(vm, &vmx_pages_gva); - vcpu_args_set(vcpu, 1, vmx_pages_gva); - - for (;;) { - struct ucall uc; - - vcpu_run(vcpu); - TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); - - switch (get_ucall(vcpu, &uc)) { - case UCALL_ABORT: - REPORT_GUEST_ASSERT(uc); - /* NOT REACHED */ - case UCALL_SYNC: - report(uc.args[1]); - break; - case UCALL_DONE: - goto done; - default: - TEST_FAIL("Unknown ucall %lu", uc.cmd); - } - } - -done: - kvm_vm_free(vm); - return 0; -} diff --git a/tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c b/tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c deleted file mode 100644 index a76078a08ff8..000000000000 --- a/tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c +++ /dev/null @@ -1,487 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * xapic_ipi_test - * - * Copyright (C) 2020, Google LLC. - * - * This work is licensed under the terms of the GNU GPL, version 2. - * - * Test that when the APIC is in xAPIC mode, a vCPU can send an IPI to wake - * another vCPU that is halted when KVM's backing page for the APIC access - * address has been moved by mm. - * - * The test starts two vCPUs: one that sends IPIs and one that continually - * executes HLT. The sender checks that the halter has woken from the HLT and - * has reentered HLT before sending the next IPI. While the vCPUs are running, - * the host continually calls migrate_pages to move all of the process' pages - * amongst the available numa nodes on the machine. - * - * Migration is a command line option. When used on non-numa machines will - * exit with error. Test is still usefull on non-numa for testing IPIs. - */ -#include -#include -#include -#include -#include - -#include "kvm_util.h" -#include "numaif.h" -#include "processor.h" -#include "test_util.h" -#include "vmx.h" - -/* Default running time for the test */ -#define DEFAULT_RUN_SECS 3 - -/* Default delay between migrate_pages calls (microseconds) */ -#define DEFAULT_DELAY_USECS 500000 - -/* - * Vector for IPI from sender vCPU to halting vCPU. - * Value is arbitrary and was chosen for the alternating bit pattern. Any - * value should work. - */ -#define IPI_VECTOR 0xa5 - -/* - * Incremented in the IPI handler. 
Provides evidence to the sender that the IPI - * arrived at the destination - */ -static volatile uint64_t ipis_rcvd; - -/* Data struct shared between host main thread and vCPUs */ -struct test_data_page { - uint32_t halter_apic_id; - volatile uint64_t hlt_count; - volatile uint64_t wake_count; - uint64_t ipis_sent; - uint64_t migrations_attempted; - uint64_t migrations_completed; - uint32_t icr; - uint32_t icr2; - uint32_t halter_tpr; - uint32_t halter_ppr; - - /* - * Record local version register as a cross-check that APIC access - * worked. Value should match what KVM reports (APIC_VERSION in - * arch/x86/kvm/lapic.c). If test is failing, check that values match - * to determine whether APIC access exits are working. - */ - uint32_t halter_lvr; -}; - -struct thread_params { - struct test_data_page *data; - struct kvm_vcpu *vcpu; - uint64_t *pipis_rcvd; /* host address of ipis_rcvd global */ -}; - -void verify_apic_base_addr(void) -{ - uint64_t msr = rdmsr(MSR_IA32_APICBASE); - uint64_t base = GET_APIC_BASE(msr); - - GUEST_ASSERT(base == APIC_DEFAULT_GPA); -} - -static void halter_guest_code(struct test_data_page *data) -{ - verify_apic_base_addr(); - xapic_enable(); - - data->halter_apic_id = GET_APIC_ID_FIELD(xapic_read_reg(APIC_ID)); - data->halter_lvr = xapic_read_reg(APIC_LVR); - - /* - * Loop forever HLTing and recording halts & wakes. Disable interrupts - * each time around to minimize window between signaling the pending - * halt to the sender vCPU and executing the halt. No need to disable on - * first run as this vCPU executes first and the host waits for it to - * signal going into first halt before starting the sender vCPU. Record - * TPR and PPR for diagnostic purposes in case the test fails. - */ - for (;;) { - data->halter_tpr = xapic_read_reg(APIC_TASKPRI); - data->halter_ppr = xapic_read_reg(APIC_PROCPRI); - data->hlt_count++; - asm volatile("sti; hlt; cli"); - data->wake_count++; - } -} - -/* - * Runs on halter vCPU when IPI arrives. Write an arbitrary non-zero value to - * enable diagnosing errant writes to the APIC access address backing page in - * case of test failure. - */ -static void guest_ipi_handler(struct ex_regs *regs) -{ - ipis_rcvd++; - xapic_write_reg(APIC_EOI, 77); -} - -static void sender_guest_code(struct test_data_page *data) -{ - uint64_t last_wake_count; - uint64_t last_hlt_count; - uint64_t last_ipis_rcvd_count; - uint32_t icr_val; - uint32_t icr2_val; - uint64_t tsc_start; - - verify_apic_base_addr(); - xapic_enable(); - - /* - * Init interrupt command register for sending IPIs - * - * Delivery mode=fixed, per SDM: - * "Delivers the interrupt specified in the vector field to the target - * processor." - * - * Destination mode=physical i.e. specify target by its local APIC - * ID. This vCPU assumes that the halter vCPU has already started and - * set data->halter_apic_id. - */ - icr_val = (APIC_DEST_PHYSICAL | APIC_DM_FIXED | IPI_VECTOR); - icr2_val = SET_APIC_DEST_FIELD(data->halter_apic_id); - data->icr = icr_val; - data->icr2 = icr2_val; - - last_wake_count = data->wake_count; - last_hlt_count = data->hlt_count; - last_ipis_rcvd_count = ipis_rcvd; - for (;;) { - /* - * Send IPI to halter vCPU. - * First IPI can be sent unconditionally because halter vCPU - * starts earlier. - */ - xapic_write_reg(APIC_ICR2, icr2_val); - xapic_write_reg(APIC_ICR, icr_val); - data->ipis_sent++; - - /* - * Wait up to ~1 sec for halter to indicate that it has: - * 1. Received the IPI - * 2. Woken up from the halt - * 3. 
Gone back into halt - * Current CPUs typically run at 2.x Ghz which is ~2 - * billion ticks per second. - */ - tsc_start = rdtsc(); - while (rdtsc() - tsc_start < 2000000000) { - if ((ipis_rcvd != last_ipis_rcvd_count) && - (data->wake_count != last_wake_count) && - (data->hlt_count != last_hlt_count)) - break; - } - - GUEST_ASSERT((ipis_rcvd != last_ipis_rcvd_count) && - (data->wake_count != last_wake_count) && - (data->hlt_count != last_hlt_count)); - - last_wake_count = data->wake_count; - last_hlt_count = data->hlt_count; - last_ipis_rcvd_count = ipis_rcvd; - } -} - -static void *vcpu_thread(void *arg) -{ - struct thread_params *params = (struct thread_params *)arg; - struct kvm_vcpu *vcpu = params->vcpu; - struct ucall uc; - int old; - int r; - - r = pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, &old); - TEST_ASSERT(r == 0, - "pthread_setcanceltype failed on vcpu_id=%u with errno=%d", - vcpu->id, r); - - fprintf(stderr, "vCPU thread running vCPU %u\n", vcpu->id); - vcpu_run(vcpu); - - TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); - - if (get_ucall(vcpu, &uc) == UCALL_ABORT) { - TEST_ASSERT(false, - "vCPU %u exited with error: %s.\n" - "Sending vCPU sent %lu IPIs to halting vCPU\n" - "Halting vCPU halted %lu times, woke %lu times, received %lu IPIs.\n" - "Halter TPR=%#x PPR=%#x LVR=%#x\n" - "Migrations attempted: %lu\n" - "Migrations completed: %lu", - vcpu->id, (const char *)uc.args[0], - params->data->ipis_sent, params->data->hlt_count, - params->data->wake_count, - *params->pipis_rcvd, params->data->halter_tpr, - params->data->halter_ppr, params->data->halter_lvr, - params->data->migrations_attempted, - params->data->migrations_completed); - } - - return NULL; -} - -static void cancel_join_vcpu_thread(pthread_t thread, struct kvm_vcpu *vcpu) -{ - void *retval; - int r; - - r = pthread_cancel(thread); - TEST_ASSERT(r == 0, - "pthread_cancel on vcpu_id=%d failed with errno=%d", - vcpu->id, r); - - r = pthread_join(thread, &retval); - TEST_ASSERT(r == 0, - "pthread_join on vcpu_id=%d failed with errno=%d", - vcpu->id, r); - TEST_ASSERT(retval == PTHREAD_CANCELED, - "expected retval=%p, got %p", PTHREAD_CANCELED, - retval); -} - -void do_migrations(struct test_data_page *data, int run_secs, int delay_usecs, - uint64_t *pipis_rcvd) -{ - long pages_not_moved; - unsigned long nodemask = 0; - unsigned long nodemasks[sizeof(nodemask) * 8]; - int nodes = 0; - time_t start_time, last_update, now; - time_t interval_secs = 1; - int i, r; - int from, to; - unsigned long bit; - uint64_t hlt_count; - uint64_t wake_count; - uint64_t ipis_sent; - - fprintf(stderr, "Calling migrate_pages every %d microseconds\n", - delay_usecs); - - /* Get set of first 64 numa nodes available */ - r = get_mempolicy(NULL, &nodemask, sizeof(nodemask) * 8, - 0, MPOL_F_MEMS_ALLOWED); - TEST_ASSERT(r == 0, "get_mempolicy failed errno=%d", errno); - - fprintf(stderr, "Numa nodes found amongst first %lu possible nodes " - "(each 1-bit indicates node is present): %#lx\n", - sizeof(nodemask) * 8, nodemask); - - /* Init array of masks containing a single-bit in each, one for each - * available node. migrate_pages called below requires specifying nodes - * as bit masks. - */ - for (i = 0, bit = 1; i < sizeof(nodemask) * 8; i++, bit <<= 1) { - if (nodemask & bit) { - nodemasks[nodes] = nodemask & bit; - nodes++; - } - } - - TEST_ASSERT(nodes > 1, - "Did not find at least 2 numa nodes. 
Can't do migration"); - - fprintf(stderr, "Migrating amongst %d nodes found\n", nodes); - - from = 0; - to = 1; - start_time = time(NULL); - last_update = start_time; - - ipis_sent = data->ipis_sent; - hlt_count = data->hlt_count; - wake_count = data->wake_count; - - while ((int)(time(NULL) - start_time) < run_secs) { - data->migrations_attempted++; - - /* - * migrate_pages with PID=0 will migrate all pages of this - * process between the nodes specified as bitmasks. The page - * backing the APIC access address belongs to this process - * because it is allocated by KVM in the context of the - * KVM_CREATE_VCPU ioctl. If that assumption ever changes this - * test may break or give a false positive signal. - */ - pages_not_moved = migrate_pages(0, sizeof(nodemasks[from]), - &nodemasks[from], - &nodemasks[to]); - if (pages_not_moved < 0) - fprintf(stderr, - "migrate_pages failed, errno=%d\n", errno); - else if (pages_not_moved > 0) - fprintf(stderr, - "migrate_pages could not move %ld pages\n", - pages_not_moved); - else - data->migrations_completed++; - - from = to; - to++; - if (to == nodes) - to = 0; - - now = time(NULL); - if (((now - start_time) % interval_secs == 0) && - (now != last_update)) { - last_update = now; - fprintf(stderr, - "%lu seconds: Migrations attempted=%lu completed=%lu, " - "IPIs sent=%lu received=%lu, HLTs=%lu wakes=%lu\n", - now - start_time, data->migrations_attempted, - data->migrations_completed, - data->ipis_sent, *pipis_rcvd, - data->hlt_count, data->wake_count); - - TEST_ASSERT(ipis_sent != data->ipis_sent && - hlt_count != data->hlt_count && - wake_count != data->wake_count, - "IPI, HLT and wake count have not increased " - "in the last %lu seconds. " - "HLTer is likely hung.", interval_secs); - - ipis_sent = data->ipis_sent; - hlt_count = data->hlt_count; - wake_count = data->wake_count; - } - usleep(delay_usecs); - } -} - -void get_cmdline_args(int argc, char *argv[], int *run_secs, - bool *migrate, int *delay_usecs) -{ - for (;;) { - int opt = getopt(argc, argv, "s:d:m"); - - if (opt == -1) - break; - switch (opt) { - case 's': - *run_secs = parse_size(optarg); - break; - case 'm': - *migrate = true; - break; - case 'd': - *delay_usecs = parse_size(optarg); - break; - default: - TEST_ASSERT(false, - "Usage: -s . Default is %d seconds.\n" - "-m adds calls to migrate_pages while vCPUs are running." - " Default is no migrations.\n" - "-d - delay between migrate_pages() calls." 
- " Default is %d microseconds.", - DEFAULT_RUN_SECS, DEFAULT_DELAY_USECS); - } - } -} - -int main(int argc, char *argv[]) -{ - int r; - int wait_secs; - const int max_halter_wait = 10; - int run_secs = 0; - int delay_usecs = 0; - struct test_data_page *data; - vm_vaddr_t test_data_page_vaddr; - bool migrate = false; - pthread_t threads[2]; - struct thread_params params[2]; - struct kvm_vm *vm; - uint64_t *pipis_rcvd; - - get_cmdline_args(argc, argv, &run_secs, &migrate, &delay_usecs); - if (run_secs <= 0) - run_secs = DEFAULT_RUN_SECS; - if (delay_usecs <= 0) - delay_usecs = DEFAULT_DELAY_USECS; - - vm = vm_create_with_one_vcpu(¶ms[0].vcpu, halter_guest_code); - - vm_install_exception_handler(vm, IPI_VECTOR, guest_ipi_handler); - - virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA); - - params[1].vcpu = vm_vcpu_add(vm, 1, sender_guest_code); - - test_data_page_vaddr = vm_vaddr_alloc_page(vm); - data = addr_gva2hva(vm, test_data_page_vaddr); - memset(data, 0, sizeof(*data)); - params[0].data = data; - params[1].data = data; - - vcpu_args_set(params[0].vcpu, 1, test_data_page_vaddr); - vcpu_args_set(params[1].vcpu, 1, test_data_page_vaddr); - - pipis_rcvd = (uint64_t *)addr_gva2hva(vm, (uint64_t)&ipis_rcvd); - params[0].pipis_rcvd = pipis_rcvd; - params[1].pipis_rcvd = pipis_rcvd; - - /* Start halter vCPU thread and wait for it to execute first HLT. */ - r = pthread_create(&threads[0], NULL, vcpu_thread, ¶ms[0]); - TEST_ASSERT(r == 0, - "pthread_create halter failed errno=%d", errno); - fprintf(stderr, "Halter vCPU thread started\n"); - - wait_secs = 0; - while ((wait_secs < max_halter_wait) && !data->hlt_count) { - sleep(1); - wait_secs++; - } - - TEST_ASSERT(data->hlt_count, - "Halter vCPU did not execute first HLT within %d seconds", - max_halter_wait); - - fprintf(stderr, - "Halter vCPU thread reported its APIC ID: %u after %d seconds.\n", - data->halter_apic_id, wait_secs); - - r = pthread_create(&threads[1], NULL, vcpu_thread, ¶ms[1]); - TEST_ASSERT(r == 0, "pthread_create sender failed errno=%d", errno); - - fprintf(stderr, - "IPI sender vCPU thread started. Letting vCPUs run for %d seconds.\n", - run_secs); - - if (!migrate) - sleep(run_secs); - else - do_migrations(data, run_secs, delay_usecs, pipis_rcvd); - - /* - * Cancel threads and wait for them to stop. 
- */ - cancel_join_vcpu_thread(threads[0], params[0].vcpu); - cancel_join_vcpu_thread(threads[1], params[1].vcpu); - - fprintf(stderr, - "Test successful after running for %d seconds.\n" - "Sending vCPU sent %lu IPIs to halting vCPU\n" - "Halting vCPU halted %lu times, woke %lu times, received %lu IPIs.\n" - "Halter APIC ID=%#x\n" - "Sender ICR value=%#x ICR2 value=%#x\n" - "Halter TPR=%#x PPR=%#x LVR=%#x\n" - "Migrations attempted: %lu\n" - "Migrations completed: %lu\n", - run_secs, data->ipis_sent, - data->hlt_count, data->wake_count, *pipis_rcvd, - data->halter_apic_id, - data->icr, data->icr2, - data->halter_tpr, data->halter_ppr, data->halter_lvr, - data->migrations_attempted, data->migrations_completed); - - kvm_vm_free(vm); - - return 0; -} diff --git a/tools/testing/selftests/kvm/x86_64/xapic_state_test.c b/tools/testing/selftests/kvm/x86_64/xapic_state_test.c deleted file mode 100644 index 88bcca188799..000000000000 --- a/tools/testing/selftests/kvm/x86_64/xapic_state_test.c +++ /dev/null @@ -1,262 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -#include -#include -#include -#include -#include - -#include "apic.h" -#include "kvm_util.h" -#include "processor.h" -#include "test_util.h" - -struct xapic_vcpu { - struct kvm_vcpu *vcpu; - bool is_x2apic; - bool has_xavic_errata; -}; - -static void xapic_guest_code(void) -{ - asm volatile("cli"); - - xapic_enable(); - - while (1) { - uint64_t val = (u64)xapic_read_reg(APIC_IRR) | - (u64)xapic_read_reg(APIC_IRR + 0x10) << 32; - - xapic_write_reg(APIC_ICR2, val >> 32); - xapic_write_reg(APIC_ICR, val); - GUEST_SYNC(val); - } -} - -#define X2APIC_RSVD_BITS_MASK (GENMASK_ULL(31, 20) | \ - GENMASK_ULL(17, 16) | \ - GENMASK_ULL(13, 13)) - -static void x2apic_guest_code(void) -{ - asm volatile("cli"); - - x2apic_enable(); - - do { - uint64_t val = x2apic_read_reg(APIC_IRR) | - x2apic_read_reg(APIC_IRR + 0x10) << 32; - - if (val & X2APIC_RSVD_BITS_MASK) { - x2apic_write_reg_fault(APIC_ICR, val); - } else { - x2apic_write_reg(APIC_ICR, val); - GUEST_ASSERT_EQ(x2apic_read_reg(APIC_ICR), val); - } - GUEST_SYNC(val); - } while (1); -} - -static void ____test_icr(struct xapic_vcpu *x, uint64_t val) -{ - struct kvm_vcpu *vcpu = x->vcpu; - struct kvm_lapic_state xapic; - struct ucall uc; - uint64_t icr; - - /* - * Tell the guest what ICR value to write. Use the IRR to pass info, - * all bits are valid and should not be modified by KVM (ignoring the - * fact that vectors 0-15 are technically illegal). - */ - vcpu_ioctl(vcpu, KVM_GET_LAPIC, &xapic); - *((u32 *)&xapic.regs[APIC_IRR]) = val; - *((u32 *)&xapic.regs[APIC_IRR + 0x10]) = val >> 32; - vcpu_ioctl(vcpu, KVM_SET_LAPIC, &xapic); - - vcpu_run(vcpu); - TEST_ASSERT_EQ(get_ucall(vcpu, &uc), UCALL_SYNC); - TEST_ASSERT_EQ(uc.args[1], val); - - vcpu_ioctl(vcpu, KVM_GET_LAPIC, &xapic); - icr = (u64)(*((u32 *)&xapic.regs[APIC_ICR])) | - (u64)(*((u32 *)&xapic.regs[APIC_ICR2])) << 32; - if (!x->is_x2apic) { - if (!x->has_xavic_errata) - val &= (-1u | (0xffull << (32 + 24))); - } else if (val & X2APIC_RSVD_BITS_MASK) { - return; - } - - if (x->has_xavic_errata) - TEST_ASSERT_EQ(icr & ~APIC_ICR_BUSY, val & ~APIC_ICR_BUSY); - else - TEST_ASSERT_EQ(icr, val & ~APIC_ICR_BUSY); -} - -static void __test_icr(struct xapic_vcpu *x, uint64_t val) -{ - /* - * The BUSY bit is reserved on both AMD and Intel, but only AMD treats - * it is as _must_ be zero. Intel simply ignores the bit. Don't test - * the BUSY bit for x2APIC, as there is no single correct behavior. 
- */ - if (!x->is_x2apic) - ____test_icr(x, val | APIC_ICR_BUSY); - - ____test_icr(x, val & ~(u64)APIC_ICR_BUSY); -} - -static void test_icr(struct xapic_vcpu *x) -{ - struct kvm_vcpu *vcpu = x->vcpu; - uint64_t icr, i, j; - - icr = APIC_DEST_SELF | APIC_INT_ASSERT | APIC_DM_FIXED; - for (i = 0; i <= 0xff; i++) - __test_icr(x, icr | i); - - icr = APIC_INT_ASSERT | APIC_DM_FIXED; - for (i = 0; i <= 0xff; i++) - __test_icr(x, icr | i); - - /* - * Send all flavors of IPIs to non-existent vCPUs. TODO: use number of - * vCPUs, not vcpu.id + 1. Arbitrarily use vector 0xff. - */ - icr = APIC_INT_ASSERT | 0xff; - for (i = 0; i < 0xff; i++) { - if (i == vcpu->id) - continue; - for (j = 0; j < 8; j++) - __test_icr(x, i << (32 + 24) | icr | (j << 8)); - } - - /* And again with a shorthand destination for all types of IPIs. */ - icr = APIC_DEST_ALLBUT | APIC_INT_ASSERT; - for (i = 0; i < 8; i++) - __test_icr(x, icr | (i << 8)); - - /* And a few garbage value, just make sure it's an IRQ (blocked). */ - __test_icr(x, 0xa5a5a5a5a5a5a5a5 & ~APIC_DM_FIXED_MASK); - __test_icr(x, 0x5a5a5a5a5a5a5a5a & ~APIC_DM_FIXED_MASK); - __test_icr(x, -1ull & ~APIC_DM_FIXED_MASK); -} - -static void __test_apic_id(struct kvm_vcpu *vcpu, uint64_t apic_base) -{ - uint32_t apic_id, expected; - struct kvm_lapic_state xapic; - - vcpu_set_msr(vcpu, MSR_IA32_APICBASE, apic_base); - - vcpu_ioctl(vcpu, KVM_GET_LAPIC, &xapic); - - expected = apic_base & X2APIC_ENABLE ? vcpu->id : vcpu->id << 24; - apic_id = *((u32 *)&xapic.regs[APIC_ID]); - - TEST_ASSERT(apic_id == expected, - "APIC_ID not set back to %s format; wanted = %x, got = %x", - (apic_base & X2APIC_ENABLE) ? "x2APIC" : "xAPIC", - expected, apic_id); -} - -/* - * Verify that KVM switches the APIC_ID between xAPIC and x2APIC when userspace - * stuffs MSR_IA32_APICBASE. Setting the APIC_ID when x2APIC is enabled and - * when the APIC transitions for DISABLED to ENABLED is architectural behavior - * (on Intel), whereas the x2APIC => xAPIC transition behavior is KVM ABI since - * attempted to transition from x2APIC to xAPIC without disabling the APIC is - * architecturally disallowed. - */ -static void test_apic_id(void) -{ - const uint32_t NR_VCPUS = 3; - struct kvm_vcpu *vcpus[NR_VCPUS]; - uint64_t apic_base; - struct kvm_vm *vm; - int i; - - vm = vm_create_with_vcpus(NR_VCPUS, NULL, vcpus); - vm_enable_cap(vm, KVM_CAP_X2APIC_API, KVM_X2APIC_API_USE_32BIT_IDS); - - for (i = 0; i < NR_VCPUS; i++) { - apic_base = vcpu_get_msr(vcpus[i], MSR_IA32_APICBASE); - - TEST_ASSERT(apic_base & MSR_IA32_APICBASE_ENABLE, - "APIC not in ENABLED state at vCPU RESET"); - TEST_ASSERT(!(apic_base & X2APIC_ENABLE), - "APIC not in xAPIC mode at vCPU RESET"); - - __test_apic_id(vcpus[i], apic_base); - __test_apic_id(vcpus[i], apic_base | X2APIC_ENABLE); - __test_apic_id(vcpus[i], apic_base); - } - - kvm_vm_free(vm); -} - -static void test_x2apic_id(void) -{ - struct kvm_lapic_state lapic = {}; - struct kvm_vcpu *vcpu; - struct kvm_vm *vm; - int i; - - vm = vm_create_with_one_vcpu(&vcpu, NULL); - vcpu_set_msr(vcpu, MSR_IA32_APICBASE, MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE); - - /* - * Try stuffing a modified x2APIC ID, KVM should ignore the value and - * always return the vCPU's default/readonly x2APIC ID. 
- */ - for (i = 0; i <= 0xff; i++) { - *(u32 *)(lapic.regs + APIC_ID) = i << 24; - *(u32 *)(lapic.regs + APIC_SPIV) = APIC_SPIV_APIC_ENABLED; - vcpu_ioctl(vcpu, KVM_SET_LAPIC, &lapic); - - vcpu_ioctl(vcpu, KVM_GET_LAPIC, &lapic); - TEST_ASSERT(*((u32 *)&lapic.regs[APIC_ID]) == vcpu->id << 24, - "x2APIC ID should be fully readonly"); - } - - kvm_vm_free(vm); -} - -int main(int argc, char *argv[]) -{ - struct xapic_vcpu x = { - .vcpu = NULL, - .is_x2apic = true, - }; - struct kvm_vm *vm; - - vm = vm_create_with_one_vcpu(&x.vcpu, x2apic_guest_code); - test_icr(&x); - kvm_vm_free(vm); - - /* - * Use a second VM for the xAPIC test so that x2APIC can be hidden from - * the guest in order to test AVIC. KVM disallows changing CPUID after - * KVM_RUN and AVIC is disabled if _any_ vCPU is allowed to use x2APIC. - */ - vm = vm_create_with_one_vcpu(&x.vcpu, xapic_guest_code); - x.is_x2apic = false; - - /* - * AMD's AVIC implementation is buggy (fails to clear the ICR BUSY bit), - * and also diverges from KVM with respect to ICR2[23:0] (KVM and Intel - * drops writes, AMD does not). Account for the errata when checking - * that KVM reads back what was written. - */ - x.has_xavic_errata = host_cpu_is_amd && - get_kvm_amd_param_bool("avic"); - - vcpu_clear_cpuid_feature(x.vcpu, X86_FEATURE_X2APIC); - - virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA); - test_icr(&x); - kvm_vm_free(vm); - - test_apic_id(); - test_x2apic_id(); -} diff --git a/tools/testing/selftests/kvm/x86_64/xcr0_cpuid_test.c b/tools/testing/selftests/kvm/x86_64/xcr0_cpuid_test.c deleted file mode 100644 index c8a5c5e51661..000000000000 --- a/tools/testing/selftests/kvm/x86_64/xcr0_cpuid_test.c +++ /dev/null @@ -1,139 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * XCR0 cpuid test - * - * Copyright (C) 2022, Google LLC. - */ -#include -#include -#include -#include -#include - -#include "test_util.h" - -#include "kvm_util.h" -#include "processor.h" - -/* - * Assert that architectural dependency rules are satisfied, e.g. that AVX is - * supported if and only if SSE is supported. - */ -#define ASSERT_XFEATURE_DEPENDENCIES(supported_xcr0, xfeatures, dependencies) \ -do { \ - uint64_t __supported = (supported_xcr0) & ((xfeatures) | (dependencies)); \ - \ - __GUEST_ASSERT((__supported & (xfeatures)) != (xfeatures) || \ - __supported == ((xfeatures) | (dependencies)), \ - "supported = 0x%lx, xfeatures = 0x%llx, dependencies = 0x%llx", \ - __supported, (xfeatures), (dependencies)); \ -} while (0) - -/* - * Assert that KVM reports a sane, usable as-is XCR0. Architecturally, a CPU - * isn't strictly required to _support_ all XFeatures related to a feature, but - * at the same time XSETBV will #GP if bundled XFeatures aren't enabled and - * disabled coherently. E.g. a CPU can technically enumerate supported for - * XTILE_CFG but not XTILE_DATA, but attempting to enable XTILE_CFG without - * XTILE_DATA will #GP. 
- */ -#define ASSERT_ALL_OR_NONE_XFEATURE(supported_xcr0, xfeatures) \ -do { \ - uint64_t __supported = (supported_xcr0) & (xfeatures); \ - \ - __GUEST_ASSERT(!__supported || __supported == (xfeatures), \ - "supported = 0x%lx, xfeatures = 0x%llx", \ - __supported, (xfeatures)); \ -} while (0) - -static void guest_code(void) -{ - uint64_t initial_xcr0; - uint64_t supported_xcr0; - int i, vector; - - set_cr4(get_cr4() | X86_CR4_OSXSAVE); - - initial_xcr0 = xgetbv(0); - supported_xcr0 = this_cpu_supported_xcr0(); - - GUEST_ASSERT(initial_xcr0 == supported_xcr0); - - /* Check AVX */ - ASSERT_XFEATURE_DEPENDENCIES(supported_xcr0, - XFEATURE_MASK_YMM, - XFEATURE_MASK_SSE); - - /* Check MPX */ - ASSERT_ALL_OR_NONE_XFEATURE(supported_xcr0, - XFEATURE_MASK_BNDREGS | XFEATURE_MASK_BNDCSR); - - /* Check AVX-512 */ - ASSERT_XFEATURE_DEPENDENCIES(supported_xcr0, - XFEATURE_MASK_AVX512, - XFEATURE_MASK_SSE | XFEATURE_MASK_YMM); - ASSERT_ALL_OR_NONE_XFEATURE(supported_xcr0, - XFEATURE_MASK_AVX512); - - /* Check AMX */ - ASSERT_ALL_OR_NONE_XFEATURE(supported_xcr0, - XFEATURE_MASK_XTILE); - - vector = xsetbv_safe(0, XFEATURE_MASK_FP); - __GUEST_ASSERT(!vector, - "Expected success on XSETBV(FP), got vector '0x%x'", - vector); - - vector = xsetbv_safe(0, supported_xcr0); - __GUEST_ASSERT(!vector, - "Expected success on XSETBV(0x%lx), got vector '0x%x'", - supported_xcr0, vector); - - for (i = 0; i < 64; i++) { - if (supported_xcr0 & BIT_ULL(i)) - continue; - - vector = xsetbv_safe(0, supported_xcr0 | BIT_ULL(i)); - __GUEST_ASSERT(vector == GP_VECTOR, - "Expected #GP on XSETBV(0x%llx), supported XCR0 = %lx, got vector '0x%x'", - BIT_ULL(i), supported_xcr0, vector); - } - - GUEST_DONE(); -} - -int main(int argc, char *argv[]) -{ - struct kvm_vcpu *vcpu; - struct kvm_run *run; - struct kvm_vm *vm; - struct ucall uc; - - TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XSAVE)); - - vm = vm_create_with_one_vcpu(&vcpu, guest_code); - run = vcpu->run; - - while (1) { - vcpu_run(vcpu); - - TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, - "Unexpected exit reason: %u (%s),", - run->exit_reason, - exit_reason_str(run->exit_reason)); - - switch (get_ucall(vcpu, &uc)) { - case UCALL_ABORT: - REPORT_GUEST_ASSERT(uc); - break; - case UCALL_DONE: - goto done; - default: - TEST_FAIL("Unknown ucall %lu", uc.cmd); - } - } - -done: - kvm_vm_free(vm); - return 0; -} diff --git a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c deleted file mode 100644 index a59b3c799bb2..000000000000 --- a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c +++ /dev/null @@ -1,1161 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Copyright © 2021 Amazon.com, Inc. or its affiliates. 
- */ - -#include "test_util.h" -#include "kvm_util.h" -#include "processor.h" - -#include -#include -#include -#include -#include - -#include - -#define SHINFO_REGION_GVA 0xc0000000ULL -#define SHINFO_REGION_GPA 0xc0000000ULL -#define SHINFO_REGION_SLOT 10 - -#define DUMMY_REGION_GPA (SHINFO_REGION_GPA + (3 * PAGE_SIZE)) -#define DUMMY_REGION_SLOT 11 - -#define DUMMY_REGION_GPA_2 (SHINFO_REGION_GPA + (4 * PAGE_SIZE)) -#define DUMMY_REGION_SLOT_2 12 - -#define SHINFO_ADDR (SHINFO_REGION_GPA) -#define VCPU_INFO_ADDR (SHINFO_REGION_GPA + 0x40) -#define PVTIME_ADDR (SHINFO_REGION_GPA + PAGE_SIZE) -#define RUNSTATE_ADDR (SHINFO_REGION_GPA + PAGE_SIZE + PAGE_SIZE - 15) - -#define SHINFO_VADDR (SHINFO_REGION_GVA) -#define VCPU_INFO_VADDR (SHINFO_REGION_GVA + 0x40) -#define RUNSTATE_VADDR (SHINFO_REGION_GVA + PAGE_SIZE + PAGE_SIZE - 15) - -#define EVTCHN_VECTOR 0x10 - -#define EVTCHN_TEST1 15 -#define EVTCHN_TEST2 66 -#define EVTCHN_TIMER 13 - -enum { - TEST_INJECT_VECTOR = 0, - TEST_RUNSTATE_runnable, - TEST_RUNSTATE_blocked, - TEST_RUNSTATE_offline, - TEST_RUNSTATE_ADJUST, - TEST_RUNSTATE_DATA, - TEST_STEAL_TIME, - TEST_EVTCHN_MASKED, - TEST_EVTCHN_UNMASKED, - TEST_EVTCHN_SLOWPATH, - TEST_EVTCHN_SEND_IOCTL, - TEST_EVTCHN_HCALL, - TEST_EVTCHN_HCALL_SLOWPATH, - TEST_EVTCHN_HCALL_EVENTFD, - TEST_TIMER_SETUP, - TEST_TIMER_WAIT, - TEST_TIMER_RESTORE, - TEST_POLL_READY, - TEST_POLL_TIMEOUT, - TEST_POLL_MASKED, - TEST_POLL_WAKE, - SET_VCPU_INFO, - TEST_TIMER_PAST, - TEST_LOCKING_SEND_RACE, - TEST_LOCKING_POLL_RACE, - TEST_LOCKING_POLL_TIMEOUT, - TEST_DONE, - - TEST_GUEST_SAW_IRQ, -}; - -#define XEN_HYPERCALL_MSR 0x40000000 - -#define MIN_STEAL_TIME 50000 - -#define SHINFO_RACE_TIMEOUT 2 /* seconds */ - -#define __HYPERVISOR_set_timer_op 15 -#define __HYPERVISOR_sched_op 29 -#define __HYPERVISOR_event_channel_op 32 - -#define SCHEDOP_poll 3 - -#define EVTCHNOP_send 4 - -#define EVTCHNSTAT_interdomain 2 - -struct evtchn_send { - u32 port; -}; - -struct sched_poll { - u32 *ports; - unsigned int nr_ports; - u64 timeout; -}; - -struct pvclock_vcpu_time_info { - u32 version; - u32 pad0; - u64 tsc_timestamp; - u64 system_time; - u32 tsc_to_system_mul; - s8 tsc_shift; - u8 flags; - u8 pad[2]; -} __attribute__((__packed__)); /* 32 bytes */ - -struct pvclock_wall_clock { - u32 version; - u32 sec; - u32 nsec; -} __attribute__((__packed__)); - -struct vcpu_runstate_info { - uint32_t state; - uint64_t state_entry_time; - uint64_t time[5]; /* Extra field for overrun check */ -}; - -struct compat_vcpu_runstate_info { - uint32_t state; - uint64_t state_entry_time; - uint64_t time[5]; -} __attribute__((__packed__)); - -struct arch_vcpu_info { - unsigned long cr2; - unsigned long pad; /* sizeof(vcpu_info_t) == 64 */ -}; - -struct vcpu_info { - uint8_t evtchn_upcall_pending; - uint8_t evtchn_upcall_mask; - unsigned long evtchn_pending_sel; - struct arch_vcpu_info arch; - struct pvclock_vcpu_time_info time; -}; /* 64 bytes (x86) */ - -struct shared_info { - struct vcpu_info vcpu_info[32]; - unsigned long evtchn_pending[64]; - unsigned long evtchn_mask[64]; - struct pvclock_wall_clock wc; - uint32_t wc_sec_hi; - /* arch_shared_info here */ -}; - -#define RUNSTATE_running 0 -#define RUNSTATE_runnable 1 -#define RUNSTATE_blocked 2 -#define RUNSTATE_offline 3 - -static const char *runstate_names[] = { - "running", - "runnable", - "blocked", - "offline" -}; - -struct { - struct kvm_irq_routing info; - struct kvm_irq_routing_entry entries[2]; -} irq_routes; - -static volatile bool guest_saw_irq; - -static void 
evtchn_handler(struct ex_regs *regs) -{ - struct vcpu_info *vi = (void *)VCPU_INFO_VADDR; - - vcpu_arch_put_guest(vi->evtchn_upcall_pending, 0); - vcpu_arch_put_guest(vi->evtchn_pending_sel, 0); - guest_saw_irq = true; - - GUEST_SYNC(TEST_GUEST_SAW_IRQ); -} - -static void guest_wait_for_irq(void) -{ - while (!guest_saw_irq) - __asm__ __volatile__ ("rep nop" : : : "memory"); - guest_saw_irq = false; -} - -static void guest_code(void) -{ - struct vcpu_runstate_info *rs = (void *)RUNSTATE_VADDR; - int i; - - __asm__ __volatile__( - "sti\n" - "nop\n" - ); - - /* Trigger an interrupt injection */ - GUEST_SYNC(TEST_INJECT_VECTOR); - - guest_wait_for_irq(); - - /* Test having the host set runstates manually */ - GUEST_SYNC(TEST_RUNSTATE_runnable); - GUEST_ASSERT(rs->time[RUNSTATE_runnable] != 0); - GUEST_ASSERT(rs->state == 0); - - GUEST_SYNC(TEST_RUNSTATE_blocked); - GUEST_ASSERT(rs->time[RUNSTATE_blocked] != 0); - GUEST_ASSERT(rs->state == 0); - - GUEST_SYNC(TEST_RUNSTATE_offline); - GUEST_ASSERT(rs->time[RUNSTATE_offline] != 0); - GUEST_ASSERT(rs->state == 0); - - /* Test runstate time adjust */ - GUEST_SYNC(TEST_RUNSTATE_ADJUST); - GUEST_ASSERT(rs->time[RUNSTATE_blocked] == 0x5a); - GUEST_ASSERT(rs->time[RUNSTATE_offline] == 0x6b6b); - - /* Test runstate time set */ - GUEST_SYNC(TEST_RUNSTATE_DATA); - GUEST_ASSERT(rs->state_entry_time >= 0x8000); - GUEST_ASSERT(rs->time[RUNSTATE_runnable] == 0); - GUEST_ASSERT(rs->time[RUNSTATE_blocked] == 0x6b6b); - GUEST_ASSERT(rs->time[RUNSTATE_offline] == 0x5a); - - /* sched_yield() should result in some 'runnable' time */ - GUEST_SYNC(TEST_STEAL_TIME); - GUEST_ASSERT(rs->time[RUNSTATE_runnable] >= MIN_STEAL_TIME); - - /* Attempt to deliver a *masked* interrupt */ - GUEST_SYNC(TEST_EVTCHN_MASKED); - - /* Wait until we see the bit set */ - struct shared_info *si = (void *)SHINFO_VADDR; - while (!si->evtchn_pending[0]) - __asm__ __volatile__ ("rep nop" : : : "memory"); - - /* Now deliver an *unmasked* interrupt */ - GUEST_SYNC(TEST_EVTCHN_UNMASKED); - - guest_wait_for_irq(); - - /* Change memslots and deliver an interrupt */ - GUEST_SYNC(TEST_EVTCHN_SLOWPATH); - - guest_wait_for_irq(); - - /* Deliver event channel with KVM_XEN_HVM_EVTCHN_SEND */ - GUEST_SYNC(TEST_EVTCHN_SEND_IOCTL); - - guest_wait_for_irq(); - - GUEST_SYNC(TEST_EVTCHN_HCALL); - - /* Our turn. Deliver event channel (to ourselves) with - * EVTCHNOP_send hypercall. */ - struct evtchn_send s = { .port = 127 }; - xen_hypercall(__HYPERVISOR_event_channel_op, EVTCHNOP_send, &s); - - guest_wait_for_irq(); - - GUEST_SYNC(TEST_EVTCHN_HCALL_SLOWPATH); - - /* - * Same again, but this time the host has messed with memslots so it - * should take the slow path in kvm_xen_set_evtchn(). - */ - xen_hypercall(__HYPERVISOR_event_channel_op, EVTCHNOP_send, &s); - - guest_wait_for_irq(); - - GUEST_SYNC(TEST_EVTCHN_HCALL_EVENTFD); - - /* Deliver "outbound" event channel to an eventfd which - * happens to be one of our own irqfds. */ - s.port = 197; - xen_hypercall(__HYPERVISOR_event_channel_op, EVTCHNOP_send, &s); - - guest_wait_for_irq(); - - GUEST_SYNC(TEST_TIMER_SETUP); - - /* Set a timer 100ms in the future. */ - xen_hypercall(__HYPERVISOR_set_timer_op, - rs->state_entry_time + 100000000, NULL); - - GUEST_SYNC(TEST_TIMER_WAIT); - - /* Now wait for the timer */ - guest_wait_for_irq(); - - GUEST_SYNC(TEST_TIMER_RESTORE); - - /* The host has 'restored' the timer. Just wait for it. 
*/ - guest_wait_for_irq(); - - GUEST_SYNC(TEST_POLL_READY); - - /* Poll for an event channel port which is already set */ - u32 ports[1] = { EVTCHN_TIMER }; - struct sched_poll p = { - .ports = ports, - .nr_ports = 1, - .timeout = 0, - }; - - xen_hypercall(__HYPERVISOR_sched_op, SCHEDOP_poll, &p); - - GUEST_SYNC(TEST_POLL_TIMEOUT); - - /* Poll for an unset port and wait for the timeout. */ - p.timeout = 100000000; - xen_hypercall(__HYPERVISOR_sched_op, SCHEDOP_poll, &p); - - GUEST_SYNC(TEST_POLL_MASKED); - - /* A timer will wake the masked port we're waiting on, while we poll */ - p.timeout = 0; - xen_hypercall(__HYPERVISOR_sched_op, SCHEDOP_poll, &p); - - GUEST_SYNC(TEST_POLL_WAKE); - - /* Set the vcpu_info to point at exactly the place it already is to - * make sure the attribute is functional. */ - GUEST_SYNC(SET_VCPU_INFO); - - /* A timer wake an *unmasked* port which should wake us with an - * actual interrupt, while we're polling on a different port. */ - ports[0]++; - p.timeout = 0; - xen_hypercall(__HYPERVISOR_sched_op, SCHEDOP_poll, &p); - - guest_wait_for_irq(); - - GUEST_SYNC(TEST_TIMER_PAST); - - /* Timer should have fired already */ - guest_wait_for_irq(); - - GUEST_SYNC(TEST_LOCKING_SEND_RACE); - /* Racing host ioctls */ - - guest_wait_for_irq(); - - GUEST_SYNC(TEST_LOCKING_POLL_RACE); - /* Racing vmcall against host ioctl */ - - ports[0] = 0; - - p = (struct sched_poll) { - .ports = ports, - .nr_ports = 1, - .timeout = 0 - }; - -wait_for_timer: - /* - * Poll for a timer wake event while the worker thread is mucking with - * the shared info. KVM XEN drops timer IRQs if the shared info is - * invalid when the timer expires. Arbitrarily poll 100 times before - * giving up and asking the VMM to re-arm the timer. 100 polls should - * consume enough time to beat on KVM without taking too long if the - * timer IRQ is dropped due to an invalid event channel. - */ - for (i = 0; i < 100 && !guest_saw_irq; i++) - __xen_hypercall(__HYPERVISOR_sched_op, SCHEDOP_poll, &p); - - /* - * Re-send the timer IRQ if it was (likely) dropped due to the timer - * expiring while the event channel was invalid. 
- */ - if (!guest_saw_irq) { - GUEST_SYNC(TEST_LOCKING_POLL_TIMEOUT); - goto wait_for_timer; - } - guest_saw_irq = false; - - GUEST_SYNC(TEST_DONE); -} - -static struct shared_info *shinfo; -static struct vcpu_info *vinfo; -static struct kvm_vcpu *vcpu; - -static void handle_alrm(int sig) -{ - if (vinfo) - printf("evtchn_upcall_pending 0x%x\n", vinfo->evtchn_upcall_pending); - vcpu_dump(stdout, vcpu, 0); - TEST_FAIL("IRQ delivery timed out"); -} - -static void *juggle_shinfo_state(void *arg) -{ - struct kvm_vm *vm = (struct kvm_vm *)arg; - - struct kvm_xen_hvm_attr cache_activate_gfn = { - .type = KVM_XEN_ATTR_TYPE_SHARED_INFO, - .u.shared_info.gfn = SHINFO_REGION_GPA / PAGE_SIZE - }; - - struct kvm_xen_hvm_attr cache_deactivate_gfn = { - .type = KVM_XEN_ATTR_TYPE_SHARED_INFO, - .u.shared_info.gfn = KVM_XEN_INVALID_GFN - }; - - struct kvm_xen_hvm_attr cache_activate_hva = { - .type = KVM_XEN_ATTR_TYPE_SHARED_INFO_HVA, - .u.shared_info.hva = (unsigned long)shinfo - }; - - struct kvm_xen_hvm_attr cache_deactivate_hva = { - .type = KVM_XEN_ATTR_TYPE_SHARED_INFO, - .u.shared_info.hva = 0 - }; - - int xen_caps = kvm_check_cap(KVM_CAP_XEN_HVM); - - for (;;) { - __vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &cache_activate_gfn); - pthread_testcancel(); - __vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &cache_deactivate_gfn); - - if (xen_caps & KVM_XEN_HVM_CONFIG_SHARED_INFO_HVA) { - __vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &cache_activate_hva); - pthread_testcancel(); - __vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &cache_deactivate_hva); - } - } - - return NULL; -} - -int main(int argc, char *argv[]) -{ - struct kvm_xen_hvm_attr evt_reset; - struct kvm_vm *vm; - pthread_t thread; - bool verbose; - int ret; - - verbose = argc > 1 && (!strncmp(argv[1], "-v", 3) || - !strncmp(argv[1], "--verbose", 10)); - - int xen_caps = kvm_check_cap(KVM_CAP_XEN_HVM); - TEST_REQUIRE(xen_caps & KVM_XEN_HVM_CONFIG_SHARED_INFO); - - bool do_runstate_tests = !!(xen_caps & KVM_XEN_HVM_CONFIG_RUNSTATE); - bool do_runstate_flag = !!(xen_caps & KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG); - bool do_eventfd_tests = !!(xen_caps & KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL); - bool do_evtchn_tests = do_eventfd_tests && !!(xen_caps & KVM_XEN_HVM_CONFIG_EVTCHN_SEND); - bool has_shinfo_hva = !!(xen_caps & KVM_XEN_HVM_CONFIG_SHARED_INFO_HVA); - - vm = vm_create_with_one_vcpu(&vcpu, guest_code); - - /* Map a region for the shared_info page */ - vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, - SHINFO_REGION_GPA, SHINFO_REGION_SLOT, 3, 0); - virt_map(vm, SHINFO_REGION_GVA, SHINFO_REGION_GPA, 3); - - shinfo = addr_gpa2hva(vm, SHINFO_VADDR); - - int zero_fd = open("/dev/zero", O_RDONLY); - TEST_ASSERT(zero_fd != -1, "Failed to open /dev/zero"); - - struct kvm_xen_hvm_config hvmc = { - .flags = KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL, - .msr = XEN_HYPERCALL_MSR, - }; - - /* Let the kernel know that we *will* use it for sending all - * event channels, which lets it intercept SCHEDOP_poll */ - if (do_evtchn_tests) - hvmc.flags |= KVM_XEN_HVM_CONFIG_EVTCHN_SEND; - - vm_ioctl(vm, KVM_XEN_HVM_CONFIG, &hvmc); - - struct kvm_xen_hvm_attr lm = { - .type = KVM_XEN_ATTR_TYPE_LONG_MODE, - .u.long_mode = 1, - }; - vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &lm); - - if (do_runstate_flag) { - struct kvm_xen_hvm_attr ruf = { - .type = KVM_XEN_ATTR_TYPE_RUNSTATE_UPDATE_FLAG, - .u.runstate_update_flag = 1, - }; - vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &ruf); - - ruf.u.runstate_update_flag = 0; - vm_ioctl(vm, KVM_XEN_HVM_GET_ATTR, &ruf); - TEST_ASSERT(ruf.u.runstate_update_flag == 1, - "Failed to read 
back RUNSTATE_UPDATE_FLAG attr"); - } - - struct kvm_xen_hvm_attr ha = {}; - - if (has_shinfo_hva) { - ha.type = KVM_XEN_ATTR_TYPE_SHARED_INFO_HVA; - ha.u.shared_info.hva = (unsigned long)shinfo; - } else { - ha.type = KVM_XEN_ATTR_TYPE_SHARED_INFO; - ha.u.shared_info.gfn = SHINFO_ADDR / PAGE_SIZE; - } - - vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &ha); - - /* - * Test what happens when the HVA of the shinfo page is remapped after - * the kernel has a reference to it. But make sure we copy the clock - * info over since that's only set at setup time, and we test it later. - */ - struct pvclock_wall_clock wc_copy = shinfo->wc; - void *m = mmap(shinfo, PAGE_SIZE, PROT_READ|PROT_WRITE, MAP_FIXED|MAP_PRIVATE, zero_fd, 0); - TEST_ASSERT(m == shinfo, "Failed to map /dev/zero over shared info"); - shinfo->wc = wc_copy; - - struct kvm_xen_vcpu_attr vi = { - .type = KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO, - .u.gpa = VCPU_INFO_ADDR, - }; - vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &vi); - - struct kvm_xen_vcpu_attr pvclock = { - .type = KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO, - .u.gpa = PVTIME_ADDR, - }; - vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &pvclock); - - struct kvm_xen_hvm_attr vec = { - .type = KVM_XEN_ATTR_TYPE_UPCALL_VECTOR, - .u.vector = EVTCHN_VECTOR, - }; - vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &vec); - - vm_install_exception_handler(vm, EVTCHN_VECTOR, evtchn_handler); - - if (do_runstate_tests) { - struct kvm_xen_vcpu_attr st = { - .type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR, - .u.gpa = RUNSTATE_ADDR, - }; - vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &st); - } - - int irq_fd[2] = { -1, -1 }; - - if (do_eventfd_tests) { - irq_fd[0] = eventfd(0, 0); - irq_fd[1] = eventfd(0, 0); - - /* Unexpected, but not a KVM failure */ - if (irq_fd[0] == -1 || irq_fd[1] == -1) - do_evtchn_tests = do_eventfd_tests = false; - } - - if (do_eventfd_tests) { - irq_routes.info.nr = 2; - - irq_routes.entries[0].gsi = 32; - irq_routes.entries[0].type = KVM_IRQ_ROUTING_XEN_EVTCHN; - irq_routes.entries[0].u.xen_evtchn.port = EVTCHN_TEST1; - irq_routes.entries[0].u.xen_evtchn.vcpu = vcpu->id; - irq_routes.entries[0].u.xen_evtchn.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL; - - irq_routes.entries[1].gsi = 33; - irq_routes.entries[1].type = KVM_IRQ_ROUTING_XEN_EVTCHN; - irq_routes.entries[1].u.xen_evtchn.port = EVTCHN_TEST2; - irq_routes.entries[1].u.xen_evtchn.vcpu = vcpu->id; - irq_routes.entries[1].u.xen_evtchn.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL; - - vm_ioctl(vm, KVM_SET_GSI_ROUTING, &irq_routes.info); - - struct kvm_irqfd ifd = { }; - - ifd.fd = irq_fd[0]; - ifd.gsi = 32; - vm_ioctl(vm, KVM_IRQFD, &ifd); - - ifd.fd = irq_fd[1]; - ifd.gsi = 33; - vm_ioctl(vm, KVM_IRQFD, &ifd); - - struct sigaction sa = { }; - sa.sa_handler = handle_alrm; - sigaction(SIGALRM, &sa, NULL); - } - - struct kvm_xen_vcpu_attr tmr = { - .type = KVM_XEN_VCPU_ATTR_TYPE_TIMER, - .u.timer.port = EVTCHN_TIMER, - .u.timer.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL, - .u.timer.expires_ns = 0 - }; - - if (do_evtchn_tests) { - struct kvm_xen_hvm_attr inj = { - .type = KVM_XEN_ATTR_TYPE_EVTCHN, - .u.evtchn.send_port = 127, - .u.evtchn.type = EVTCHNSTAT_interdomain, - .u.evtchn.flags = 0, - .u.evtchn.deliver.port.port = EVTCHN_TEST1, - .u.evtchn.deliver.port.vcpu = vcpu->id + 1, - .u.evtchn.deliver.port.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL, - }; - vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &inj); - - /* Test migration to a different vCPU */ - inj.u.evtchn.flags = KVM_XEN_EVTCHN_UPDATE; - inj.u.evtchn.deliver.port.vcpu = vcpu->id; - 
vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &inj); - - inj.u.evtchn.send_port = 197; - inj.u.evtchn.deliver.eventfd.port = 0; - inj.u.evtchn.deliver.eventfd.fd = irq_fd[1]; - inj.u.evtchn.flags = 0; - vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &inj); - - vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr); - } - vinfo = addr_gpa2hva(vm, VCPU_INFO_VADDR); - vinfo->evtchn_upcall_pending = 0; - - struct vcpu_runstate_info *rs = addr_gpa2hva(vm, RUNSTATE_ADDR); - rs->state = 0x5a; - - bool evtchn_irq_expected = false; - - for (;;) { - struct ucall uc; - - vcpu_run(vcpu); - TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); - - switch (get_ucall(vcpu, &uc)) { - case UCALL_ABORT: - REPORT_GUEST_ASSERT(uc); - /* NOT REACHED */ - case UCALL_SYNC: { - struct kvm_xen_vcpu_attr rst; - long rundelay; - - if (do_runstate_tests) - TEST_ASSERT(rs->state_entry_time == rs->time[0] + - rs->time[1] + rs->time[2] + rs->time[3], - "runstate times don't add up"); - - switch (uc.args[1]) { - case TEST_INJECT_VECTOR: - if (verbose) - printf("Delivering evtchn upcall\n"); - evtchn_irq_expected = true; - vinfo->evtchn_upcall_pending = 1; - break; - - case TEST_RUNSTATE_runnable...TEST_RUNSTATE_offline: - TEST_ASSERT(!evtchn_irq_expected, "Event channel IRQ not seen"); - if (!do_runstate_tests) - goto done; - if (verbose) - printf("Testing runstate %s\n", runstate_names[uc.args[1]]); - rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT; - rst.u.runstate.state = uc.args[1] + RUNSTATE_runnable - - TEST_RUNSTATE_runnable; - vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &rst); - break; - - case TEST_RUNSTATE_ADJUST: - if (verbose) - printf("Testing RUNSTATE_ADJUST\n"); - rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST; - memset(&rst.u, 0, sizeof(rst.u)); - rst.u.runstate.state = (uint64_t)-1; - rst.u.runstate.time_blocked = - 0x5a - rs->time[RUNSTATE_blocked]; - rst.u.runstate.time_offline = - 0x6b6b - rs->time[RUNSTATE_offline]; - rst.u.runstate.time_runnable = -rst.u.runstate.time_blocked - - rst.u.runstate.time_offline; - vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &rst); - break; - - case TEST_RUNSTATE_DATA: - if (verbose) - printf("Testing RUNSTATE_DATA\n"); - rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA; - memset(&rst.u, 0, sizeof(rst.u)); - rst.u.runstate.state = RUNSTATE_running; - rst.u.runstate.state_entry_time = 0x6b6b + 0x5a; - rst.u.runstate.time_blocked = 0x6b6b; - rst.u.runstate.time_offline = 0x5a; - vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &rst); - break; - - case TEST_STEAL_TIME: - if (verbose) - printf("Testing steal time\n"); - /* Yield until scheduler delay exceeds target */ - rundelay = get_run_delay() + MIN_STEAL_TIME; - do { - sched_yield(); - } while (get_run_delay() < rundelay); - break; - - case TEST_EVTCHN_MASKED: - if (!do_eventfd_tests) - goto done; - if (verbose) - printf("Testing masked event channel\n"); - shinfo->evtchn_mask[0] = 1UL << EVTCHN_TEST1; - eventfd_write(irq_fd[0], 1UL); - alarm(1); - break; - - case TEST_EVTCHN_UNMASKED: - if (verbose) - printf("Testing unmasked event channel\n"); - /* Unmask that, but deliver the other one */ - shinfo->evtchn_pending[0] = 0; - shinfo->evtchn_mask[0] = 0; - eventfd_write(irq_fd[1], 1UL); - evtchn_irq_expected = true; - alarm(1); - break; - - case TEST_EVTCHN_SLOWPATH: - TEST_ASSERT(!evtchn_irq_expected, - "Expected event channel IRQ but it didn't happen"); - shinfo->evtchn_pending[1] = 0; - if (verbose) - printf("Testing event channel after memslot change\n"); - vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, - DUMMY_REGION_GPA, DUMMY_REGION_SLOT, 1, 0); - 
eventfd_write(irq_fd[0], 1UL); - evtchn_irq_expected = true; - alarm(1); - break; - - case TEST_EVTCHN_SEND_IOCTL: - TEST_ASSERT(!evtchn_irq_expected, - "Expected event channel IRQ but it didn't happen"); - if (!do_evtchn_tests) - goto done; - - shinfo->evtchn_pending[0] = 0; - if (verbose) - printf("Testing injection with KVM_XEN_HVM_EVTCHN_SEND\n"); - - struct kvm_irq_routing_xen_evtchn e; - e.port = EVTCHN_TEST2; - e.vcpu = vcpu->id; - e.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL; - - vm_ioctl(vm, KVM_XEN_HVM_EVTCHN_SEND, &e); - evtchn_irq_expected = true; - alarm(1); - break; - - case TEST_EVTCHN_HCALL: - TEST_ASSERT(!evtchn_irq_expected, - "Expected event channel IRQ but it didn't happen"); - shinfo->evtchn_pending[1] = 0; - - if (verbose) - printf("Testing guest EVTCHNOP_send direct to evtchn\n"); - evtchn_irq_expected = true; - alarm(1); - break; - - case TEST_EVTCHN_HCALL_SLOWPATH: - TEST_ASSERT(!evtchn_irq_expected, - "Expected event channel IRQ but it didn't happen"); - shinfo->evtchn_pending[0] = 0; - - if (verbose) - printf("Testing guest EVTCHNOP_send direct to evtchn after memslot change\n"); - vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, - DUMMY_REGION_GPA_2, DUMMY_REGION_SLOT_2, 1, 0); - evtchn_irq_expected = true; - alarm(1); - break; - - case TEST_EVTCHN_HCALL_EVENTFD: - TEST_ASSERT(!evtchn_irq_expected, - "Expected event channel IRQ but it didn't happen"); - shinfo->evtchn_pending[0] = 0; - - if (verbose) - printf("Testing guest EVTCHNOP_send to eventfd\n"); - evtchn_irq_expected = true; - alarm(1); - break; - - case TEST_TIMER_SETUP: - TEST_ASSERT(!evtchn_irq_expected, - "Expected event channel IRQ but it didn't happen"); - shinfo->evtchn_pending[1] = 0; - - if (verbose) - printf("Testing guest oneshot timer\n"); - break; - - case TEST_TIMER_WAIT: - memset(&tmr, 0, sizeof(tmr)); - tmr.type = KVM_XEN_VCPU_ATTR_TYPE_TIMER; - vcpu_ioctl(vcpu, KVM_XEN_VCPU_GET_ATTR, &tmr); - TEST_ASSERT(tmr.u.timer.port == EVTCHN_TIMER, - "Timer port not returned"); - TEST_ASSERT(tmr.u.timer.priority == KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL, - "Timer priority not returned"); - TEST_ASSERT(tmr.u.timer.expires_ns > rs->state_entry_time, - "Timer expiry not returned"); - evtchn_irq_expected = true; - alarm(1); - break; - - case TEST_TIMER_RESTORE: - TEST_ASSERT(!evtchn_irq_expected, - "Expected event channel IRQ but it didn't happen"); - shinfo->evtchn_pending[0] = 0; - - if (verbose) - printf("Testing restored oneshot timer\n"); - - tmr.u.timer.expires_ns = rs->state_entry_time + 100000000; - vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr); - evtchn_irq_expected = true; - alarm(1); - break; - - case TEST_POLL_READY: - TEST_ASSERT(!evtchn_irq_expected, - "Expected event channel IRQ but it didn't happen"); - - if (verbose) - printf("Testing SCHEDOP_poll with already pending event\n"); - shinfo->evtchn_pending[0] = shinfo->evtchn_mask[0] = 1UL << EVTCHN_TIMER; - alarm(1); - break; - - case TEST_POLL_TIMEOUT: - if (verbose) - printf("Testing SCHEDOP_poll timeout\n"); - shinfo->evtchn_pending[0] = 0; - alarm(1); - break; - - case TEST_POLL_MASKED: - if (verbose) - printf("Testing SCHEDOP_poll wake on masked event\n"); - - tmr.u.timer.expires_ns = rs->state_entry_time + 100000000; - vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr); - alarm(1); - break; - - case TEST_POLL_WAKE: - shinfo->evtchn_pending[0] = shinfo->evtchn_mask[0] = 0; - if (verbose) - printf("Testing SCHEDOP_poll wake on unmasked event\n"); - - evtchn_irq_expected = true; - tmr.u.timer.expires_ns = rs->state_entry_time + 
100000000; - vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr); - - /* Read it back and check the pending time is reported correctly */ - tmr.u.timer.expires_ns = 0; - vcpu_ioctl(vcpu, KVM_XEN_VCPU_GET_ATTR, &tmr); - TEST_ASSERT(tmr.u.timer.expires_ns == rs->state_entry_time + 100000000, - "Timer not reported pending"); - alarm(1); - break; - - case SET_VCPU_INFO: - if (has_shinfo_hva) { - struct kvm_xen_vcpu_attr vih = { - .type = KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO_HVA, - .u.hva = (unsigned long)vinfo - }; - vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &vih); - } - break; - - case TEST_TIMER_PAST: - TEST_ASSERT(!evtchn_irq_expected, - "Expected event channel IRQ but it didn't happen"); - /* Read timer and check it is no longer pending */ - vcpu_ioctl(vcpu, KVM_XEN_VCPU_GET_ATTR, &tmr); - TEST_ASSERT(!tmr.u.timer.expires_ns, "Timer still reported pending"); - - shinfo->evtchn_pending[0] = 0; - if (verbose) - printf("Testing timer in the past\n"); - - evtchn_irq_expected = true; - tmr.u.timer.expires_ns = rs->state_entry_time - 100000000ULL; - vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr); - alarm(1); - break; - - case TEST_LOCKING_SEND_RACE: - TEST_ASSERT(!evtchn_irq_expected, - "Expected event channel IRQ but it didn't happen"); - alarm(0); - - if (verbose) - printf("Testing shinfo lock corruption (KVM_XEN_HVM_EVTCHN_SEND)\n"); - - ret = pthread_create(&thread, NULL, &juggle_shinfo_state, (void *)vm); - TEST_ASSERT(ret == 0, "pthread_create() failed: %s", strerror(ret)); - - struct kvm_irq_routing_xen_evtchn uxe = { - .port = 1, - .vcpu = vcpu->id, - .priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL - }; - - evtchn_irq_expected = true; - for (time_t t = time(NULL) + SHINFO_RACE_TIMEOUT; time(NULL) < t;) - __vm_ioctl(vm, KVM_XEN_HVM_EVTCHN_SEND, &uxe); - break; - - case TEST_LOCKING_POLL_RACE: - TEST_ASSERT(!evtchn_irq_expected, - "Expected event channel IRQ but it didn't happen"); - - if (verbose) - printf("Testing shinfo lock corruption (SCHEDOP_poll)\n"); - - shinfo->evtchn_pending[0] = 1; - - evtchn_irq_expected = true; - tmr.u.timer.expires_ns = rs->state_entry_time + - SHINFO_RACE_TIMEOUT * 1000000000ULL; - vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr); - break; - - case TEST_LOCKING_POLL_TIMEOUT: - /* - * Optional and possibly repeated sync point. - * Injecting the timer IRQ may fail if the - * shinfo is invalid when the timer expires. - * If the timer has expired but the IRQ hasn't - * been delivered, rearm the timer and retry. - */ - vcpu_ioctl(vcpu, KVM_XEN_VCPU_GET_ATTR, &tmr); - - /* Resume the guest if the timer is still pending. */ - if (tmr.u.timer.expires_ns) - break; - - /* All done if the IRQ was delivered. 
*/ - if (!evtchn_irq_expected) - break; - - tmr.u.timer.expires_ns = rs->state_entry_time + - SHINFO_RACE_TIMEOUT * 1000000000ULL; - vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr); - break; - case TEST_DONE: - TEST_ASSERT(!evtchn_irq_expected, - "Expected event channel IRQ but it didn't happen"); - - ret = pthread_cancel(thread); - TEST_ASSERT(ret == 0, "pthread_cancel() failed: %s", strerror(ret)); - - ret = pthread_join(thread, 0); - TEST_ASSERT(ret == 0, "pthread_join() failed: %s", strerror(ret)); - goto done; - - case TEST_GUEST_SAW_IRQ: - TEST_ASSERT(evtchn_irq_expected, "Unexpected event channel IRQ"); - evtchn_irq_expected = false; - break; - } - break; - } - case UCALL_DONE: - goto done; - default: - TEST_FAIL("Unknown ucall 0x%lx.", uc.cmd); - } - } - - done: - evt_reset.type = KVM_XEN_ATTR_TYPE_EVTCHN; - evt_reset.u.evtchn.flags = KVM_XEN_EVTCHN_RESET; - vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &evt_reset); - - alarm(0); - - /* - * Just a *really* basic check that things are being put in the - * right place. The actual calculations are much the same for - * Xen as they are for the KVM variants, so no need to check. - */ - struct pvclock_wall_clock *wc; - struct pvclock_vcpu_time_info *ti, *ti2; - struct kvm_clock_data kcdata; - long long delta; - - wc = addr_gpa2hva(vm, SHINFO_REGION_GPA + 0xc00); - ti = addr_gpa2hva(vm, SHINFO_REGION_GPA + 0x40 + 0x20); - ti2 = addr_gpa2hva(vm, PVTIME_ADDR); - - if (verbose) { - printf("Wall clock (v %d) %d.%09d\n", wc->version, wc->sec, wc->nsec); - printf("Time info 1: v %u tsc %" PRIu64 " time %" PRIu64 " mul %u shift %u flags %x\n", - ti->version, ti->tsc_timestamp, ti->system_time, ti->tsc_to_system_mul, - ti->tsc_shift, ti->flags); - printf("Time info 2: v %u tsc %" PRIu64 " time %" PRIu64 " mul %u shift %u flags %x\n", - ti2->version, ti2->tsc_timestamp, ti2->system_time, ti2->tsc_to_system_mul, - ti2->tsc_shift, ti2->flags); - } - - TEST_ASSERT(wc->version && !(wc->version & 1), - "Bad wallclock version %x", wc->version); - - vm_ioctl(vm, KVM_GET_CLOCK, &kcdata); - - if (kcdata.flags & KVM_CLOCK_REALTIME) { - if (verbose) { - printf("KVM_GET_CLOCK clock: %lld.%09lld\n", - kcdata.clock / NSEC_PER_SEC, kcdata.clock % NSEC_PER_SEC); - printf("KVM_GET_CLOCK realtime: %lld.%09lld\n", - kcdata.realtime / NSEC_PER_SEC, kcdata.realtime % NSEC_PER_SEC); - } - - delta = (wc->sec * NSEC_PER_SEC + wc->nsec) - (kcdata.realtime - kcdata.clock); - - /* - * KVM_GET_CLOCK gives CLOCK_REALTIME which jumps on leap seconds updates but - * unfortunately KVM doesn't currently offer a CLOCK_TAI alternative. Accept 1s - * delta as testing clock accuracy is not the goal here. The test just needs to - * check that the value in shinfo is somewhat sane. - */ - TEST_ASSERT(llabs(delta) < NSEC_PER_SEC, - "Guest's epoch from shinfo %d.%09d differs from KVM_GET_CLOCK %lld.%lld", - wc->sec, wc->nsec, (kcdata.realtime - kcdata.clock) / NSEC_PER_SEC, - (kcdata.realtime - kcdata.clock) % NSEC_PER_SEC); - } else { - pr_info("Missing KVM_CLOCK_REALTIME, skipping shinfo epoch sanity check\n"); - } - - TEST_ASSERT(ti->version && !(ti->version & 1), - "Bad time_info version %x", ti->version); - TEST_ASSERT(ti2->version && !(ti2->version & 1), - "Bad time_info version %x", ti->version); - - if (do_runstate_tests) { - /* - * Fetch runstate and check sanity. Strictly speaking in the - * general case we might not expect the numbers to be identical - * but in this case we know we aren't running the vCPU any more. 
- */ - struct kvm_xen_vcpu_attr rst = { - .type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA, - }; - vcpu_ioctl(vcpu, KVM_XEN_VCPU_GET_ATTR, &rst); - - if (verbose) { - printf("Runstate: %s(%d), entry %" PRIu64 " ns\n", - rs->state <= RUNSTATE_offline ? runstate_names[rs->state] : "unknown", - rs->state, rs->state_entry_time); - for (int i = RUNSTATE_running; i <= RUNSTATE_offline; i++) { - printf("State %s: %" PRIu64 " ns\n", - runstate_names[i], rs->time[i]); - } - } - - /* - * Exercise runstate info at all points across the page boundary, in - * 32-bit and 64-bit mode. In particular, test the case where it is - * configured in 32-bit mode and then switched to 64-bit mode while - * active, which takes it onto the second page. - */ - unsigned long runstate_addr; - struct compat_vcpu_runstate_info *crs; - for (runstate_addr = SHINFO_REGION_GPA + PAGE_SIZE + PAGE_SIZE - sizeof(*rs) - 4; - runstate_addr < SHINFO_REGION_GPA + PAGE_SIZE + PAGE_SIZE + 4; runstate_addr++) { - - rs = addr_gpa2hva(vm, runstate_addr); - crs = (void *)rs; - - memset(rs, 0xa5, sizeof(*rs)); - - /* Set to compatibility mode */ - lm.u.long_mode = 0; - vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &lm); - - /* Set runstate to new address (kernel will write it) */ - struct kvm_xen_vcpu_attr st = { - .type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR, - .u.gpa = runstate_addr, - }; - vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &st); - - if (verbose) - printf("Compatibility runstate at %08lx\n", runstate_addr); - - TEST_ASSERT(crs->state == rst.u.runstate.state, "Runstate mismatch"); - TEST_ASSERT(crs->state_entry_time == rst.u.runstate.state_entry_time, - "State entry time mismatch"); - TEST_ASSERT(crs->time[RUNSTATE_running] == rst.u.runstate.time_running, - "Running time mismatch"); - TEST_ASSERT(crs->time[RUNSTATE_runnable] == rst.u.runstate.time_runnable, - "Runnable time mismatch"); - TEST_ASSERT(crs->time[RUNSTATE_blocked] == rst.u.runstate.time_blocked, - "Blocked time mismatch"); - TEST_ASSERT(crs->time[RUNSTATE_offline] == rst.u.runstate.time_offline, - "Offline time mismatch"); - TEST_ASSERT(crs->time[RUNSTATE_offline + 1] == 0xa5a5a5a5a5a5a5a5ULL, - "Structure overrun"); - TEST_ASSERT(crs->state_entry_time == crs->time[0] + - crs->time[1] + crs->time[2] + crs->time[3], - "runstate times don't add up"); - - - /* Now switch to 64-bit mode */ - lm.u.long_mode = 1; - vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &lm); - - memset(rs, 0xa5, sizeof(*rs)); - - /* Don't change the address, just trigger a write */ - struct kvm_xen_vcpu_attr adj = { - .type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST, - .u.runstate.state = (uint64_t)-1 - }; - vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &adj); - - if (verbose) - printf("64-bit runstate at %08lx\n", runstate_addr); - - TEST_ASSERT(rs->state == rst.u.runstate.state, "Runstate mismatch"); - TEST_ASSERT(rs->state_entry_time == rst.u.runstate.state_entry_time, - "State entry time mismatch"); - TEST_ASSERT(rs->time[RUNSTATE_running] == rst.u.runstate.time_running, - "Running time mismatch"); - TEST_ASSERT(rs->time[RUNSTATE_runnable] == rst.u.runstate.time_runnable, - "Runnable time mismatch"); - TEST_ASSERT(rs->time[RUNSTATE_blocked] == rst.u.runstate.time_blocked, - "Blocked time mismatch"); - TEST_ASSERT(rs->time[RUNSTATE_offline] == rst.u.runstate.time_offline, - "Offline time mismatch"); - TEST_ASSERT(rs->time[RUNSTATE_offline + 1] == 0xa5a5a5a5a5a5a5a5ULL, - "Structure overrun"); - - TEST_ASSERT(rs->state_entry_time == rs->time[0] + - rs->time[1] + rs->time[2] + rs->time[3], - "runstate times don't add up"); - } - } - 
- kvm_vm_free(vm); - return 0; -} diff --git a/tools/testing/selftests/kvm/x86_64/xen_vmcall_test.c b/tools/testing/selftests/kvm/x86_64/xen_vmcall_test.c deleted file mode 100644 index 2585087cdf5c..000000000000 --- a/tools/testing/selftests/kvm/x86_64/xen_vmcall_test.c +++ /dev/null @@ -1,143 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * xen_vmcall_test - * - * Copyright © 2020 Amazon.com, Inc. or its affiliates. - * - * Userspace hypercall testing - */ - -#include "test_util.h" -#include "kvm_util.h" -#include "processor.h" -#include "hyperv.h" - -#define HCALL_REGION_GPA 0xc0000000ULL -#define HCALL_REGION_SLOT 10 - -#define INPUTVALUE 17 -#define ARGVALUE(x) (0xdeadbeef5a5a0000UL + x) -#define RETVALUE 0xcafef00dfbfbffffUL - -#define XEN_HYPERCALL_MSR 0x40000200 -#define HV_GUEST_OS_ID_MSR 0x40000000 -#define HV_HYPERCALL_MSR 0x40000001 - -#define HVCALL_SIGNAL_EVENT 0x005d -#define HV_STATUS_INVALID_ALIGNMENT 4 - -static void guest_code(void) -{ - unsigned long rax = INPUTVALUE; - unsigned long rdi = ARGVALUE(1); - unsigned long rsi = ARGVALUE(2); - unsigned long rdx = ARGVALUE(3); - unsigned long rcx; - register unsigned long r10 __asm__("r10") = ARGVALUE(4); - register unsigned long r8 __asm__("r8") = ARGVALUE(5); - register unsigned long r9 __asm__("r9") = ARGVALUE(6); - - /* First a direct invocation of 'vmcall' */ - __asm__ __volatile__("vmcall" : - "=a"(rax) : - "a"(rax), "D"(rdi), "S"(rsi), "d"(rdx), - "r"(r10), "r"(r8), "r"(r9)); - GUEST_ASSERT(rax == RETVALUE); - - /* Fill in the Xen hypercall page */ - __asm__ __volatile__("wrmsr" : : "c" (XEN_HYPERCALL_MSR), - "a" (HCALL_REGION_GPA & 0xffffffff), - "d" (HCALL_REGION_GPA >> 32)); - - /* Set Hyper-V Guest OS ID */ - __asm__ __volatile__("wrmsr" : : "c" (HV_GUEST_OS_ID_MSR), - "a" (0x5a), "d" (0)); - - /* Hyper-V hypercall page */ - u64 msrval = HCALL_REGION_GPA + PAGE_SIZE + 1; - __asm__ __volatile__("wrmsr" : : "c" (HV_HYPERCALL_MSR), - "a" (msrval & 0xffffffff), - "d" (msrval >> 32)); - - /* Invoke a Xen hypercall */ - __asm__ __volatile__("call *%1" : "=a"(rax) : - "r"(HCALL_REGION_GPA + INPUTVALUE * 32), - "a"(rax), "D"(rdi), "S"(rsi), "d"(rdx), - "r"(r10), "r"(r8), "r"(r9)); - GUEST_ASSERT(rax == RETVALUE); - - /* Invoke a Hyper-V hypercall */ - rax = 0; - rcx = HVCALL_SIGNAL_EVENT; /* code */ - rdx = 0x5a5a5a5a; /* ingpa (badly aligned) */ - __asm__ __volatile__("call *%1" : "=a"(rax) : - "r"(HCALL_REGION_GPA + PAGE_SIZE), - "a"(rax), "c"(rcx), "d"(rdx), - "r"(r8)); - GUEST_ASSERT(rax == HV_STATUS_INVALID_ALIGNMENT); - - GUEST_DONE(); -} - -int main(int argc, char *argv[]) -{ - unsigned int xen_caps; - struct kvm_vcpu *vcpu; - struct kvm_vm *vm; - - xen_caps = kvm_check_cap(KVM_CAP_XEN_HVM); - TEST_REQUIRE(xen_caps & KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL); - - vm = vm_create_with_one_vcpu(&vcpu, guest_code); - vcpu_set_hv_cpuid(vcpu); - - struct kvm_xen_hvm_config hvmc = { - .flags = KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL, - .msr = XEN_HYPERCALL_MSR, - }; - vm_ioctl(vm, KVM_XEN_HVM_CONFIG, &hvmc); - - /* Map a region for the hypercall pages */ - vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, - HCALL_REGION_GPA, HCALL_REGION_SLOT, 2, 0); - virt_map(vm, HCALL_REGION_GPA, HCALL_REGION_GPA, 2); - - for (;;) { - volatile struct kvm_run *run = vcpu->run; - struct ucall uc; - - vcpu_run(vcpu); - - if (run->exit_reason == KVM_EXIT_XEN) { - TEST_ASSERT_EQ(run->xen.type, KVM_EXIT_XEN_HCALL); - TEST_ASSERT_EQ(run->xen.u.hcall.cpl, 0); - TEST_ASSERT_EQ(run->xen.u.hcall.longmode, 1); - 
TEST_ASSERT_EQ(run->xen.u.hcall.input, INPUTVALUE); - TEST_ASSERT_EQ(run->xen.u.hcall.params[0], ARGVALUE(1)); - TEST_ASSERT_EQ(run->xen.u.hcall.params[1], ARGVALUE(2)); - TEST_ASSERT_EQ(run->xen.u.hcall.params[2], ARGVALUE(3)); - TEST_ASSERT_EQ(run->xen.u.hcall.params[3], ARGVALUE(4)); - TEST_ASSERT_EQ(run->xen.u.hcall.params[4], ARGVALUE(5)); - TEST_ASSERT_EQ(run->xen.u.hcall.params[5], ARGVALUE(6)); - run->xen.u.hcall.result = RETVALUE; - continue; - } - - TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); - - switch (get_ucall(vcpu, &uc)) { - case UCALL_ABORT: - REPORT_GUEST_ASSERT(uc); - /* NOT REACHED */ - case UCALL_SYNC: - break; - case UCALL_DONE: - goto done; - default: - TEST_FAIL("Unknown ucall 0x%lx.", uc.cmd); - } - } -done: - kvm_vm_free(vm); - return 0; -} diff --git a/tools/testing/selftests/kvm/x86_64/xss_msr_test.c b/tools/testing/selftests/kvm/x86_64/xss_msr_test.c deleted file mode 100644 index f331a4e9bae3..000000000000 --- a/tools/testing/selftests/kvm/x86_64/xss_msr_test.c +++ /dev/null @@ -1,54 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright (C) 2019, Google LLC. - * - * Tests for the IA32_XSS MSR. - */ -#include - -#include "test_util.h" -#include "kvm_util.h" -#include "vmx.h" - -#define MSR_BITS 64 - -int main(int argc, char *argv[]) -{ - bool xss_in_msr_list; - struct kvm_vm *vm; - struct kvm_vcpu *vcpu; - uint64_t xss_val; - int i, r; - - /* Create VM */ - vm = vm_create_with_one_vcpu(&vcpu, NULL); - - TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XSAVES)); - - xss_val = vcpu_get_msr(vcpu, MSR_IA32_XSS); - TEST_ASSERT(xss_val == 0, - "MSR_IA32_XSS should be initialized to zero"); - - vcpu_set_msr(vcpu, MSR_IA32_XSS, xss_val); - - /* - * At present, KVM only supports a guest IA32_XSS value of 0. Verify - * that trying to set the guest IA32_XSS to an unsupported value fails. - * Also, in the future when a non-zero value succeeds check that - * IA32_XSS is in the list of MSRs to save/restore. - */ - xss_in_msr_list = kvm_msr_is_in_save_restore_list(MSR_IA32_XSS); - for (i = 0; i < MSR_BITS; ++i) { - r = _vcpu_set_msr(vcpu, MSR_IA32_XSS, 1ull << i); - - /* - * Setting a list of MSRs returns the entry that "faulted", or - * the last entry +1 if all MSRs were successfully written. - */ - TEST_ASSERT(!r || r == 1, KVM_IOCTL_ERROR(KVM_SET_MSRS, r)); - TEST_ASSERT(r != 1 || xss_in_msr_list, - "IA32_XSS was able to be set, but was not in save/restore list"); - } - - kvm_vm_free(vm); -} -- cgit v1.2.3 From 9af04539d474dda4984ff4909d4568e6123c8cba Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 27 Nov 2024 16:55:47 -0800 Subject: KVM: selftests: Override ARCH for x86_64 instead of using ARCH_DIR Now that KVM selftests uses the kernel's canonical arch paths, directly override ARCH to 'x86' when targeting x86_64 instead of defining ARCH_DIR to redirect to appropriate paths. ARCH_DIR was originally added to deal with KVM selftests using the target triple ARCH for directories, e.g. s390x and aarch64; keeping it around just to deal with the one-off alias from x86_64=>x86 is unnecessary and confusing. Note, even when selftests are built from the top-level Makefile, ARCH is scoped to KVM's makefiles, i.e. overriding ARCH won't trip up some other selftests that (somehow) expects x86_64 and can't work with x86. 
Reviewed-by: Muhammad Usama Anjum Link: https://lore.kernel.org/r/20241128005547.4077116-17-seanjc@google.com Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/Makefile | 4 +--- tools/testing/selftests/kvm/Makefile.kvm | 20 ++++++++++---------- 2 files changed, 11 insertions(+), 13 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index 9bc2eba1af1c..20af35a91d6f 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -6,9 +6,7 @@ ARCH ?= $(SUBARCH) ifeq ($(ARCH),$(filter $(ARCH),arm64 s390 riscv x86 x86_64)) # Top-level selftests allows ARCH=x86_64 :-( ifeq ($(ARCH),x86_64) - ARCH_DIR := x86 -else - ARCH_DIR := $(ARCH) + ARCH := x86 endif include Makefile.kvm else diff --git a/tools/testing/selftests/kvm/Makefile.kvm b/tools/testing/selftests/kvm/Makefile.kvm index 9888dd6bb483..4277b983cace 100644 --- a/tools/testing/selftests/kvm/Makefile.kvm +++ b/tools/testing/selftests/kvm/Makefile.kvm @@ -207,10 +207,10 @@ TEST_GEN_PROGS_riscv += steal_time SPLIT_TESTS += arch_timer SPLIT_TESTS += get-reg-list -TEST_PROGS += $(TEST_PROGS_$(ARCH_DIR)) -TEST_GEN_PROGS += $(TEST_GEN_PROGS_$(ARCH_DIR)) -TEST_GEN_PROGS_EXTENDED += $(TEST_GEN_PROGS_EXTENDED_$(ARCH_DIR)) -LIBKVM += $(LIBKVM_$(ARCH_DIR)) +TEST_PROGS += $(TEST_PROGS_$(ARCH)) +TEST_GEN_PROGS += $(TEST_GEN_PROGS_$(ARCH)) +TEST_GEN_PROGS_EXTENDED += $(TEST_GEN_PROGS_EXTENDED_$(ARCH)) +LIBKVM += $(LIBKVM_$(ARCH)) OVERRIDE_TARGETS = 1 @@ -222,14 +222,14 @@ include ../lib.mk INSTALL_HDR_PATH = $(top_srcdir)/usr LINUX_HDR_PATH = $(INSTALL_HDR_PATH)/include/ LINUX_TOOL_INCLUDE = $(top_srcdir)/tools/include -LINUX_TOOL_ARCH_INCLUDE = $(top_srcdir)/tools/arch/$(ARCH_DIR)/include +LINUX_TOOL_ARCH_INCLUDE = $(top_srcdir)/tools/arch/$(ARCH)/include CFLAGS += -Wall -Wstrict-prototypes -Wuninitialized -O2 -g -std=gnu99 \ -Wno-gnu-variable-sized-type-not-at-end -MD -MP -DCONFIG_64BIT \ -fno-builtin-memcmp -fno-builtin-memcpy \ -fno-builtin-memset -fno-builtin-strnlen \ -fno-stack-protector -fno-PIE -fno-strict-aliasing \ -I$(LINUX_TOOL_INCLUDE) -I$(LINUX_TOOL_ARCH_INCLUDE) \ - -I$(LINUX_HDR_PATH) -Iinclude -I$( Date: Wed, 27 Nov 2024 17:33:33 -0800 Subject: KVM: selftests: Update x86's set_sregs_test to match KVM's CPUID enforcement Rework x86's set sregs test to verify that KVM enforces CPUID vs. CR4 features even if userspace hasn't explicitly set guest CPUID. KVM used to allow userspace to set any KVM-supported CR4 value prior to KVM_SET_CPUID2, and the test verified that behavior. However, the testcase was written purely to verify KVM's existing behavior, i.e. was NOT written to match the needs of real world VMMs. Opportunistically verify that KVM continues to reject unsupported features after KVM_SET_CPUID2 (using KVM_GET_SUPPORTED_CPUID). 
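For illustration only (not part of the patch), the essence of each TEST_INVALID_CR_BIT() invocation in the diff below is a minimal sketch along these lines, reusing the selftest helpers visible in the diff and assuming the chosen CR4 bit (LA57 here) is not exposed in the guest's CPUID; the real macro additionally verifies that KVM leaves sregs untouched on failure:

    static void check_unsupported_cr4_bit(struct kvm_vcpu *vcpu, uint64_t cr4_bit)
    {
            struct kvm_sregs sregs;
            int rc;

            /* Attempt to set a CR4 bit whose CPUID feature the guest lacks. */
            vcpu_sregs_get(vcpu, &sregs);
            sregs.cr4 |= cr4_bit;
            rc = _vcpu_sregs_set(vcpu, &sregs);

            /* KVM_SET_SREGS must fail, e.g. for X86_CR4_LA57 without LA57. */
            TEST_ASSERT(rc, "KVM should reject unsupported CR4 bit 0x%lx", cr4_bit);
    }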
Reviewed-by: Maxim Levitsky Reviewed-by: Binbin Wu Reviewed-by: Xiaoyao Li Link: https://lore.kernel.org/r/20241128013424.4096668-7-seanjc@google.com Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/x86/set_sregs_test.c | 53 ++++++++++++++---------- 1 file changed, 30 insertions(+), 23 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/kvm/x86/set_sregs_test.c b/tools/testing/selftests/kvm/x86/set_sregs_test.c index c021c0795a96..96fd690d479a 100644 --- a/tools/testing/selftests/kvm/x86/set_sregs_test.c +++ b/tools/testing/selftests/kvm/x86/set_sregs_test.c @@ -41,13 +41,15 @@ do { \ TEST_ASSERT(!memcmp(&new, &orig, sizeof(new)), "KVM modified sregs"); \ } while (0) +#define KVM_ALWAYS_ALLOWED_CR4 (X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | \ + X86_CR4_DE | X86_CR4_PSE | X86_CR4_PAE | \ + X86_CR4_MCE | X86_CR4_PGE | X86_CR4_PCE | \ + X86_CR4_OSFXSR | X86_CR4_OSXMMEXCPT) + static uint64_t calc_supported_cr4_feature_bits(void) { - uint64_t cr4; + uint64_t cr4 = KVM_ALWAYS_ALLOWED_CR4; - cr4 = X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE | - X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE | X86_CR4_PGE | - X86_CR4_PCE | X86_CR4_OSFXSR | X86_CR4_OSXMMEXCPT; if (kvm_cpu_has(X86_FEATURE_UMIP)) cr4 |= X86_CR4_UMIP; if (kvm_cpu_has(X86_FEATURE_LA57)) @@ -72,28 +74,14 @@ static uint64_t calc_supported_cr4_feature_bits(void) return cr4; } -int main(int argc, char *argv[]) +static void test_cr_bits(struct kvm_vcpu *vcpu, uint64_t cr4) { struct kvm_sregs sregs; - struct kvm_vcpu *vcpu; - struct kvm_vm *vm; - uint64_t cr4; int rc, i; - /* - * Create a dummy VM, specifically to avoid doing KVM_SET_CPUID2, and - * use it to verify all supported CR4 bits can be set prior to defining - * the vCPU model, i.e. without doing KVM_SET_CPUID2. - */ - vm = vm_create_barebones(); - vcpu = __vm_vcpu_add(vm, 0); - vcpu_sregs_get(vcpu, &sregs); - - sregs.cr0 = 0; - sregs.cr4 |= calc_supported_cr4_feature_bits(); - cr4 = sregs.cr4; - + sregs.cr0 &= ~(X86_CR0_CD | X86_CR0_NW); + sregs.cr4 |= cr4; rc = _vcpu_sregs_set(vcpu, &sregs); TEST_ASSERT(!rc, "Failed to set supported CR4 bits (0x%lx)", cr4); @@ -101,7 +89,6 @@ int main(int argc, char *argv[]) TEST_ASSERT(sregs.cr4 == cr4, "sregs.CR4 (0x%llx) != CR4 (0x%lx)", sregs.cr4, cr4); - /* Verify all unsupported features are rejected by KVM. */ TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_UMIP); TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_LA57); TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_VMXE); @@ -119,10 +106,28 @@ int main(int argc, char *argv[]) /* NW without CD is illegal, as is PG without PE. */ TEST_INVALID_CR_BIT(vcpu, cr0, sregs, X86_CR0_NW); TEST_INVALID_CR_BIT(vcpu, cr0, sregs, X86_CR0_PG); +} +int main(int argc, char *argv[]) +{ + struct kvm_sregs sregs; + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + int rc; + + /* + * Create a dummy VM, specifically to avoid doing KVM_SET_CPUID2, and + * use it to verify KVM enforces guest CPUID even if *userspace* never + * sets CPUID. + */ + vm = vm_create_barebones(); + vcpu = __vm_vcpu_add(vm, 0); + test_cr_bits(vcpu, KVM_ALWAYS_ALLOWED_CR4); kvm_vm_free(vm); - /* Create a "real" VM and verify APIC_BASE can be set. */ + /* Create a "real" VM with a fully populated guest CPUID and verify + * APIC_BASE and all supported CR4 can be set. 
+ */ vm = vm_create_with_one_vcpu(&vcpu, NULL); vcpu_sregs_get(vcpu, &sregs); @@ -135,6 +140,8 @@ int main(int argc, char *argv[]) TEST_ASSERT(!rc, "Couldn't set IA32_APIC_BASE to %llx (valid)", sregs.apic_base); + test_cr_bits(vcpu, calc_supported_cr4_feature_bits()); + kvm_vm_free(vm); return 0; -- cgit v1.2.3 From 08833719e77041e331f6193878f1b944744b9068 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 27 Nov 2024 17:33:34 -0800 Subject: KVM: selftests: Assert that vcpu->cpuid is non-NULL when getting CPUID entries Add a sanity check in __vcpu_get_cpuid_entry() to provide a friendlier error than a segfault when a test developer tries to use a vCPU CPUID helper on a barebones vCPU. Link: https://lore.kernel.org/r/20241128013424.4096668-8-seanjc@google.com Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/include/x86/processor.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/kvm/include/x86/processor.h b/tools/testing/selftests/kvm/include/x86/processor.h index 9ec984cf8674..71aa290c583f 100644 --- a/tools/testing/selftests/kvm/include/x86/processor.h +++ b/tools/testing/selftests/kvm/include/x86/processor.h @@ -1014,6 +1014,8 @@ static inline struct kvm_cpuid_entry2 *__vcpu_get_cpuid_entry(struct kvm_vcpu *v uint32_t function, uint32_t index) { + TEST_ASSERT(vcpu->cpuid, "Must do vcpu_init_cpuid() first (or equivalent)"); + return (struct kvm_cpuid_entry2 *)get_cpuid_entry(vcpu->cpuid, function, index); } -- cgit v1.2.3 From a2a791e8208623b1575f21c7ec559df095c0a96e Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 27 Nov 2024 17:33:35 -0800 Subject: KVM: selftests: Refresh vCPU CPUID cache in __vcpu_get_cpuid_entry() Refresh selftests' CPUID cache in the vCPU structure when querying a CPUID entry so that tests don't consume stale data when KVM modifies CPUID as a side effect to a completely unrelated change. E.g. KVM adjusts OSXSAVE in response to CR4.OSXSAVE changes. Unnecessarily invoking KVM_GET_CPUID is suboptimal, but vcpu->cpuid exists to simplify selftests development, not for performance reasons. And, unfortunately, trying to handle the side effects in tests or other flows is unpleasant, e.g. selftests could manually refresh if KVM_SET_SREGS is successful, but that would still leave a gap with respect to guest CR4 changes. 
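As a hedged illustration (not taken from the patch), this is the kind of flow the refresh keeps honest, assuming a vCPU created with the default, fully populated guest CPUID on a host with XSAVE support:

    static void check_osxsave_tracks_cr4(struct kvm_vcpu *vcpu)
    {
            struct kvm_sregs sregs;

            /* Toggle CR4.OSXSAVE via KVM_SET_SREGS; KVM mirrors it into CPUID. */
            vcpu_sregs_get(vcpu, &sregs);
            sregs.cr4 |= X86_CR4_OSXSAVE;
            vcpu_sregs_set(vcpu, &sregs);

            /*
             * With the refresh in __vcpu_get_cpuid_entry(), this reads KVM's
             * current view instead of the snapshot cached at vCPU creation.
             */
            TEST_ASSERT(vcpu_cpuid_has(vcpu, X86_FEATURE_OSXSAVE),
                        "OSXSAVE should be reflected in guest CPUID");
    }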
Reviewed-by: Maxim Levitsky Link: https://lore.kernel.org/r/20241128013424.4096668-9-seanjc@google.com Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/include/x86/processor.h | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/kvm/include/x86/processor.h b/tools/testing/selftests/kvm/include/x86/processor.h index 71aa290c583f..b6753e27dfb6 100644 --- a/tools/testing/selftests/kvm/include/x86/processor.h +++ b/tools/testing/selftests/kvm/include/x86/processor.h @@ -1010,12 +1010,19 @@ static inline struct kvm_cpuid2 *allocate_kvm_cpuid2(int nr_entries) void vcpu_init_cpuid(struct kvm_vcpu *vcpu, const struct kvm_cpuid2 *cpuid); +static inline void vcpu_get_cpuid(struct kvm_vcpu *vcpu) +{ + vcpu_ioctl(vcpu, KVM_GET_CPUID2, vcpu->cpuid); +} + static inline struct kvm_cpuid_entry2 *__vcpu_get_cpuid_entry(struct kvm_vcpu *vcpu, uint32_t function, uint32_t index) { TEST_ASSERT(vcpu->cpuid, "Must do vcpu_init_cpuid() first (or equivalent)"); + vcpu_get_cpuid(vcpu); + return (struct kvm_cpuid_entry2 *)get_cpuid_entry(vcpu->cpuid, function, index); } @@ -1036,7 +1043,7 @@ static inline int __vcpu_set_cpuid(struct kvm_vcpu *vcpu) return r; /* On success, refresh the cache to pick up adjustments made by KVM. */ - vcpu_ioctl(vcpu, KVM_GET_CPUID2, vcpu->cpuid); + vcpu_get_cpuid(vcpu); return 0; } @@ -1046,12 +1053,7 @@ static inline void vcpu_set_cpuid(struct kvm_vcpu *vcpu) vcpu_ioctl(vcpu, KVM_SET_CPUID2, vcpu->cpuid); /* Refresh the cache to pick up adjustments made by KVM. */ - vcpu_ioctl(vcpu, KVM_GET_CPUID2, vcpu->cpuid); -} - -static inline void vcpu_get_cpuid(struct kvm_vcpu *vcpu) -{ - vcpu_ioctl(vcpu, KVM_GET_CPUID2, vcpu->cpuid); + vcpu_get_cpuid(vcpu); } void vcpu_set_cpuid_property(struct kvm_vcpu *vcpu, -- cgit v1.2.3 From 01bcd829c63fdde92d9d6c32b2ed3ba34ead0930 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 27 Nov 2024 17:33:36 -0800 Subject: KVM: selftests: Verify KVM stuffs runtime CPUID OS bits on CR4 writes Extend x86's set sregs test to verify that KVM sets/clears OSXSAVE and OSKPKE according to CR4.XSAVE and CR4.PKE respectively. For performance reasons, KVM is responsible for emulating the architectural behavior of the OS CPUID bits tracking CR4. Reviewed-by: Maxim Levitsky Reviewed-by: Binbin Wu Reviewed-by: Xiaoyao Li Link: https://lore.kernel.org/r/20241128013424.4096668-10-seanjc@google.com Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/x86/set_sregs_test.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/kvm/x86/set_sregs_test.c b/tools/testing/selftests/kvm/x86/set_sregs_test.c index 96fd690d479a..f4095a3d1278 100644 --- a/tools/testing/selftests/kvm/x86/set_sregs_test.c +++ b/tools/testing/selftests/kvm/x86/set_sregs_test.c @@ -85,6 +85,16 @@ static void test_cr_bits(struct kvm_vcpu *vcpu, uint64_t cr4) rc = _vcpu_sregs_set(vcpu, &sregs); TEST_ASSERT(!rc, "Failed to set supported CR4 bits (0x%lx)", cr4); + TEST_ASSERT(!!(sregs.cr4 & X86_CR4_OSXSAVE) == + (vcpu->cpuid && vcpu_cpuid_has(vcpu, X86_FEATURE_OSXSAVE)), + "KVM didn't %s OSXSAVE in CPUID as expected", + (sregs.cr4 & X86_CR4_OSXSAVE) ? "set" : "clear"); + + TEST_ASSERT(!!(sregs.cr4 & X86_CR4_PKE) == + (vcpu->cpuid && vcpu_cpuid_has(vcpu, X86_FEATURE_OSPKE)), + "KVM didn't %s OSPKE in CPUID as expected", + (sregs.cr4 & X86_CR4_PKE) ? 
"set" : "clear"); + vcpu_sregs_get(vcpu, &sregs); TEST_ASSERT(sregs.cr4 == cr4, "sregs.CR4 (0x%llx) != CR4 (0x%lx)", sregs.cr4, cr4); -- cgit v1.2.3 From 7b2658cb33c744ca41358ada2421a86774914764 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 27 Nov 2024 17:33:43 -0800 Subject: KVM: selftests: Fix a bad TEST_REQUIRE() in x86's KVM PV test Actually check for KVM support for disabling HLT-exiting instead of effectively checking that KVM_CAP_X86_DISABLE_EXITS is #defined to a non-zero value, and convert the TEST_REQUIRE() to a simple return so that only the sub-test is skipped if HLT-exiting is mandatory. The goof has likely gone unnoticed because all x86 CPUs support disabling HLT-exiting, only systems with the opt-in mitigate_smt_rsb KVM module param disallow HLT-exiting. Reviewed-by: Maxim Levitsky Reviewed-by: Binbin Wu Reviewed-by: Xiaoyao Li Link: https://lore.kernel.org/r/20241128013424.4096668-17-seanjc@google.com Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/x86/kvm_pv_test.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/kvm/x86/kvm_pv_test.c b/tools/testing/selftests/kvm/x86/kvm_pv_test.c index 78878b3a2725..2aee93108a54 100644 --- a/tools/testing/selftests/kvm/x86/kvm_pv_test.c +++ b/tools/testing/selftests/kvm/x86/kvm_pv_test.c @@ -140,9 +140,10 @@ static void test_pv_unhalt(void) struct kvm_cpuid_entry2 *ent; u32 kvm_sig_old; - pr_info("testing KVM_FEATURE_PV_UNHALT\n"); + if (!(kvm_check_cap(KVM_CAP_X86_DISABLE_EXITS) & KVM_X86_DISABLE_EXITS_HLT)) + return; - TEST_REQUIRE(KVM_CAP_X86_DISABLE_EXITS); + pr_info("testing KVM_FEATURE_PV_UNHALT\n"); /* KVM_PV_UNHALT test */ vm = vm_create_with_one_vcpu(&vcpu, guest_main); -- cgit v1.2.3 From 59cb3acdb316130c7247a3d3a20d7d6e75e2896a Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 27 Nov 2024 17:33:44 -0800 Subject: KVM: selftests: Update x86's KVM PV test to match KVM's disabling exits behavior Rework x86's KVM PV features test to align with KVM's new, fixed behavior of not allowing userspace to disable HLT-exiting after vCPUs have been created. Rework the core testcase to disable HLT-exiting before creating a vCPU, and opportunistically modify keep the paired VM+vCPU creation to verify that KVM rejects KVM_CAP_X86_DISABLE_EXITS as expected. Reviewed-by: Maxim Levitsky Reviewed-by: Binbin Wu Reviewed-by: Xiaoyao Li Link: https://lore.kernel.org/r/20241128013424.4096668-18-seanjc@google.com Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/x86/kvm_pv_test.c | 33 ++++++++++++++++++++++++--- 1 file changed, 30 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/kvm/x86/kvm_pv_test.c b/tools/testing/selftests/kvm/x86/kvm_pv_test.c index 2aee93108a54..1b805cbdb47b 100644 --- a/tools/testing/selftests/kvm/x86/kvm_pv_test.c +++ b/tools/testing/selftests/kvm/x86/kvm_pv_test.c @@ -139,6 +139,7 @@ static void test_pv_unhalt(void) struct kvm_vm *vm; struct kvm_cpuid_entry2 *ent; u32 kvm_sig_old; + int r; if (!(kvm_check_cap(KVM_CAP_X86_DISABLE_EXITS) & KVM_X86_DISABLE_EXITS_HLT)) return; @@ -152,19 +153,45 @@ static void test_pv_unhalt(void) TEST_ASSERT(vcpu_cpuid_has(vcpu, X86_FEATURE_KVM_PV_UNHALT), "Enabling X86_FEATURE_KVM_PV_UNHALT had no effect"); - /* Make sure KVM clears vcpu->arch.kvm_cpuid */ + /* Verify KVM disallows disabling exits after vCPU creation. 
*/ + r = __vm_enable_cap(vm, KVM_CAP_X86_DISABLE_EXITS, KVM_X86_DISABLE_EXITS_HLT); + TEST_ASSERT(r && errno == EINVAL, + "Disabling exits after vCPU creation didn't fail as expected"); + + kvm_vm_free(vm); + + /* Verify that KVM clears PV_UNHALT from guest CPUID. */ + vm = vm_create(1); + vm_enable_cap(vm, KVM_CAP_X86_DISABLE_EXITS, KVM_X86_DISABLE_EXITS_HLT); + + vcpu = vm_vcpu_add(vm, 0, NULL); + TEST_ASSERT(!vcpu_cpuid_has(vcpu, X86_FEATURE_KVM_PV_UNHALT), + "vCPU created with PV_UNHALT set by default"); + + vcpu_set_cpuid_feature(vcpu, X86_FEATURE_KVM_PV_UNHALT); + TEST_ASSERT(!vcpu_cpuid_has(vcpu, X86_FEATURE_KVM_PV_UNHALT), + "PV_UNHALT set in guest CPUID when HLT-exiting is disabled"); + + /* + * Clobber the KVM PV signature and verify KVM does NOT clear PV_UNHALT + * when KVM PV is not present, and DOES clear PV_UNHALT when switching + * back to the correct signature. + */ ent = vcpu_get_cpuid_entry(vcpu, KVM_CPUID_SIGNATURE); kvm_sig_old = ent->ebx; ent->ebx = 0xdeadbeef; vcpu_set_cpuid(vcpu); - vm_enable_cap(vm, KVM_CAP_X86_DISABLE_EXITS, KVM_X86_DISABLE_EXITS_HLT); + vcpu_set_cpuid_feature(vcpu, X86_FEATURE_KVM_PV_UNHALT); + TEST_ASSERT(vcpu_cpuid_has(vcpu, X86_FEATURE_KVM_PV_UNHALT), + "PV_UNHALT cleared when using bogus KVM PV signature"); + ent = vcpu_get_cpuid_entry(vcpu, KVM_CPUID_SIGNATURE); ent->ebx = kvm_sig_old; vcpu_set_cpuid(vcpu); TEST_ASSERT(!vcpu_cpuid_has(vcpu, X86_FEATURE_KVM_PV_UNHALT), - "KVM_FEATURE_PV_UNHALT is set with KVM_CAP_X86_DISABLE_EXITS"); + "PV_UNHALT set in guest CPUID when HLT-exiting is disabled"); /* FIXME: actually test KVM_FEATURE_PV_UNHALT feature */ -- cgit v1.2.3
From 7a9b65ab0abd52ae646ba327522315d7500a7d4f Mon Sep 17 00:00:00 2001 From: Amit Vadhavana Date: Sat, 16 Nov 2024 20:51:36 +0530 Subject: selftests: refactor the lsm `flags_overset_lsm_set_self_attr` test
Remove the temporary context variable `tctx` to simplify the code. Use the original context `ctx` directly in calls to `lsm_get_self_attr`, eliminating redundancy without any functional changes.
Reviewed-by: Casey Schaufler Reviewed-by: Shuah Khan Signed-off-by: Amit Vadhavana [PM: subject tweak] Signed-off-by: Paul Moore --- tools/testing/selftests/lsm/lsm_set_self_attr_test.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'tools')
diff --git a/tools/testing/selftests/lsm/lsm_set_self_attr_test.c b/tools/testing/selftests/lsm/lsm_set_self_attr_test.c index 66dec47e3ca3..732e89fe99c0 100644 --- a/tools/testing/selftests/lsm/lsm_set_self_attr_test.c +++ b/tools/testing/selftests/lsm/lsm_set_self_attr_test.c @@ -56,16 +56,15 @@ TEST(flags_zero_lsm_set_self_attr) TEST(flags_overset_lsm_set_self_attr) { const long page_size = sysconf(_SC_PAGESIZE); - char *ctx = calloc(page_size, 1); + struct lsm_ctx *ctx = calloc(page_size, 1); __u32 size = page_size; - struct lsm_ctx *tctx = (struct lsm_ctx *)ctx; ASSERT_NE(NULL, ctx); if (attr_lsm_count()) { - ASSERT_LE(1, lsm_get_self_attr(LSM_ATTR_CURRENT, tctx, &size, + ASSERT_LE(1, lsm_get_self_attr(LSM_ATTR_CURRENT, ctx, &size, 0)); } - ASSERT_EQ(-1, lsm_set_self_attr(LSM_ATTR_CURRENT | LSM_ATTR_PREV, tctx, + ASSERT_EQ(-1, lsm_set_self_attr(LSM_ATTR_CURRENT | LSM_ATTR_PREV, ctx, size, 0)); free(ctx); -- cgit v1.2.3
From 4e9427aeb9572a4023b42e64ca2cd2ca3cbf7e20 Mon Sep 17 00:00:00 2001 From: Ivan Orlov Date: Tue, 17 Dec 2024 18:14:57 +0000 Subject: KVM: selftests: Add and use a helper function for x86's LIDT
Implement a function for setting the IDT descriptor from the guest code.
Replace the existing lidt occurrences with calls to this function as `lidt` is used in multiple places. Signed-off-by: Ivan Orlov Link: https://lore.kernel.org/r/20241217181458.68690-7-iorlov@amazon.com Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/include/x86/processor.h | 5 +++++ tools/testing/selftests/kvm/set_memory_region_test.c | 2 +- tools/testing/selftests/kvm/x86/sev_smoke_test.c | 2 +- 3 files changed, 7 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/kvm/include/x86/processor.h b/tools/testing/selftests/kvm/include/x86/processor.h index b6753e27dfb6..daa02b851273 100644 --- a/tools/testing/selftests/kvm/include/x86/processor.h +++ b/tools/testing/selftests/kvm/include/x86/processor.h @@ -569,6 +569,11 @@ static inline void set_cr4(uint64_t val) __asm__ __volatile__("mov %0, %%cr4" : : "r" (val) : "memory"); } +static inline void set_idt(const struct desc_ptr *idt_desc) +{ + __asm__ __volatile__("lidt %0"::"m"(*idt_desc)); +} + static inline u64 xgetbv(u32 index) { u32 eax, edx; diff --git a/tools/testing/selftests/kvm/set_memory_region_test.c b/tools/testing/selftests/kvm/set_memory_region_test.c index 86ee3385e860..621a227e2bf2 100644 --- a/tools/testing/selftests/kvm/set_memory_region_test.c +++ b/tools/testing/selftests/kvm/set_memory_region_test.c @@ -235,7 +235,7 @@ static void guest_code_delete_memory_region(void) * in the guest will never succeed, and so isn't an option. */ memset(&idt, 0, sizeof(idt)); - __asm__ __volatile__("lidt %0" :: "m"(idt)); + set_idt(&idt); GUEST_SYNC(0); diff --git a/tools/testing/selftests/kvm/x86/sev_smoke_test.c b/tools/testing/selftests/kvm/x86/sev_smoke_test.c index ae77698e6e97..a1a688e75266 100644 --- a/tools/testing/selftests/kvm/x86/sev_smoke_test.c +++ b/tools/testing/selftests/kvm/x86/sev_smoke_test.c @@ -155,7 +155,7 @@ static void guest_shutdown_code(void) /* Clobber the IDT so that #UD is guaranteed to trigger SHUTDOWN. */ memset(&idt, 0, sizeof(idt)); - __asm__ __volatile__("lidt %0" :: "m"(idt)); + set_idt(&idt); __asm__ __volatile__("ud2"); } -- cgit v1.2.3 From 62e41f6b4f3697e5909cdf70d56e9a7ebd958732 Mon Sep 17 00:00:00 2001 From: Ivan Orlov Date: Tue, 17 Dec 2024 18:14:58 +0000 Subject: KVM: selftests: Add test case for MMIO during vectoring on x86 Extend the 'set_memory_region_test' with an x86-only test case which covers emulated MMIO during event vectoring error handling. The test case 1) Sets an IDT descriptor base to point to an MMIO address 2) Generates a #GP in the guest 3) Verifies userspace gets the correct exit reason, suberror code, and GPA in internal.data[3] Opportunistically add a definition for a non-canonical address to processor.h so that the source of the #GP is somewhat self-documenting, and so that future tests don't have to reinvent the wheel. 
Signed-off-by: Ivan Orlov Link: https://lore.kernel.org/r/20241217181458.68690-8-iorlov@amazon.com [sean: massage changelog] Signed-off-by: Sean Christopherson --- .../testing/selftests/kvm/include/x86/processor.h | 2 + .../testing/selftests/kvm/set_memory_region_test.c | 51 ++++++++++++++++++++++ 2 files changed, 53 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/kvm/include/x86/processor.h b/tools/testing/selftests/kvm/include/x86/processor.h index daa02b851273..d60da8966772 100644 --- a/tools/testing/selftests/kvm/include/x86/processor.h +++ b/tools/testing/selftests/kvm/include/x86/processor.h @@ -27,6 +27,8 @@ extern uint64_t guest_tsc_khz; #define MAX_NR_CPUID_ENTRIES 100 #endif +#define NONCANONICAL 0xaaaaaaaaaaaaaaaaull + /* Forced emulation prefix, used to invoke the emulator unconditionally. */ #define KVM_FEP "ud2; .byte 'k', 'v', 'm';" diff --git a/tools/testing/selftests/kvm/set_memory_region_test.c b/tools/testing/selftests/kvm/set_memory_region_test.c index 621a227e2bf2..bc440d5aba57 100644 --- a/tools/testing/selftests/kvm/set_memory_region_test.c +++ b/tools/testing/selftests/kvm/set_memory_region_test.c @@ -553,6 +553,56 @@ static void test_add_overlapping_private_memory_regions(void) close(memfd); kvm_vm_free(vm); } + +static void guest_code_mmio_during_vectoring(void) +{ + const struct desc_ptr idt_desc = { + .address = MEM_REGION_GPA, + .size = 0xFFF, + }; + + set_idt(&idt_desc); + + /* Generate a #GP by dereferencing a non-canonical address */ + *((uint8_t *)NONCANONICAL) = 0x1; + + GUEST_ASSERT(0); +} + +/* + * This test points the IDT descriptor base to an MMIO address. It should cause + * a KVM internal error when an event occurs in the guest. + */ +static void test_mmio_during_vectoring(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_run *run; + struct kvm_vm *vm; + u64 expected_gpa; + + pr_info("Testing MMIO during vectoring error handling\n"); + + vm = vm_create_with_one_vcpu(&vcpu, guest_code_mmio_during_vectoring); + virt_map(vm, MEM_REGION_GPA, MEM_REGION_GPA, 1); + + run = vcpu->run; + + vcpu_run(vcpu); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_INTERNAL_ERROR); + TEST_ASSERT(run->internal.suberror == KVM_INTERNAL_ERROR_DELIVERY_EV, + "Unexpected suberror = %d", vcpu->run->internal.suberror); + TEST_ASSERT(run->internal.ndata != 4, "Unexpected internal error data array size = %d", + run->internal.ndata); + + /* The reported GPA should be IDT base + offset of the GP vector */ + expected_gpa = MEM_REGION_GPA + GP_VECTOR * sizeof(struct idt_entry); + + TEST_ASSERT(run->internal.data[3] == expected_gpa, + "Unexpected GPA = %llx (expected %lx)", + vcpu->run->internal.data[3], expected_gpa); + + kvm_vm_free(vm); +} #endif int main(int argc, char *argv[]) @@ -568,6 +618,7 @@ int main(int argc, char *argv[]) * KVM_RUN fails with ENOEXEC or EFAULT. */ test_zero_memory_regions(); + test_mmio_during_vectoring(); #endif test_invalid_memory_region_flags(); -- cgit v1.2.3 From b083cc815376a8ccfba6535b4d59a396b77601d4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micka=C3=ABl=20Sala=C3=BCn?= Date: Thu, 12 Dec 2024 18:42:18 +0100 Subject: selftests/exec: Add 32 tests for AT_EXECVE_CHECK and exec securebits MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Test that checks performed by execveat(..., AT_EXECVE_CHECK) are consistent with noexec mount points and file execute permissions. 
Test that SECBIT_EXEC_RESTRICT_FILE and SECBIT_EXEC_DENY_INTERACTIVE are inherited by child processes and that they can be pinned with the appropriate SECBIT_EXEC_RESTRICT_FILE_LOCKED and SECBIT_EXEC_DENY_INTERACTIVE_LOCKED bits. Cc: Al Viro Cc: Christian Brauner Cc: Kees Cook Cc: Paul Moore Cc: Serge Hallyn Signed-off-by: Mickaël Salaün Link: https://lore.kernel.org/r/20241212174223.389435-4-mic@digikod.net Signed-off-by: Kees Cook --- tools/testing/selftests/exec/.gitignore | 2 + tools/testing/selftests/exec/Makefile | 7 + tools/testing/selftests/exec/check-exec.c | 456 ++++++++++++++++++++++++++++++ tools/testing/selftests/exec/config | 2 + tools/testing/selftests/exec/false.c | 5 + 5 files changed, 472 insertions(+) create mode 100644 tools/testing/selftests/exec/check-exec.c create mode 100644 tools/testing/selftests/exec/config create mode 100644 tools/testing/selftests/exec/false.c (limited to 'tools') diff --git a/tools/testing/selftests/exec/.gitignore b/tools/testing/selftests/exec/.gitignore index a0dc5d4bf733..a32c63bb4df1 100644 --- a/tools/testing/selftests/exec/.gitignore +++ b/tools/testing/selftests/exec/.gitignore @@ -9,6 +9,8 @@ execveat.ephemeral execveat.denatured non-regular null-argv +/check-exec +/false /load_address.* !load_address.c /recursion-depth diff --git a/tools/testing/selftests/exec/Makefile b/tools/testing/selftests/exec/Makefile index ba012bc5aab9..8713d1c862ae 100644 --- a/tools/testing/selftests/exec/Makefile +++ b/tools/testing/selftests/exec/Makefile @@ -1,6 +1,9 @@ # SPDX-License-Identifier: GPL-2.0 CFLAGS = -Wall CFLAGS += -Wno-nonnull +CFLAGS += $(KHDR_INCLUDES) + +LDLIBS += -lcap ALIGNS := 0x1000 0x200000 0x1000000 ALIGN_PIES := $(patsubst %,load_address.%,$(ALIGNS)) @@ -9,12 +12,14 @@ ALIGNMENT_TESTS := $(ALIGN_PIES) $(ALIGN_STATIC_PIES) TEST_PROGS := binfmt_script.py TEST_GEN_PROGS := execveat non-regular $(ALIGNMENT_TESTS) +TEST_GEN_PROGS_EXTENDED := false TEST_GEN_FILES := execveat.symlink execveat.denatured script subdir # Makefile is a run-time dependency, since it's accessed by the execveat test TEST_FILES := Makefile TEST_GEN_PROGS += recursion-depth TEST_GEN_PROGS += null-argv +TEST_GEN_PROGS += check-exec EXTRA_CLEAN := $(OUTPUT)/subdir.moved $(OUTPUT)/execveat.moved $(OUTPUT)/xxxxx* \ $(OUTPUT)/S_I*.test @@ -38,3 +43,5 @@ $(OUTPUT)/load_address.0x%: load_address.c $(OUTPUT)/load_address.static.0x%: load_address.c $(CC) $(CFLAGS) $(LDFLAGS) -Wl,-z,max-page-size=$(lastword $(subst ., ,$@)) \ -fPIE -static-pie $< -o $@ +$(OUTPUT)/false: false.c + $(CC) $(CFLAGS) $(LDFLAGS) -static $< -o $@ diff --git a/tools/testing/selftests/exec/check-exec.c b/tools/testing/selftests/exec/check-exec.c new file mode 100644 index 000000000000..4d3f4525e1e1 --- /dev/null +++ b/tools/testing/selftests/exec/check-exec.c @@ -0,0 +1,456 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Test execveat(2) with AT_EXECVE_CHECK, and prctl(2) with + * SECBIT_EXEC_RESTRICT_FILE, SECBIT_EXEC_DENY_INTERACTIVE, and their locked + * counterparts. + * + * Copyright © 2018-2020 ANSSI + * Copyright © 2024 Microsoft Corporation + * + * Author: Mickaël Salaün + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* Defines AT_EXECVE_CHECK without type conflicts. 
*/ +#define _ASM_GENERIC_FCNTL_H +#include + +#include "../kselftest_harness.h" + +static void drop_privileges(struct __test_metadata *const _metadata) +{ + const unsigned int noroot = SECBIT_NOROOT | SECBIT_NOROOT_LOCKED; + cap_t cap_p; + + if ((cap_get_secbits() & noroot) != noroot) + EXPECT_EQ(0, cap_set_secbits(noroot)); + + cap_p = cap_get_proc(); + EXPECT_NE(NULL, cap_p); + EXPECT_NE(-1, cap_clear(cap_p)); + + /* + * Drops everything, especially CAP_SETPCAP, CAP_DAC_OVERRIDE, and + * CAP_DAC_READ_SEARCH. + */ + EXPECT_NE(-1, cap_set_proc(cap_p)); + EXPECT_NE(-1, cap_free(cap_p)); +} + +static int test_secbits_set(const unsigned int secbits) +{ + int err; + + err = prctl(PR_SET_SECUREBITS, secbits); + if (err) + return errno; + return 0; +} + +FIXTURE(access) +{ + int memfd, pipefd; + int pipe_fds[2], socket_fds[2]; +}; + +FIXTURE_VARIANT(access) +{ + const bool mount_exec; + const bool file_exec; +}; + +/* clang-format off */ +FIXTURE_VARIANT_ADD(access, mount_exec_file_exec) { + /* clang-format on */ + .mount_exec = true, + .file_exec = true, +}; + +/* clang-format off */ +FIXTURE_VARIANT_ADD(access, mount_exec_file_noexec) { + /* clang-format on */ + .mount_exec = true, + .file_exec = false, +}; + +/* clang-format off */ +FIXTURE_VARIANT_ADD(access, mount_noexec_file_exec) { + /* clang-format on */ + .mount_exec = false, + .file_exec = true, +}; + +/* clang-format off */ +FIXTURE_VARIANT_ADD(access, mount_noexec_file_noexec) { + /* clang-format on */ + .mount_exec = false, + .file_exec = false, +}; + +static const char binary_path[] = "./false"; +static const char workdir_path[] = "./test-mount"; +static const char reg_file_path[] = "./test-mount/regular_file"; +static const char dir_path[] = "./test-mount/directory"; +static const char block_dev_path[] = "./test-mount/block_device"; +static const char char_dev_path[] = "./test-mount/character_device"; +static const char fifo_path[] = "./test-mount/fifo"; + +FIXTURE_SETUP(access) +{ + int procfd_path_size; + static const char path_template[] = "/proc/self/fd/%d"; + char procfd_path[sizeof(path_template) + 10]; + + /* Makes sure we are not already restricted nor locked. */ + EXPECT_EQ(0, test_secbits_set(0)); + + /* + * Cleans previous workspace if any error previously happened (don't + * check errors). + */ + umount(workdir_path); + rmdir(workdir_path); + + /* Creates a clean mount point. */ + ASSERT_EQ(0, mkdir(workdir_path, 00700)); + ASSERT_EQ(0, mount("test", workdir_path, "tmpfs", + MS_MGC_VAL | (variant->mount_exec ? 0 : MS_NOEXEC), + "mode=0700,size=9m")); + + /* Creates a regular file. */ + ASSERT_EQ(0, mknod(reg_file_path, + S_IFREG | (variant->file_exec ? 0700 : 0600), 0)); + /* Creates a directory. */ + ASSERT_EQ(0, mkdir(dir_path, variant->file_exec ? 0700 : 0600)); + /* Creates a character device: /dev/null. */ + ASSERT_EQ(0, mknod(char_dev_path, S_IFCHR | 0400, makedev(1, 3))); + /* Creates a block device: /dev/loop0 */ + ASSERT_EQ(0, mknod(block_dev_path, S_IFBLK | 0400, makedev(7, 0))); + /* Creates a fifo. */ + ASSERT_EQ(0, mknod(fifo_path, S_IFIFO | 0600, 0)); + + /* Creates a regular file without user mount point. */ + self->memfd = memfd_create("test-exec-probe", MFD_CLOEXEC); + ASSERT_LE(0, self->memfd); + /* Sets mode, which must be ignored by the exec check. */ + ASSERT_EQ(0, fchmod(self->memfd, variant->file_exec ? 0700 : 0600)); + + /* Creates a pipefs file descriptor. 
*/ + ASSERT_EQ(0, pipe(self->pipe_fds)); + procfd_path_size = snprintf(procfd_path, sizeof(procfd_path), + path_template, self->pipe_fds[0]); + ASSERT_LT(procfd_path_size, sizeof(procfd_path)); + self->pipefd = open(procfd_path, O_RDWR | O_CLOEXEC); + ASSERT_LE(0, self->pipefd); + ASSERT_EQ(0, fchmod(self->pipefd, variant->file_exec ? 0700 : 0600)); + + /* Creates a socket file descriptor. */ + ASSERT_EQ(0, socketpair(AF_UNIX, SOCK_DGRAM | SOCK_CLOEXEC, 0, + self->socket_fds)); +} + +FIXTURE_TEARDOWN_PARENT(access) +{ + /* There is no need to unlink the test files. */ + EXPECT_EQ(0, umount(workdir_path)); + EXPECT_EQ(0, rmdir(workdir_path)); +} + +static void fill_exec_fd(struct __test_metadata *_metadata, const int fd_out) +{ + char buf[1024]; + size_t len; + int fd_in; + + fd_in = open(binary_path, O_CLOEXEC | O_RDONLY); + ASSERT_LE(0, fd_in); + /* Cannot use copy_file_range(2) because of EXDEV. */ + len = read(fd_in, buf, sizeof(buf)); + EXPECT_LE(0, len); + while (len > 0) { + EXPECT_EQ(len, write(fd_out, buf, len)) + { + TH_LOG("Failed to write: %s (%d)", strerror(errno), + errno); + } + len = read(fd_in, buf, sizeof(buf)); + EXPECT_LE(0, len); + } + EXPECT_EQ(0, close(fd_in)); +} + +static void fill_exec_path(struct __test_metadata *_metadata, + const char *const path) +{ + int fd_out; + + fd_out = open(path, O_CLOEXEC | O_WRONLY); + ASSERT_LE(0, fd_out) + { + TH_LOG("Failed to open %s: %s", path, strerror(errno)); + } + fill_exec_fd(_metadata, fd_out); + EXPECT_EQ(0, close(fd_out)); +} + +static void test_exec_fd(struct __test_metadata *_metadata, const int fd, + const int err_code) +{ + char *const argv[] = { "", NULL }; + int access_ret, access_errno; + + /* + * If we really execute fd, filled with the "false" binary, the current + * thread will exits with an error, which will be interpreted by the + * test framework as an error. With AT_EXECVE_CHECK, we only check a + * potential successful execution. + */ + access_ret = + execveat(fd, "", argv, NULL, AT_EMPTY_PATH | AT_EXECVE_CHECK); + access_errno = errno; + if (err_code) { + EXPECT_EQ(-1, access_ret); + EXPECT_EQ(err_code, access_errno) + { + TH_LOG("Wrong error for execveat(2): %s (%d)", + strerror(access_errno), errno); + } + } else { + EXPECT_EQ(0, access_ret) + { + TH_LOG("Access denied: %s", strerror(access_errno)); + } + } +} + +static void test_exec_path(struct __test_metadata *_metadata, + const char *const path, const int err_code) +{ + int flags = O_CLOEXEC; + int fd; + + /* Do not block on pipes. */ + if (path == fifo_path) + flags |= O_NONBLOCK; + + fd = open(path, flags | O_RDONLY); + ASSERT_LE(0, fd) + { + TH_LOG("Failed to open %s: %s", path, strerror(errno)); + } + test_exec_fd(_metadata, fd, err_code); + EXPECT_EQ(0, close(fd)); +} + +/* Tests that we don't get ENOEXEC. */ +TEST_F(access, regular_file_empty) +{ + const int exec = variant->mount_exec && variant->file_exec; + + test_exec_path(_metadata, reg_file_path, exec ? 0 : EACCES); + + drop_privileges(_metadata); + test_exec_path(_metadata, reg_file_path, exec ? 0 : EACCES); +} + +TEST_F(access, regular_file_elf) +{ + const int exec = variant->mount_exec && variant->file_exec; + + fill_exec_path(_metadata, reg_file_path); + + test_exec_path(_metadata, reg_file_path, exec ? 0 : EACCES); + + drop_privileges(_metadata); + test_exec_path(_metadata, reg_file_path, exec ? 0 : EACCES); +} + +/* Tests that we don't get ENOEXEC. */ +TEST_F(access, memfd_empty) +{ + const int exec = variant->file_exec; + + test_exec_fd(_metadata, self->memfd, exec ? 
0 : EACCES); + + drop_privileges(_metadata); + test_exec_fd(_metadata, self->memfd, exec ? 0 : EACCES); +} + +TEST_F(access, memfd_elf) +{ + const int exec = variant->file_exec; + + fill_exec_fd(_metadata, self->memfd); + + test_exec_fd(_metadata, self->memfd, exec ? 0 : EACCES); + + drop_privileges(_metadata); + test_exec_fd(_metadata, self->memfd, exec ? 0 : EACCES); +} + +TEST_F(access, non_regular_files) +{ + test_exec_path(_metadata, dir_path, EACCES); + test_exec_path(_metadata, block_dev_path, EACCES); + test_exec_path(_metadata, char_dev_path, EACCES); + test_exec_path(_metadata, fifo_path, EACCES); + test_exec_fd(_metadata, self->socket_fds[0], EACCES); + test_exec_fd(_metadata, self->pipefd, EACCES); +} + +/* clang-format off */ +FIXTURE(secbits) {}; +/* clang-format on */ + +FIXTURE_VARIANT(secbits) +{ + const bool is_privileged; + const int error; +}; + +/* clang-format off */ +FIXTURE_VARIANT_ADD(secbits, priv) { + /* clang-format on */ + .is_privileged = true, + .error = 0, +}; + +/* clang-format off */ +FIXTURE_VARIANT_ADD(secbits, unpriv) { + /* clang-format on */ + .is_privileged = false, + .error = EPERM, +}; + +FIXTURE_SETUP(secbits) +{ + /* Makes sure no exec bits are set. */ + EXPECT_EQ(0, test_secbits_set(0)); + EXPECT_EQ(0, prctl(PR_GET_SECUREBITS)); + + if (!variant->is_privileged) + drop_privileges(_metadata); +} + +FIXTURE_TEARDOWN(secbits) +{ +} + +TEST_F(secbits, legacy) +{ + EXPECT_EQ(variant->error, test_secbits_set(0)); +} + +#define CHILD(...) \ + do { \ + pid_t child = vfork(); \ + EXPECT_LE(0, child); \ + if (child == 0) { \ + __VA_ARGS__; \ + _exit(0); \ + } \ + } while (0) + +TEST_F(secbits, exec) +{ + unsigned int secbits = prctl(PR_GET_SECUREBITS); + + secbits |= SECBIT_EXEC_RESTRICT_FILE; + EXPECT_EQ(0, test_secbits_set(secbits)); + EXPECT_EQ(secbits, prctl(PR_GET_SECUREBITS)); + CHILD(EXPECT_EQ(secbits, prctl(PR_GET_SECUREBITS))); + + secbits |= SECBIT_EXEC_DENY_INTERACTIVE; + EXPECT_EQ(0, test_secbits_set(secbits)); + EXPECT_EQ(secbits, prctl(PR_GET_SECUREBITS)); + CHILD(EXPECT_EQ(secbits, prctl(PR_GET_SECUREBITS))); + + secbits &= ~(SECBIT_EXEC_RESTRICT_FILE | SECBIT_EXEC_DENY_INTERACTIVE); + EXPECT_EQ(0, test_secbits_set(secbits)); + EXPECT_EQ(secbits, prctl(PR_GET_SECUREBITS)); + CHILD(EXPECT_EQ(secbits, prctl(PR_GET_SECUREBITS))); +} + +TEST_F(secbits, check_locked_set) +{ + unsigned int secbits = prctl(PR_GET_SECUREBITS); + + secbits |= SECBIT_EXEC_RESTRICT_FILE; + EXPECT_EQ(0, test_secbits_set(secbits)); + secbits |= SECBIT_EXEC_RESTRICT_FILE_LOCKED; + EXPECT_EQ(0, test_secbits_set(secbits)); + + /* Checks lock set but unchanged. */ + EXPECT_EQ(variant->error, test_secbits_set(secbits)); + CHILD(EXPECT_EQ(variant->error, test_secbits_set(secbits))); + + secbits &= ~SECBIT_EXEC_RESTRICT_FILE; + EXPECT_EQ(EPERM, test_secbits_set(0)); + CHILD(EXPECT_EQ(EPERM, test_secbits_set(0))); +} + +TEST_F(secbits, check_locked_unset) +{ + unsigned int secbits = prctl(PR_GET_SECUREBITS); + + secbits |= SECBIT_EXEC_RESTRICT_FILE_LOCKED; + EXPECT_EQ(0, test_secbits_set(secbits)); + + /* Checks lock unset but unchanged. 
*/ + EXPECT_EQ(variant->error, test_secbits_set(secbits)); + CHILD(EXPECT_EQ(variant->error, test_secbits_set(secbits))); + + secbits &= ~SECBIT_EXEC_RESTRICT_FILE; + EXPECT_EQ(EPERM, test_secbits_set(0)); + CHILD(EXPECT_EQ(EPERM, test_secbits_set(0))); +} + +TEST_F(secbits, restrict_locked_set) +{ + unsigned int secbits = prctl(PR_GET_SECUREBITS); + + secbits |= SECBIT_EXEC_DENY_INTERACTIVE; + EXPECT_EQ(0, test_secbits_set(secbits)); + secbits |= SECBIT_EXEC_DENY_INTERACTIVE_LOCKED; + EXPECT_EQ(0, test_secbits_set(secbits)); + + /* Checks lock set but unchanged. */ + EXPECT_EQ(variant->error, test_secbits_set(secbits)); + CHILD(EXPECT_EQ(variant->error, test_secbits_set(secbits))); + + secbits &= ~SECBIT_EXEC_DENY_INTERACTIVE; + EXPECT_EQ(EPERM, test_secbits_set(0)); + CHILD(EXPECT_EQ(EPERM, test_secbits_set(0))); +} + +TEST_F(secbits, restrict_locked_unset) +{ + unsigned int secbits = prctl(PR_GET_SECUREBITS); + + secbits |= SECBIT_EXEC_DENY_INTERACTIVE_LOCKED; + EXPECT_EQ(0, test_secbits_set(secbits)); + + /* Checks lock unset but unchanged. */ + EXPECT_EQ(variant->error, test_secbits_set(secbits)); + CHILD(EXPECT_EQ(variant->error, test_secbits_set(secbits))); + + secbits &= ~SECBIT_EXEC_DENY_INTERACTIVE; + EXPECT_EQ(EPERM, test_secbits_set(0)); + CHILD(EXPECT_EQ(EPERM, test_secbits_set(0))); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/exec/config b/tools/testing/selftests/exec/config new file mode 100644 index 000000000000..c308079867b3 --- /dev/null +++ b/tools/testing/selftests/exec/config @@ -0,0 +1,2 @@ +CONFIG_BLK_DEV=y +CONFIG_BLK_DEV_LOOP=y diff --git a/tools/testing/selftests/exec/false.c b/tools/testing/selftests/exec/false.c new file mode 100644 index 000000000000..104383ec3a79 --- /dev/null +++ b/tools/testing/selftests/exec/false.c @@ -0,0 +1,5 @@ +// SPDX-License-Identifier: GPL-2.0 +int main(void) +{ + return 1; +} -- cgit v1.2.3 From 0e7f90f34cf79bf329d6d08edea3403544498843 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micka=C3=ABl=20Sala=C3=BCn?= Date: Thu, 12 Dec 2024 18:42:19 +0100 Subject: selftests/landlock: Add tests for execveat + AT_EXECVE_CHECK MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extend layout1.execute with the new AT_EXECVE_CHECK flag. The semantic with AT_EXECVE_CHECK is the same as with a simple execve(2), LANDLOCK_ACCESS_FS_EXECUTE is enforced the same way. Cc: Günther Noack Cc: Kees Cook Cc: Paul Moore Signed-off-by: Mickaël Salaün Link: https://lore.kernel.org/r/20241212174223.389435-5-mic@digikod.net Signed-off-by: Kees Cook --- tools/testing/selftests/landlock/fs_test.c | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/landlock/fs_test.c b/tools/testing/selftests/landlock/fs_test.c index 6788762188fe..cd66901be612 100644 --- a/tools/testing/selftests/landlock/fs_test.c +++ b/tools/testing/selftests/landlock/fs_test.c @@ -37,6 +37,10 @@ #include #include +/* Defines AT_EXECVE_CHECK without type conflicts. 
*/ +#define _ASM_GENERIC_FCNTL_H +#include + #include "common.h" #ifndef renameat2 @@ -2008,6 +2012,22 @@ static void test_execute(struct __test_metadata *const _metadata, const int err, }; } +static void test_check_exec(struct __test_metadata *const _metadata, + const int err, const char *const path) +{ + int ret; + char *const argv[] = { (char *)path, NULL }; + + ret = execveat(AT_FDCWD, path, argv, NULL, + AT_EMPTY_PATH | AT_EXECVE_CHECK); + if (err) { + EXPECT_EQ(-1, ret); + EXPECT_EQ(errno, err); + } else { + EXPECT_EQ(0, ret); + } +} + TEST_F_FORK(layout1, execute) { const struct rule rules[] = { @@ -2025,20 +2045,27 @@ TEST_F_FORK(layout1, execute) copy_binary(_metadata, file1_s1d2); copy_binary(_metadata, file1_s1d3); + /* Checks before file1_s1d1 being denied. */ + test_execute(_metadata, 0, file1_s1d1); + test_check_exec(_metadata, 0, file1_s1d1); + enforce_ruleset(_metadata, ruleset_fd); ASSERT_EQ(0, close(ruleset_fd)); ASSERT_EQ(0, test_open(dir_s1d1, O_RDONLY)); ASSERT_EQ(0, test_open(file1_s1d1, O_RDONLY)); test_execute(_metadata, EACCES, file1_s1d1); + test_check_exec(_metadata, EACCES, file1_s1d1); ASSERT_EQ(0, test_open(dir_s1d2, O_RDONLY)); ASSERT_EQ(0, test_open(file1_s1d2, O_RDONLY)); test_execute(_metadata, 0, file1_s1d2); + test_check_exec(_metadata, 0, file1_s1d2); ASSERT_EQ(0, test_open(dir_s1d3, O_RDONLY)); ASSERT_EQ(0, test_open(file1_s1d3, O_RDONLY)); test_execute(_metadata, 0, file1_s1d3); + test_check_exec(_metadata, 0, file1_s1d3); } TEST_F_FORK(layout1, link) -- cgit v1.2.3 From 3e707b07f582c12ed78fa5516a97bf701bf0634c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micka=C3=ABl=20Sala=C3=BCn?= Date: Thu, 12 Dec 2024 18:42:21 +0100 Subject: selftests: ktap_helpers: Fix uninitialized variable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit __ktap_test() may be called without the optional third argument which is an issue for scripts using `set -u` to detect uninitialized variables and potential bugs. Fix this optional "directive" argument by either using the third argument or an empty string. This is required for the next commit to properly test script execution control. Cc: Kees Cook Cc: Nícolas F. R. A. Prado Cc: Shuah Khan Fixes: 14571ab1ad21 ("kselftest: Add new test for detecting unprobed Devicetree devices") Signed-off-by: Mickaël Salaün Link: https://lore.kernel.org/r/20241212174223.389435-7-mic@digikod.net Signed-off-by: Kees Cook --- tools/testing/selftests/kselftest/ktap_helpers.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/kselftest/ktap_helpers.sh b/tools/testing/selftests/kselftest/ktap_helpers.sh index 79a125eb24c2..14e7f3ec3f84 100644 --- a/tools/testing/selftests/kselftest/ktap_helpers.sh +++ b/tools/testing/selftests/kselftest/ktap_helpers.sh @@ -40,7 +40,7 @@ ktap_skip_all() { __ktap_test() { result="$1" description="$2" - directive="$3" # optional + directive="${3:-}" # optional local directive_str= [ ! -z "$directive" ] && directive_str="# $directive" -- cgit v1.2.3 From 2a69962be4a7e97ab347e05826480a3352c6fbc8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micka=C3=ABl=20Sala=C3=BCn?= Date: Thu, 12 Dec 2024 18:42:22 +0100 Subject: samples/check-exec: Add an enlighten "inc" interpreter and 28 tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a very simple script interpreter called "inc" that can evaluate two different commands (one per line): - "?" 
to initialize a counter from user's input; - "+" to increment the counter (which is set to 0 by default). It is enlighten to only interpret executable files according to AT_EXECVE_CHECK and the related securebits: # Executing a script with RESTRICT_FILE is only allowed if the script # is executable: ./set-exec -f -- ./inc script-exec.inc # Allowed ./set-exec -f -- ./inc script-noexec.inc # Denied # Executing stdin with DENY_INTERACTIVE is only allowed if stdin is an # executable regular file: ./set-exec -i -- ./inc -i < script-exec.inc # Allowed ./set-exec -i -- ./inc -i < script-noexec.inc # Denied # However, a pipe is not executable and it is then denied: cat script-noexec.inc | ./set-exec -i -- ./inc -i # Denied # Executing raw data (e.g. command argument) with DENY_INTERACTIVE is # always denied. ./set-exec -i -- ./inc -c "+" # Denied ./inc -c "$( Cc: Christian Brauner Cc: Kees Cook Cc: Paul Moore Cc: Serge Hallyn Signed-off-by: Mickaël Salaün Link: https://lore.kernel.org/r/20241212174223.389435-8-mic@digikod.net Signed-off-by: Kees Cook --- tools/testing/selftests/exec/.gitignore | 2 + tools/testing/selftests/exec/Makefile | 14 +- tools/testing/selftests/exec/check-exec-tests.sh | 205 +++++++++++++++++++++++ 3 files changed, 219 insertions(+), 2 deletions(-) create mode 100755 tools/testing/selftests/exec/check-exec-tests.sh (limited to 'tools') diff --git a/tools/testing/selftests/exec/.gitignore b/tools/testing/selftests/exec/.gitignore index a32c63bb4df1..7f3d1ae762ec 100644 --- a/tools/testing/selftests/exec/.gitignore +++ b/tools/testing/selftests/exec/.gitignore @@ -11,9 +11,11 @@ non-regular null-argv /check-exec /false +/inc /load_address.* !load_address.c /recursion-depth +/set-exec xxxxxxxx* pipe S_I*.test diff --git a/tools/testing/selftests/exec/Makefile b/tools/testing/selftests/exec/Makefile index 8713d1c862ae..45a3cfc435cf 100644 --- a/tools/testing/selftests/exec/Makefile +++ b/tools/testing/selftests/exec/Makefile @@ -10,9 +10,9 @@ ALIGN_PIES := $(patsubst %,load_address.%,$(ALIGNS)) ALIGN_STATIC_PIES := $(patsubst %,load_address.static.%,$(ALIGNS)) ALIGNMENT_TESTS := $(ALIGN_PIES) $(ALIGN_STATIC_PIES) -TEST_PROGS := binfmt_script.py +TEST_PROGS := binfmt_script.py check-exec-tests.sh TEST_GEN_PROGS := execveat non-regular $(ALIGNMENT_TESTS) -TEST_GEN_PROGS_EXTENDED := false +TEST_GEN_PROGS_EXTENDED := false inc set-exec script-exec.inc script-noexec.inc TEST_GEN_FILES := execveat.symlink execveat.denatured script subdir # Makefile is a run-time dependency, since it's accessed by the execveat test TEST_FILES := Makefile @@ -26,6 +26,8 @@ EXTRA_CLEAN := $(OUTPUT)/subdir.moved $(OUTPUT)/execveat.moved $(OUTPUT)/xxxxx* include ../lib.mk +CHECK_EXEC_SAMPLES := $(top_srcdir)/samples/check-exec + $(OUTPUT)/subdir: mkdir -p $@ $(OUTPUT)/script: Makefile @@ -45,3 +47,11 @@ $(OUTPUT)/load_address.static.0x%: load_address.c -fPIE -static-pie $< -o $@ $(OUTPUT)/false: false.c $(CC) $(CFLAGS) $(LDFLAGS) -static $< -o $@ +$(OUTPUT)/inc: $(CHECK_EXEC_SAMPLES)/inc.c + $(CC) $(CFLAGS) $(LDFLAGS) $< -o $@ +$(OUTPUT)/set-exec: $(CHECK_EXEC_SAMPLES)/set-exec.c + $(CC) $(CFLAGS) $(LDFLAGS) $< -o $@ +$(OUTPUT)/script-exec.inc: $(CHECK_EXEC_SAMPLES)/script-exec.inc + cp $< $@ +$(OUTPUT)/script-noexec.inc: $(CHECK_EXEC_SAMPLES)/script-noexec.inc + cp $< $@ diff --git a/tools/testing/selftests/exec/check-exec-tests.sh b/tools/testing/selftests/exec/check-exec-tests.sh new file mode 100755 index 000000000000..87102906ae3c --- /dev/null +++ b/tools/testing/selftests/exec/check-exec-tests.sh 
@@ -0,0 +1,205 @@ +#!/usr/bin/env bash +# SPDX-License-Identifier: GPL-2.0 +# +# Test the "inc" interpreter. +# +# See include/uapi/linux/securebits.h, include/uapi/linux/fcntl.h and +# samples/check-exec/inc.c +# +# Copyright © 2024 Microsoft Corporation + +set -u -e -o pipefail + +EXPECTED_OUTPUT="1" +exec 2>/dev/null + +DIR="$(dirname $(readlink -f "$0"))" +source "${DIR}"/../kselftest/ktap_helpers.sh + +exec_direct() { + local expect="$1" + local script="$2" + shift 2 + local ret=0 + local out + + # Updates PATH for `env` to execute the `inc` interpreter. + out="$(PATH="." "$@" "${script}")" || ret=$? + + if [[ ${ret} -ne ${expect} ]]; then + echo "ERROR: Wrong expectation for direct file execution: ${ret}" + return 1 + fi + if [[ ${ret} -eq 0 && "${out}" != "${EXPECTED_OUTPUT}" ]]; then + echo "ERROR: Wrong output for direct file execution: ${out}" + return 1 + fi +} + +exec_indirect() { + local expect="$1" + local script="$2" + shift 2 + local ret=0 + local out + + # Script passed as argument. + out="$("$@" ./inc "${script}")" || ret=$? + + if [[ ${ret} -ne ${expect} ]]; then + echo "ERROR: Wrong expectation for indirect file execution: ${ret}" + return 1 + fi + if [[ ${ret} -eq 0 && "${out}" != "${EXPECTED_OUTPUT}" ]]; then + echo "ERROR: Wrong output for indirect file execution: ${out}" + return 1 + fi +} + +exec_stdin_reg() { + local expect="$1" + local script="$2" + shift 2 + local ret=0 + local out + + # Executing stdin must be allowed if the related file is executable. + out="$("$@" ./inc -i < "${script}")" || ret=$? + + if [[ ${ret} -ne ${expect} ]]; then + echo "ERROR: Wrong expectation for stdin regular file execution: ${ret}" + return 1 + fi + if [[ ${ret} -eq 0 && "${out}" != "${EXPECTED_OUTPUT}" ]]; then + echo "ERROR: Wrong output for stdin regular file execution: ${out}" + return 1 + fi +} + +exec_stdin_pipe() { + local expect="$1" + shift + local ret=0 + local out + + # A pipe is not executable. + out="$(cat script-exec.inc | "$@" ./inc -i)" || ret=$? + + if [[ ${ret} -ne ${expect} ]]; then + echo "ERROR: Wrong expectation for stdin pipe execution: ${ret}" + return 1 + fi +} + +exec_argument() { + local expect="$1" + local ret=0 + shift + local out + + # Script not coming from a file must not be executed. + out="$("$@" ./inc -c "$(< script-exec.inc)")" || ret=$? + + if [[ ${ret} -ne ${expect} ]]; then + echo "ERROR: Wrong expectation for arbitrary argument execution: ${ret}" + return 1 + fi + if [[ ${ret} -eq 0 && "${out}" != "${EXPECTED_OUTPUT}" ]]; then + echo "ERROR: Wrong output for arbitrary argument execution: ${out}" + return 1 + fi +} + +exec_interactive() { + exec_stdin_pipe "$@" + exec_argument "$@" +} + +ktap_test() { + ktap_test_result "$*" "$@" +} + +ktap_print_header +ktap_set_plan 28 + +# Without secbit configuration, nothing is changed. + +ktap_print_msg "By default, executable scripts are allowed to be interpreted and executed." +ktap_test exec_direct 0 script-exec.inc +ktap_test exec_indirect 0 script-exec.inc + +ktap_print_msg "By default, executable stdin is allowed to be interpreted." +ktap_test exec_stdin_reg 0 script-exec.inc + +ktap_print_msg "By default, non-executable scripts are allowed to be interpreted, but not directly executed." +# We get 126 because of direct execution by Bash. +ktap_test exec_direct 126 script-noexec.inc +ktap_test exec_indirect 0 script-noexec.inc + +ktap_print_msg "By default, non-executable stdin is allowed to be interpreted." 
+ktap_test exec_stdin_reg 0 script-noexec.inc + +ktap_print_msg "By default, interactive commands are allowed to be interpreted." +ktap_test exec_interactive 0 + +# With only file restriction: protect non-malicious users from inadvertent errors (e.g. python ~/Downloads/*.py). + +ktap_print_msg "With -f, executable scripts are allowed to be interpreted and executed." +ktap_test exec_direct 0 script-exec.inc ./set-exec -f -- +ktap_test exec_indirect 0 script-exec.inc ./set-exec -f -- + +ktap_print_msg "With -f, executable stdin is allowed to be interpreted." +ktap_test exec_stdin_reg 0 script-exec.inc ./set-exec -f -- + +ktap_print_msg "With -f, non-executable scripts are not allowed to be executed nor interpreted." +# Direct execution of non-executable script is alwayse denied by the kernel. +ktap_test exec_direct 1 script-noexec.inc ./set-exec -f -- +ktap_test exec_indirect 1 script-noexec.inc ./set-exec -f -- + +ktap_print_msg "With -f, non-executable stdin is allowed to be interpreted." +ktap_test exec_stdin_reg 0 script-noexec.inc ./set-exec -f -- + +ktap_print_msg "With -f, interactive commands are allowed to be interpreted." +ktap_test exec_interactive 0 ./set-exec -f -- + +# With only denied interactive commands: check or monitor script content (e.g. with LSM). + +ktap_print_msg "With -i, executable scripts are allowed to be interpreted and executed." +ktap_test exec_direct 0 script-exec.inc ./set-exec -i -- +ktap_test exec_indirect 0 script-exec.inc ./set-exec -i -- + +ktap_print_msg "With -i, executable stdin is allowed to be interpreted." +ktap_test exec_stdin_reg 0 script-exec.inc ./set-exec -i -- + +ktap_print_msg "With -i, non-executable scripts are allowed to be interpreted, but not directly executed." +# Direct execution of non-executable script is alwayse denied by the kernel. +ktap_test exec_direct 1 script-noexec.inc ./set-exec -i -- +ktap_test exec_indirect 0 script-noexec.inc ./set-exec -i -- + +ktap_print_msg "With -i, non-executable stdin is not allowed to be interpreted." +ktap_test exec_stdin_reg 1 script-noexec.inc ./set-exec -i -- + +ktap_print_msg "With -i, interactive commands are not allowed to be interpreted." +ktap_test exec_interactive 1 ./set-exec -i -- + +# With both file restriction and denied interactive commands: only allow executable scripts. + +ktap_print_msg "With -fi, executable scripts are allowed to be interpreted and executed." +ktap_test exec_direct 0 script-exec.inc ./set-exec -fi -- +ktap_test exec_indirect 0 script-exec.inc ./set-exec -fi -- + +ktap_print_msg "With -fi, executable stdin is allowed to be interpreted." +ktap_test exec_stdin_reg 0 script-exec.inc ./set-exec -fi -- + +ktap_print_msg "With -fi, non-executable scripts are not allowed to be interpreted nor executed." +# Direct execution of non-executable script is alwayse denied by the kernel. +ktap_test exec_direct 1 script-noexec.inc ./set-exec -fi -- +ktap_test exec_indirect 1 script-noexec.inc ./set-exec -fi -- + +ktap_print_msg "With -fi, non-executable stdin is not allowed to be interpreted." +ktap_test exec_stdin_reg 1 script-noexec.inc ./set-exec -fi -- + +ktap_print_msg "With -fi, interactive commands are not allowed to be interpreted." +ktap_test exec_interactive 1 ./set-exec -fi -- + +ktap_finished -- cgit v1.2.3 From 6a75f19af16ff482cfd6085c77123aa0f464f8dd Mon Sep 17 00:00:00 2001 From: "Isaac J. 
Manjarres" Date: Thu, 5 Dec 2024 11:29:41 -0800 Subject: selftests/memfd: run sysctl tests when PID namespace support is enabled The sysctl tests for vm.memfd_noexec rely on the kernel to support PID namespaces (i.e. the kernel is built with CONFIG_PID_NS=y). If the kernel the test runs on does not support PID namespaces, the first sysctl test will fail when attempting to spawn a new thread in a new PID namespace, abort the test, preventing the remaining tests from being run. This is not desirable, as not all kernels need PID namespaces, but can still use the other features provided by memfd. Therefore, only run the sysctl tests if the kernel supports PID namespaces. Otherwise, skip those tests and emit an informative message to let the user know why the sysctl tests are not being run. Link: https://lkml.kernel.org/r/20241205192943.3228757-1-isaacmanjarres@google.com Fixes: 11f75a01448f ("selftests/memfd: add tests for MFD_NOEXEC_SEAL MFD_EXEC") Signed-off-by: Isaac J. Manjarres Reviewed-by: Jeff Xu Cc: Suren Baghdasaryan Cc: Kalesh Singh Cc: [6.6+] Signed-off-by: Andrew Morton --- tools/testing/selftests/memfd/memfd_test.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/memfd/memfd_test.c b/tools/testing/selftests/memfd/memfd_test.c index 95af2d78fd31..0a0b55516028 100644 --- a/tools/testing/selftests/memfd/memfd_test.c +++ b/tools/testing/selftests/memfd/memfd_test.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -1557,6 +1558,11 @@ static void test_share_fork(char *banner, char *b_suffix) close(fd); } +static bool pid_ns_supported(void) +{ + return access("/proc/self/ns/pid", F_OK) == 0; +} + int main(int argc, char **argv) { pid_t pid; @@ -1591,8 +1597,12 @@ int main(int argc, char **argv) test_seal_grow(); test_seal_resize(); - test_sysctl_simple(); - test_sysctl_nested(); + if (pid_ns_supported()) { + test_sysctl_simple(); + test_sysctl_nested(); + } else { + printf("PID namespaces are not supported; skipping sysctl tests\n"); + } test_share_dup("SHARE-DUP", ""); test_share_mmap("SHARE-MMAP", ""); -- cgit v1.2.3 From a17975992cc11588767175247ccaae1213a8b582 Mon Sep 17 00:00:00 2001 From: Adrian Moreno Date: Tue, 17 Dec 2024 22:16:51 +0100 Subject: selftests: openvswitch: fix tcpdump execution Fix the way tcpdump is executed by: - Using the right variable for the namespace. Currently the use of the empty "ns" makes the command fail. - Waiting until it starts to capture to ensure the interesting traffic is caught on slow systems. - Using line-buffered output to ensure logs are available when the test is paused with "-p". Otherwise the last chunk of data might only be written when tcpdump is killed. 
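A minimal sketch of the resulting spawn-and-wait pattern, reusing the ovs_netns_spawn_daemon and ovs_wait helpers that openvswitch.sh already provides (the sandbox, namespace and log-directory variables are placeholders):

  # start tcpdump line-buffered inside the target namespace, then only
  # proceed once its stderr confirms it is actually capturing
  ovs_netns_spawn_daemon "$sbx" "$ns" tcpdump -l -i any
  ovs_wait grep -q "listening on any" "${ovs_dir}/stderr"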
Fixes: 74cc26f416b9 ("selftests: openvswitch: add interface support") Signed-off-by: Adrian Moreno Acked-by: Eelco Chaudron Link: https://patch.msgid.link/20241217211652.483016-1-amorenoz@redhat.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/openvswitch/openvswitch.sh | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/openvswitch/openvswitch.sh b/tools/testing/selftests/net/openvswitch/openvswitch.sh index cc0bfae2bafa..960e1ab4dd04 100755 --- a/tools/testing/selftests/net/openvswitch/openvswitch.sh +++ b/tools/testing/selftests/net/openvswitch/openvswitch.sh @@ -171,8 +171,10 @@ ovs_add_netns_and_veths () { ovs_add_if "$1" "$2" "$4" -u || return 1 fi - [ $TRACING -eq 1 ] && ovs_netns_spawn_daemon "$1" "$ns" \ - tcpdump -i any -s 65535 + if [ $TRACING -eq 1 ]; then + ovs_netns_spawn_daemon "$1" "$3" tcpdump -l -i any -s 6553 + ovs_wait grep -q "listening on any" ${ovs_dir}/stderr + fi return 0 } -- cgit v1.2.3 From 29d44cce324dab2bd86c447071a596262e7109b6 Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Thu, 19 Dec 2024 19:15:06 +0800 Subject: selftests/bpf: Use asm constraint "m" for LoongArch Currently, LoongArch LLVM does not support the constraint "o" and no plan to support it, it only supports the similar constraint "m", so change the constraints from "nor" in the "else" case to arch-specific "nmr" to avoid the build error such as "unexpected asm memory constraint" for LoongArch. Fixes: 630301b0d59d ("selftests/bpf: Add basic USDT selftests") Suggested-by: Weining Lu Suggested-by: Li Chen Signed-off-by: Tiezhu Yang Signed-off-by: Daniel Borkmann Reviewed-by: Huacai Chen Cc: stable@vger.kernel.org Link: https://llvm.org/docs/LangRef.html#supported-constraint-code-list Link: https://github.com/llvm/llvm-project/blob/main/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp#L172 Link: https://lore.kernel.org/bpf/20241219111506.20643-1-yangtiezhu@loongson.cn --- tools/testing/selftests/bpf/sdt.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/sdt.h b/tools/testing/selftests/bpf/sdt.h index ca0162b4dc57..1fcfa5160231 100644 --- a/tools/testing/selftests/bpf/sdt.h +++ b/tools/testing/selftests/bpf/sdt.h @@ -102,6 +102,8 @@ # define STAP_SDT_ARG_CONSTRAINT nZr # elif defined __arm__ # define STAP_SDT_ARG_CONSTRAINT g +# elif defined __loongarch__ +# define STAP_SDT_ARG_CONSTRAINT nmr # else # define STAP_SDT_ARG_CONSTRAINT nor # endif -- cgit v1.2.3 From 716f2bca1ce93bb95364f1fc0555c1650507b588 Mon Sep 17 00:00:00 2001 From: Jerome Marchand Date: Wed, 18 Dec 2024 18:57:24 +0100 Subject: selftests/bpf: Fix compilation error in get_uprobe_offset() In get_uprobe_offset(), the call to procmap_query() use the constant PROCMAP_QUERY_VMA_EXECUTABLE, even if PROCMAP_QUERY is not defined. Define PROCMAP_QUERY_VMA_EXECUTABLE when PROCMAP_QUERY isn't. 
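A minimal sketch of the failure mode and the fix (caller simplified; the fallback value mirrors the hunk below):

  /* get_uprobe_offset() always references the flag, even when the
   * PROCMAP_QUERY ioctl is unavailable and only the stub is built */
  err = procmap_query(fd, addr, PROCMAP_QUERY_VMA_EXECUTABLE,
                      &start, &offset, &flags);

  /* so provide the constant when the UAPI headers do not */
  #ifndef PROCMAP_QUERY_VMA_EXECUTABLE
  # define PROCMAP_QUERY_VMA_EXECUTABLE 0x04
  #endif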
Fixes: 4e9e07603ecd ("selftests/bpf: make use of PROCMAP_QUERY ioctl if available") Signed-off-by: Jerome Marchand Signed-off-by: Daniel Borkmann Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20241218175724.578884-1-jmarchan@redhat.com --- tools/testing/selftests/bpf/trace_helpers.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/trace_helpers.c b/tools/testing/selftests/bpf/trace_helpers.c index 2d742fdac6b9..81943c6254e6 100644 --- a/tools/testing/selftests/bpf/trace_helpers.c +++ b/tools/testing/selftests/bpf/trace_helpers.c @@ -293,6 +293,10 @@ static int procmap_query(int fd, const void *addr, __u32 query_flags, size_t *st return 0; } #else +# ifndef PROCMAP_QUERY_VMA_EXECUTABLE +# define PROCMAP_QUERY_VMA_EXECUTABLE 0x04 +# endif + static int procmap_query(int fd, const void *addr, __u32 query_flags, size_t *start, size_t *offset, int *flags) { return -EOPNOTSUPP; -- cgit v1.2.3 From 5760711e198d86bd0d0b9270a54a494ae9a501e0 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 16 Dec 2024 19:12:01 +0200 Subject: selftests: fib_rule_tests: Add flow label selector match tests Add tests for the new FIB rule flow label selector. Test both good and bad flows and with both input and output routes. # ./fib_rule_tests.sh IPv6 FIB rule tests [...] TEST: rule6 check: flowlabel redirect to table [ OK ] TEST: rule6 check: flowlabel no redirect to table [ OK ] TEST: rule6 del by pref: flowlabel redirect to table [ OK ] TEST: rule6 check: iif flowlabel redirect to table [ OK ] TEST: rule6 check: iif flowlabel no redirect to table [ OK ] TEST: rule6 del by pref: iif flowlabel redirect to table [ OK ] TEST: rule6 check: flowlabel masked redirect to table [ OK ] TEST: rule6 check: flowlabel masked no redirect to table [ OK ] TEST: rule6 del by pref: flowlabel masked redirect to table [ OK ] TEST: rule6 check: iif flowlabel masked redirect to table [ OK ] TEST: rule6 check: iif flowlabel masked no redirect to table [ OK ] TEST: rule6 del by pref: iif flowlabel masked redirect to table [ OK ] [...] Tests passed: 268 Tests failed: 0 Reviewed-by: Petr Machata Signed-off-by: Ido Schimmel Reviewed-by: Guillaume Nault Signed-off-by: Paolo Abeni --- tools/testing/selftests/net/fib_rule_tests.sh | 31 +++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/net/fib_rule_tests.sh b/tools/testing/selftests/net/fib_rule_tests.sh index 1d58b3b87465..847936363a12 100755 --- a/tools/testing/selftests/net/fib_rule_tests.sh +++ b/tools/testing/selftests/net/fib_rule_tests.sh @@ -291,6 +291,37 @@ fib_rule6_test() "$getnomatch" "iif dscp redirect to table" \ "iif dscp no redirect to table" fi + + fib_check_iproute_support "flowlabel" "flowlabel" + if [ $? 
-eq 0 ]; then + match="flowlabel 0xfffff" + getmatch="flowlabel 0xfffff" + getnomatch="flowlabel 0xf" + fib_rule6_test_match_n_redirect "$match" "$getmatch" \ + "$getnomatch" "flowlabel redirect to table" \ + "flowlabel no redirect to table" + + match="flowlabel 0xfffff" + getmatch="from $SRC_IP6 iif $DEV flowlabel 0xfffff" + getnomatch="from $SRC_IP6 iif $DEV flowlabel 0xf" + fib_rule6_test_match_n_redirect "$match" "$getmatch" \ + "$getnomatch" "iif flowlabel redirect to table" \ + "iif flowlabel no redirect to table" + + match="flowlabel 0x08000/0x08000" + getmatch="flowlabel 0xfffff" + getnomatch="flowlabel 0xf7fff" + fib_rule6_test_match_n_redirect "$match" "$getmatch" \ + "$getnomatch" "flowlabel masked redirect to table" \ + "flowlabel masked no redirect to table" + + match="flowlabel 0x08000/0x08000" + getmatch="from $SRC_IP6 iif $DEV flowlabel 0xfffff" + getnomatch="from $SRC_IP6 iif $DEV flowlabel 0xf7fff" + fib_rule6_test_match_n_redirect "$match" "$getmatch" \ + "$getnomatch" "iif flowlabel masked redirect to table" \ + "iif flowlabel masked no redirect to table" + fi } fib_rule6_vrf_test() -- cgit v1.2.3 From 6724bc65e59b57e64f65269da8956f8bdc12bb03 Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Wed, 18 Dec 2024 09:00:18 -0500 Subject: selftests: net: remove redundant ncdevmem print Remove extrenous fprintf Signed-off-by: Jamal Hadi Salim Reviewed-by: Mina Almasry Link: https://patch.msgid.link/20241218140018.15607-1-jhs@mojatatu.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/drivers/net/hw/ncdevmem.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/drivers/net/hw/ncdevmem.c b/tools/testing/selftests/drivers/net/hw/ncdevmem.c index 8e502a1f8f9b..19a6969643f4 100644 --- a/tools/testing/selftests/drivers/net/hw/ncdevmem.c +++ b/tools/testing/selftests/drivers/net/hw/ncdevmem.c @@ -619,9 +619,6 @@ int do_server(struct memory_buffer *mem) fprintf(stderr, "page_aligned_frags=%lu, non_page_aligned_frags=%lu\n", page_aligned_frags, non_page_aligned_frags); - fprintf(stderr, "page_aligned_frags=%lu, non_page_aligned_frags=%lu\n", - page_aligned_frags, non_page_aligned_frags); - cleanup: free(tmp_mem); -- cgit v1.2.3 From 55853cb829dc707427c3519f6b8686682a204368 Mon Sep 17 00:00:00 2001 From: Li Zhijian Date: Wed, 18 Dec 2024 10:59:31 +0800 Subject: selftests/alsa: Fix circular dependency involving global-timer The pattern rule `$(OUTPUT)/%: %.c` inadvertently included a circular dependency on the global-timer target due to its inclusion in $(TEST_GEN_PROGS_EXTENDED). This resulted in a circular dependency warning during the build process. To resolve this, the dependency on $(TEST_GEN_PROGS_EXTENDED) has been replaced with an explicit dependency on $(OUTPUT)/libatest.so. This change ensures that libatest.so is built before any other targets that require it, without creating a circular dependency. This fix addresses the following warning: make[4]: Entering directory 'tools/testing/selftests/alsa' make[4]: Circular default_modconfig/kselftest/alsa/global-timer <- default_modconfig/kselftest/alsa/global-timer dependency dropped. make[4]: Nothing to be done for 'all'. 
make[4]: Leaving directory 'tools/testing/selftests/alsa' Cc: Mark Brown Cc: Jaroslav Kysela Cc: Takashi Iwai Cc: Shuah Khan Signed-off-by: Li Zhijian Link: https://patch.msgid.link/20241218025931.914164-1-lizhijian@fujitsu.com Signed-off-by: Takashi Iwai --- tools/testing/selftests/alsa/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/alsa/Makefile b/tools/testing/selftests/alsa/Makefile index 944279160fed..8dab90ad22bb 100644 --- a/tools/testing/selftests/alsa/Makefile +++ b/tools/testing/selftests/alsa/Makefile @@ -27,5 +27,5 @@ include ../lib.mk $(OUTPUT)/libatest.so: conf.c alsa-local.h $(CC) $(CFLAGS) -shared -fPIC $< $(LDLIBS) -o $@ -$(OUTPUT)/%: %.c $(TEST_GEN_PROGS_EXTENDED) alsa-local.h +$(OUTPUT)/%: %.c $(OUTPUT)/libatest.so alsa-local.h $(CC) $(CFLAGS) $< $(LDLIBS) -latest -o $@ -- cgit v1.2.3 From dec2f97a1571ed28ddbadf4431afc5e5872a10df Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Wed, 18 Dec 2024 13:09:50 -0600 Subject: cpupower: Remove spurious return statement print_duration() has a return; statement at the end of the function that is not necessary as it's a void function. Link: https://lore.kernel.org/r/20241218191144.3440854-2-superm1@kernel.org Signed-off-by: Mario Limonciello Signed-off-by: Shuah Khan --- tools/power/cpupower/utils/cpufreq-info.c | 1 - 1 file changed, 1 deletion(-) (limited to 'tools') diff --git a/tools/power/cpupower/utils/cpufreq-info.c b/tools/power/cpupower/utils/cpufreq-info.c index c96b77365c63..5f092f3c729e 100644 --- a/tools/power/cpupower/utils/cpufreq-info.c +++ b/tools/power/cpupower/utils/cpufreq-info.c @@ -120,7 +120,6 @@ static void print_duration(unsigned long duration) } else printf("%lu ns", duration); } - return; } static int get_boost_mode_x86(unsigned int cpu) -- cgit v1.2.3 From 3f2eb7606eee37aea630c4b7aa42497bc36ca157 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Wed, 18 Dec 2024 13:09:51 -0600 Subject: cpupower: Add support for parsing 'enabled' or 'disabled' strings from table When cpufreq_get_sysfs_value_from_table() is passed a table with kernel strings that report 'enabled' or 'disabled' it always returns 0 because these can't cleanly convert to integers. Explicitly look for enabled or disabled strings from the kernel to handle this. Link: https://lore.kernel.org/r/20241218191144.3440854-3-superm1@kernel.org Signed-off-by: Mario Limonciello Signed-off-by: Shuah Khan --- tools/power/cpupower/lib/cpufreq.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'tools') diff --git a/tools/power/cpupower/lib/cpufreq.c b/tools/power/cpupower/lib/cpufreq.c index 1516d23c17c9..f27ee6d4b000 100644 --- a/tools/power/cpupower/lib/cpufreq.c +++ b/tools/power/cpupower/lib/cpufreq.c @@ -102,6 +102,10 @@ unsigned long cpufreq_get_sysfs_value_from_table(unsigned int cpu, if (len == 0) return 0; + if (!strcmp(linebuf, "enabled\n")) + return 1; + if (!strcmp(linebuf, "disabled\n")) + return 0; value = strtoul(linebuf, &endp, 0); if (endp == linebuf || errno == ERANGE) -- cgit v1.2.3 From 6d4a2987f96b9f281b07286eeb1d4022054e1ecd Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Wed, 18 Dec 2024 13:09:52 -0600 Subject: cpupower: Add support for amd-pstate preferred core rankings The rankings are useful information to determine if the scheduler is placing tasks appropriately for the hardware. 
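For reference, the same per-CPU values can be read directly from sysfs; a usage sketch (paths assume an amd-pstate system):

  $ cat /sys/devices/system/cpu/cpu0/cpufreq/amd_pstate_hw_prefcore
  $ cat /sys/devices/system/cpu/cpu0/cpufreq/amd_pstate_prefcore_ranking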
Link: https://lore.kernel.org/r/20241218191144.3440854-4-superm1@kernel.org Signed-off-by: Mario Limonciello Signed-off-by: Shuah Khan --- tools/power/cpupower/utils/helpers/amd.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'tools') diff --git a/tools/power/cpupower/utils/helpers/amd.c b/tools/power/cpupower/utils/helpers/amd.c index 0a56e22240fc..090fdd6d1551 100644 --- a/tools/power/cpupower/utils/helpers/amd.c +++ b/tools/power/cpupower/utils/helpers/amd.c @@ -177,6 +177,8 @@ enum amd_pstate_value { AMD_PSTATE_HIGHEST_PERF, AMD_PSTATE_MAX_FREQ, AMD_PSTATE_LOWEST_NONLINEAR_FREQ, + AMD_PSTATE_HW_PREFCORE, + AMD_PSTATE_PREFCORE_RANKING, MAX_AMD_PSTATE_VALUE_READ_FILES, }; @@ -184,6 +186,8 @@ static const char *amd_pstate_value_files[MAX_AMD_PSTATE_VALUE_READ_FILES] = { [AMD_PSTATE_HIGHEST_PERF] = "amd_pstate_highest_perf", [AMD_PSTATE_MAX_FREQ] = "amd_pstate_max_freq", [AMD_PSTATE_LOWEST_NONLINEAR_FREQ] = "amd_pstate_lowest_nonlinear_freq", + [AMD_PSTATE_HW_PREFCORE] = "amd_pstate_hw_prefcore", + [AMD_PSTATE_PREFCORE_RANKING] = "amd_pstate_prefcore_ranking", }; static unsigned long amd_pstate_get_data(unsigned int cpu, @@ -240,6 +244,10 @@ void amd_pstate_show_perf_and_freq(unsigned int cpu, int no_rounding) acpi_cppc_get_data(cpu, LOWEST_PERF)); print_speed(acpi_cppc_get_data(cpu, LOWEST_FREQ) * 1000, no_rounding); printf(".\n"); + + printf(_(" Preferred Core Support: %lu. Preferred Core Ranking: %lu.\n"), + amd_pstate_get_data(cpu, AMD_PSTATE_HW_PREFCORE), + amd_pstate_get_data(cpu, AMD_PSTATE_PREFCORE_RANKING)); } /* AMD P-State Helper Functions ************************************/ -- cgit v1.2.3 From 26e16174f54d40a3774614c4d43966572ed79dc1 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Wed, 18 Dec 2024 13:09:53 -0600 Subject: cpupower: Don't try to read frequency from hardware when kernel uses aperfmperf When the amd-pstate is in use frequency is set by the hardware and measured by the kernel through using the aperf and mperf registers. There is no direct call to the hardware to indicate current frequency. Detect that this feature is in use and skip the check. Link: https://lore.kernel.org/r/20241218191144.3440854-5-superm1@kernel.org Signed-off-by: Mario Limonciello Signed-off-by: Shuah Khan --- tools/power/cpupower/utils/cpufreq-info.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/power/cpupower/utils/cpufreq-info.c b/tools/power/cpupower/utils/cpufreq-info.c index 5f092f3c729e..3df28e45be42 100644 --- a/tools/power/cpupower/utils/cpufreq-info.c +++ b/tools/power/cpupower/utils/cpufreq-info.c @@ -254,7 +254,12 @@ static int get_freq_kernel(unsigned int cpu, unsigned int human) static int get_freq_hardware(unsigned int cpu, unsigned int human) { - unsigned long freq = cpufreq_get_freq_hardware(cpu); + unsigned long freq; + + if (cpupower_cpu_info.caps & CPUPOWER_CAP_APERF) + return -EINVAL; + + freq = cpufreq_get_freq_hardware(cpu); printf(_(" current CPU frequency: ")); if (!freq) { printf("Unable to call hardware\n"); -- cgit v1.2.3 From 5f567afc283fc9e7c6a34d013c4fc6c5e8d6afae Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Wed, 18 Dec 2024 13:09:54 -0600 Subject: cpupower: Add support for showing energy performance preference The EPP value is useful for characterization of performance. Show it in cpupower frequency-info output. 
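A usage sketch of the new option and the sysfs attribute it reads (option name taken from the hunk below; the path assumes a cpufreq driver with EPP support):

  $ cpupower frequency-info --epp
  $ cat /sys/devices/system/cpu/cpu0/cpufreq/energy_performance_preference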
Link: https://lore.kernel.org/r/20241218191144.3440854-6-superm1@kernel.org Signed-off-by: Mario Limonciello Signed-off-by: Shuah Khan --- tools/power/cpupower/lib/cpufreq.c | 14 ++++++++++++++ tools/power/cpupower/lib/cpufreq.h | 8 ++++++++ tools/power/cpupower/utils/cpufreq-info.c | 25 ++++++++++++++++++++++++- 3 files changed, 46 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/power/cpupower/lib/cpufreq.c b/tools/power/cpupower/lib/cpufreq.c index f27ee6d4b000..8dda3db2dff0 100644 --- a/tools/power/cpupower/lib/cpufreq.c +++ b/tools/power/cpupower/lib/cpufreq.c @@ -127,12 +127,14 @@ static unsigned long sysfs_cpufreq_get_one_value(unsigned int cpu, enum cpufreq_string { SCALING_DRIVER, SCALING_GOVERNOR, + ENERGY_PERFORMANCE_PREFERENCE, MAX_CPUFREQ_STRING_FILES }; static const char *cpufreq_string_files[MAX_CPUFREQ_STRING_FILES] = { [SCALING_DRIVER] = "scaling_driver", [SCALING_GOVERNOR] = "scaling_governor", + [ENERGY_PERFORMANCE_PREFERENCE] = "energy_performance_preference", }; @@ -207,6 +209,18 @@ unsigned long cpufreq_get_transition_latency(unsigned int cpu) return sysfs_cpufreq_get_one_value(cpu, CPUINFO_LATENCY); } +char *cpufreq_get_energy_performance_preference(unsigned int cpu) +{ + return sysfs_cpufreq_get_one_string(cpu, ENERGY_PERFORMANCE_PREFERENCE); +} + +void cpufreq_put_energy_performance_preference(char *ptr) +{ + if (!ptr) + return; + free(ptr); +} + int cpufreq_get_hardware_limits(unsigned int cpu, unsigned long *min, unsigned long *max) diff --git a/tools/power/cpupower/lib/cpufreq.h b/tools/power/cpupower/lib/cpufreq.h index 2f3c84035806..bfc617311ebd 100644 --- a/tools/power/cpupower/lib/cpufreq.h +++ b/tools/power/cpupower/lib/cpufreq.h @@ -68,6 +68,14 @@ unsigned long cpufreq_get_freq_hardware(unsigned int cpu); unsigned long cpufreq_get_transition_latency(unsigned int cpu); +/* determine energy performance preference + * + * returns NULL on failure, else the string that represents the energy performance + * preference requested. 
+ */ +char *cpufreq_get_energy_performance_preference(unsigned int cpu); +void cpufreq_put_energy_performance_preference(char *ptr); + /* determine hardware CPU frequency limits * * These may be limited further by thermal, energy or other diff --git a/tools/power/cpupower/utils/cpufreq-info.c b/tools/power/cpupower/utils/cpufreq-info.c index 3df28e45be42..eb9cc0f10634 100644 --- a/tools/power/cpupower/utils/cpufreq-info.c +++ b/tools/power/cpupower/utils/cpufreq-info.c @@ -422,6 +422,23 @@ static int get_freq_stats(unsigned int cpu, unsigned int human) return 0; } +/* --epp / -z */ + +static int get_epp(unsigned int cpu, bool interactive) +{ + char *epp; + + epp = cpufreq_get_energy_performance_preference(cpu); + if (!epp) + return -EINVAL; + if (interactive) + printf(_(" energy performance preference: %s\n"), epp); + + cpufreq_put_energy_performance_preference(epp); + + return 0; +} + /* --latency / -y */ static int get_latency(unsigned int cpu, unsigned int human) @@ -461,6 +478,7 @@ static void debug_output_one(unsigned int cpu) get_related_cpus(cpu); get_affected_cpus(cpu); get_latency(cpu, 1); + get_epp(cpu, true); get_hardware_limits(cpu, 1); freqs = cpufreq_get_available_frequencies(cpu); @@ -501,6 +519,7 @@ static struct option info_opts[] = { {"human", no_argument, NULL, 'm'}, {"no-rounding", no_argument, NULL, 'n'}, {"performance", no_argument, NULL, 'c'}, + {"epp", no_argument, NULL, 'z'}, { }, }; @@ -514,7 +533,7 @@ int cmd_freq_info(int argc, char **argv) int output_param = 0; do { - ret = getopt_long(argc, argv, "oefwldpgrasmybnc", info_opts, + ret = getopt_long(argc, argv, "oefwldpgrasmybncz", info_opts, NULL); switch (ret) { case '?': @@ -538,6 +557,7 @@ int cmd_freq_info(int argc, char **argv) case 's': case 'y': case 'c': + case 'z': if (output_param) { output_param = -1; cont = 0; @@ -647,6 +667,9 @@ int cmd_freq_info(int argc, char **argv) case 'c': ret = get_perf_cap(cpu); break; + case 'z': + ret = get_epp(cpu, true); + break; } if (ret) return ret; -- cgit v1.2.3 From acf71265e4c0289e23ee1b66fc0977478edea9a5 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Wed, 18 Dec 2024 13:09:55 -0600 Subject: cpupower: Don't fetch maximum latency when EPP is enabled When EPP has been enabled the hardware will autonomously change frequencies on it's own and thus there is no latency with changing from the kernel. Avoid doing the maximum latency check when EPP is found. This will apply to both amd-pstate and intel-pstate drivers. 
Link: https://lore.kernel.org/r/20241218191144.3440854-7-superm1@kernel.org Signed-off-by: Mario Limonciello Signed-off-by: Shuah Khan --- tools/power/cpupower/utils/cpufreq-info.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'tools') diff --git a/tools/power/cpupower/utils/cpufreq-info.c b/tools/power/cpupower/utils/cpufreq-info.c index eb9cc0f10634..fc750e127404 100644 --- a/tools/power/cpupower/utils/cpufreq-info.c +++ b/tools/power/cpupower/utils/cpufreq-info.c @@ -445,6 +445,9 @@ static int get_latency(unsigned int cpu, unsigned int human) { unsigned long latency = cpufreq_get_transition_latency(cpu); + if (!get_epp(cpu, false)) + return -EINVAL; + printf(_(" maximum transition latency: ")); if (!latency || latency == UINT_MAX) { printf(_(" Cannot determine or is not supported.\n")); -- cgit v1.2.3 From 8395d43949790f1671621975730a07c264ef3e6f Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Wed, 18 Dec 2024 13:09:56 -0600 Subject: cpupower: Adjust whitespace for amd-pstate specific prints The amd-pstate section is grouped under boost, which isn't appropriate. Adjust the indentation so that it is it's own section. Link: https://lore.kernel.org/r/20241218191144.3440854-8-superm1@kernel.org Signed-off-by: Mario Limonciello Signed-off-by: Shuah Khan --- tools/power/cpupower/utils/helpers/amd.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/power/cpupower/utils/helpers/amd.c b/tools/power/cpupower/utils/helpers/amd.c index 090fdd6d1551..795562e879de 100644 --- a/tools/power/cpupower/utils/helpers/amd.c +++ b/tools/power/cpupower/utils/helpers/amd.c @@ -219,7 +219,9 @@ void amd_pstate_boost_init(unsigned int cpu, int *support, int *active) void amd_pstate_show_perf_and_freq(unsigned int cpu, int no_rounding) { - printf(_(" AMD PSTATE Highest Performance: %lu. Maximum Frequency: "), + + printf(_(" amd-pstate limits:\n")); + printf(_(" Highest Performance: %lu. Maximum Frequency: "), amd_pstate_get_data(cpu, AMD_PSTATE_HIGHEST_PERF)); /* * If boost isn't active, the cpuinfo_max doesn't indicate real max @@ -228,19 +230,19 @@ void amd_pstate_show_perf_and_freq(unsigned int cpu, int no_rounding) print_speed(amd_pstate_get_data(cpu, AMD_PSTATE_MAX_FREQ), no_rounding); printf(".\n"); - printf(_(" AMD PSTATE Nominal Performance: %lu. Nominal Frequency: "), + printf(_(" Nominal Performance: %lu. Nominal Frequency: "), acpi_cppc_get_data(cpu, NOMINAL_PERF)); print_speed(acpi_cppc_get_data(cpu, NOMINAL_FREQ) * 1000, no_rounding); printf(".\n"); - printf(_(" AMD PSTATE Lowest Non-linear Performance: %lu. Lowest Non-linear Frequency: "), + printf(_(" Lowest Non-linear Performance: %lu. Lowest Non-linear Frequency: "), acpi_cppc_get_data(cpu, LOWEST_NONLINEAR_PERF)); print_speed(amd_pstate_get_data(cpu, AMD_PSTATE_LOWEST_NONLINEAR_FREQ), no_rounding); printf(".\n"); - printf(_(" AMD PSTATE Lowest Performance: %lu. Lowest Frequency: "), + printf(_(" Lowest Performance: %lu. Lowest Frequency: "), acpi_cppc_get_data(cpu, LOWEST_PERF)); print_speed(acpi_cppc_get_data(cpu, LOWEST_FREQ) * 1000, no_rounding); printf(".\n"); -- cgit v1.2.3 From 6de02569a2bb678db04236fdf29814c0c27f5121 Mon Sep 17 00:00:00 2001 From: "John B. Wyatt IV" Date: Wed, 18 Dec 2024 20:26:02 -0500 Subject: pm: cpupower: Add install and uninstall options to bindings makefile Installs the .so and .py files generated by SWIG to system's site packages directory. This allows the Python bindings to be used system wide. 
This commit also includes documentation on setting up and installing the Python bindings. Link: https://lore.kernel.org/r/20241219012606.38963-1-jwyatt@redhat.com Signed-off-by: "John B. Wyatt IV" Signed-off-by: "John B. Wyatt IV" Signed-off-by: Shuah Khan --- tools/power/cpupower/bindings/python/Makefile | 10 ++++++++++ tools/power/cpupower/bindings/python/README | 25 +++++++++++++++++++++++++ 2 files changed, 35 insertions(+) (limited to 'tools') diff --git a/tools/power/cpupower/bindings/python/Makefile b/tools/power/cpupower/bindings/python/Makefile index e1ebb1d60cd4..741f21477432 100644 --- a/tools/power/cpupower/bindings/python/Makefile +++ b/tools/power/cpupower/bindings/python/Makefile @@ -11,6 +11,7 @@ HAVE_PYCONFIG := $(shell if which python-config >/dev/null 2>&1; then echo 1; el LIB_DIR := ../../lib PY_INCLUDE = $(firstword $(shell python-config --includes)) OBJECTS_LIB = $(wildcard $(LIB_DIR)/*.o) +INSTALL_DIR = $(shell python3 -c "import site; print(site.getsitepackages()[0])") all: _raw_pylibcpupower.so @@ -28,6 +29,15 @@ else ifeq ($(HAVE_PYCONFIG),0) endif swig -python raw_pylibcpupower.swg +# Only installs the Python bindings +install: _raw_pylibcpupower.so + install -D _raw_pylibcpupower.so $(INSTALL_DIR)/_raw_pylibcpupower.so + install -D raw_pylibcpupower.py $(INSTALL_DIR)/raw_pylibcpupower.py + +uninstall: + rm -f $(INSTALL_DIR)/_raw_pylibcpupower.so + rm -f $(INSTALL_DIR)/raw_pylibcpupower.py + # Will only clean the bindings folder; will not clean the actual cpupower folder clean: rm -f raw_pylibcpupower.py raw_pylibcpupower_wrap.c raw_pylibcpupower_wrap.o _raw_pylibcpupower.so diff --git a/tools/power/cpupower/bindings/python/README b/tools/power/cpupower/bindings/python/README index 0a4bb2581e8a..952e2e02fd32 100644 --- a/tools/power/cpupower/bindings/python/README +++ b/tools/power/cpupower/bindings/python/README @@ -48,6 +48,31 @@ To run the test script: $ python test_raw_pylibcpupower.py +developing/using the bindings directly +-------------------------------------- + +You need to add the Python bindings directory to your $PYTHONPATH. + +You would set the path in the Bash terminal or in the Bash profile: + +PYTHONPATH=~/linux/tools/power/cpupower/bindings/python:$PYTHONPATH + +This allows you to set a specific repo of the bindings to use. + + +installing/uninstalling +----------------------- + +Python uses a system specific site-packages folder to look up modules to import +by default. You do not need to install cpupower to use the SWIG bindings. 
+ +You can install and uninstall the bindings to the site-packages with: + +sudo make install + +sudo make uninstall + + credits ------- -- cgit v1.2.3 From c5d2bac978c513e1f22273cba9c55db3778032e5 Mon Sep 17 00:00:00 2001 From: Ariel Otilibili Date: Wed, 11 Dec 2024 22:57:29 +0100 Subject: selftests/bpf: Clear out Python syntax warnings Invalid escape sequences are used, and produced syntax warnings: $ test_bpftool_synctypes.py test_bpftool_synctypes.py:69: SyntaxWarning: invalid escape sequence '\[' self.start_marker = re.compile(f'(static )?const bool {self.array_name}\[.*\] = {{\n') test_bpftool_synctypes.py:83: SyntaxWarning: invalid escape sequence '\[' pattern = re.compile('\[(BPF_\w*)\]\s*= (true|false),?$') test_bpftool_synctypes.py:181: SyntaxWarning: invalid escape sequence '\s' pattern = re.compile('^\s*(BPF_\w+),?(\s+/\*.*\*/)?$') test_bpftool_synctypes.py:229: SyntaxWarning: invalid escape sequence '\*' start_marker = re.compile(f'\*{block_name}\* := {{') test_bpftool_synctypes.py:229: SyntaxWarning: invalid escape sequence '\*' start_marker = re.compile(f'\*{block_name}\* := {{') test_bpftool_synctypes.py:230: SyntaxWarning: invalid escape sequence '\*' pattern = re.compile('\*\*([\w/-]+)\*\*') test_bpftool_synctypes.py:248: SyntaxWarning: invalid escape sequence '\s' start_marker = re.compile(f'"\s*{block_name} := {{') test_bpftool_synctypes.py:249: SyntaxWarning: invalid escape sequence '\w' pattern = re.compile('([\w/]+) [|}]') test_bpftool_synctypes.py:267: SyntaxWarning: invalid escape sequence '\s' start_marker = re.compile(f'"\s*{macro}\s*" [|}}]') test_bpftool_synctypes.py:267: SyntaxWarning: invalid escape sequence '\s' start_marker = re.compile(f'"\s*{macro}\s*" [|}}]') test_bpftool_synctypes.py:268: SyntaxWarning: invalid escape sequence '\w' pattern = re.compile('([\w-]+) ?(?:\||}[ }\]])') test_bpftool_synctypes.py:287: SyntaxWarning: invalid escape sequence '\w' pattern = re.compile('(?:.*=\')?([\w/]+)') test_bpftool_synctypes.py:319: SyntaxWarning: invalid escape sequence '\w' pattern = re.compile('([\w-]+) ?(?:\||}[ }\]"])') test_bpftool_synctypes.py:341: SyntaxWarning: invalid escape sequence '\|' start_marker = re.compile('\|COMMON_OPTIONS\| replace:: {') test_bpftool_synctypes.py:342: SyntaxWarning: invalid escape sequence '\*' pattern = re.compile('\*\*([\w/-]+)\*\*') Escaping them clears out the warnings. $ tools/testing/selftests/bpf/test_bpftool_synctypes.py; echo $? 
0 Signed-off-by: Ariel Otilibili Signed-off-by: Daniel Borkmann Tested-by: Quentin Monnet Reviewed-by: Quentin Monnet Link: https://docs.python.org/3/library/re.html Link: https://lore.kernel.org/bpf/20241211220012.714055-2-ariel.otilibili-anieli@eurecom.fr --- .../selftests/bpf/test_bpftool_synctypes.py | 28 +++++++++++----------- 1 file changed, 14 insertions(+), 14 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/test_bpftool_synctypes.py b/tools/testing/selftests/bpf/test_bpftool_synctypes.py index 0ed67b6b31dd..238121fda5b6 100755 --- a/tools/testing/selftests/bpf/test_bpftool_synctypes.py +++ b/tools/testing/selftests/bpf/test_bpftool_synctypes.py @@ -66,7 +66,7 @@ class ArrayParser(BlockParser): def __init__(self, reader, array_name): self.array_name = array_name - self.start_marker = re.compile(f'(static )?const bool {self.array_name}\[.*\] = {{\n') + self.start_marker = re.compile(fr'(static )?const bool {self.array_name}\[.*\] = {{\n') super().__init__(reader) def search_block(self): @@ -80,7 +80,7 @@ class ArrayParser(BlockParser): Parse a block and return data as a dictionary. Items to extract must be on separate lines in the file. """ - pattern = re.compile('\[(BPF_\w*)\]\s*= (true|false),?$') + pattern = re.compile(r'\[(BPF_\w*)\]\s*= (true|false),?$') entries = set() while True: line = self.reader.readline() @@ -178,7 +178,7 @@ class FileExtractor(object): @enum_name: name of the enum to parse """ start_marker = re.compile(f'enum {enum_name} {{\n') - pattern = re.compile('^\s*(BPF_\w+),?(\s+/\*.*\*/)?$') + pattern = re.compile(r'^\s*(BPF_\w+),?(\s+/\*.*\*/)?$') end_marker = re.compile('^};') parser = BlockParser(self.reader) parser.search_block(start_marker) @@ -226,8 +226,8 @@ class FileExtractor(object): @block_name: name of the blog to parse, 'TYPE' in the example """ - start_marker = re.compile(f'\*{block_name}\* := {{') - pattern = re.compile('\*\*([\w/-]+)\*\*') + start_marker = re.compile(fr'\*{block_name}\* := {{') + pattern = re.compile(r'\*\*([\w/-]+)\*\*') end_marker = re.compile('}\n') return self.__get_description_list(start_marker, pattern, end_marker) @@ -245,8 +245,8 @@ class FileExtractor(object): @block_name: name of the blog to parse, 'TYPE' in the example """ - start_marker = re.compile(f'"\s*{block_name} := {{') - pattern = re.compile('([\w/]+) [|}]') + start_marker = re.compile(fr'"\s*{block_name} := {{') + pattern = re.compile(r'([\w/]+) [|}]') end_marker = re.compile('}') return self.__get_description_list(start_marker, pattern, end_marker) @@ -264,8 +264,8 @@ class FileExtractor(object): @macro: macro starting the block, 'HELP_SPEC_OPTIONS' in the example """ - start_marker = re.compile(f'"\s*{macro}\s*" [|}}]') - pattern = re.compile('([\w-]+) ?(?:\||}[ }\]])') + start_marker = re.compile(fr'"\s*{macro}\s*" [|}}]') + pattern = re.compile(r'([\w-]+) ?(?:\||}[ }\]])') end_marker = re.compile('}\\\\n') return self.__get_description_list(start_marker, pattern, end_marker) @@ -283,8 +283,8 @@ class FileExtractor(object): @block_name: name of the blog to parse, 'TYPE' in the example """ - start_marker = re.compile(f'local {block_name}=\'') - pattern = re.compile('(?:.*=\')?([\w/]+)') + start_marker = re.compile(fr'local {block_name}=\'') + pattern = re.compile(r'(?:.*=\')?([\w/]+)') end_marker = re.compile('\'$') return self.__get_description_list(start_marker, pattern, end_marker) @@ -316,7 +316,7 @@ class MainHeaderFileExtractor(SourceFileExtractor): {'-p', '-d', '--pretty', '--debug', '--json', '-j'} """ start_marker = 
re.compile(f'"OPTIONS :=') - pattern = re.compile('([\w-]+) ?(?:\||}[ }\]"])') + pattern = re.compile(r'([\w-]+) ?(?:\||}[ }\]"])') end_marker = re.compile('#define') parser = InlineListParser(self.reader) @@ -338,8 +338,8 @@ class ManSubstitutionsExtractor(SourceFileExtractor): {'-p', '-d', '--pretty', '--debug', '--json', '-j'} """ - start_marker = re.compile('\|COMMON_OPTIONS\| replace:: {') - pattern = re.compile('\*\*([\w/-]+)\*\*') + start_marker = re.compile(r'\|COMMON_OPTIONS\| replace:: {') + pattern = re.compile(r'\*\*([\w/-]+)\*\*') end_marker = re.compile('}$') parser = InlineListParser(self.reader) -- cgit v1.2.3 From 724c6ce38bbaeb4b3f109b0e066d6c0ecd15446c Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Thu, 19 Dec 2024 14:57:34 +0100 Subject: stddef: make __struct_group() UAPI C++-friendly For the most part of the C++ history, it couldn't have type declarations inside anonymous unions for different reasons. At the same time, __struct_group() relies on the latters, so when the @TAG argument is not empty, C++ code doesn't want to build (even under `extern "C"`): ../linux/include/uapi/linux/pkt_cls.h:25:24: error: 'struct tc_u32_sel::::tc_u32_sel_hdr,' invalid; an anonymous union may only have public non-static data members [-fpermissive] The safest way to fix this without trying to switch standards (which is impossible in UAPI anyway) etc., is to disable tag declaration for that language. This won't break anything since for now it's not buildable at all. Use a separate definition for __struct_group() when __cplusplus is defined to mitigate the error, including the version from tools/. Fixes: 50d7bd38c3aa ("stddef: Introduce struct_group() helper macro") Reported-by: Christopher Ferris Closes: https://lore.kernel.org/linux-hardening/Z1HZpe3WE5As8UAz@google.com Suggested-by: Kees Cook # __struct_group_tag() Signed-off-by: Alexander Lobakin Reviewed-by: Gustavo A. R. Silva Link: https://lore.kernel.org/r/20241219135734.2130002-1-aleksander.lobakin@intel.com Signed-off-by: Kees Cook --- tools/include/uapi/linux/stddef.h | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/include/uapi/linux/stddef.h b/tools/include/uapi/linux/stddef.h index bb6ea517efb5..c53cde425406 100644 --- a/tools/include/uapi/linux/stddef.h +++ b/tools/include/uapi/linux/stddef.h @@ -8,6 +8,13 @@ #define __always_inline __inline__ #endif +/* Not all C++ standards support type declarations inside an anonymous union */ +#ifndef __cplusplus +#define __struct_group_tag(TAG) TAG +#else +#define __struct_group_tag(TAG) +#endif + /** * __struct_group() - Create a mirrored named and anonyomous struct * @@ -20,14 +27,14 @@ * and size: one anonymous and one named. The former's members can be used * normally without sub-struct naming, and the latter can be used to * reason about the start, end, and size of the group of struct members. - * The named struct can also be explicitly tagged for layer reuse, as well - * as both having struct attributes appended. + * The named struct can also be explicitly tagged for layer reuse (C only), + * as well as both having struct attributes appended. */ #define __struct_group(TAG, NAME, ATTRS, MEMBERS...) 
\ union { \ struct { MEMBERS } ATTRS; \ - struct TAG { MEMBERS } ATTRS NAME; \ - } + struct __struct_group_tag(TAG) { MEMBERS } ATTRS NAME; \ + } ATTRS /** * __DECLARE_FLEX_ARRAY() - Declare a flexible array usable in a union -- cgit v1.2.3 From 246068b86b1c36e4590388ab8f278e21f1997dc1 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 19 Dec 2024 17:54:10 +0200 Subject: selftests: net: local_termination: require mausezahn Since the blamed commit, we require mausezahn because send_raw() uses it. Remove the "REQUIRE_MZ=no" line, which overwrites the default of requiring it. Fixes: 237979504264 ("selftests: net: local_termination: add PTP frames to the mix") Signed-off-by: Vladimir Oltean Link: https://patch.msgid.link/20241219155410.1856868-1-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/forwarding/local_termination.sh | 1 - 1 file changed, 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/forwarding/local_termination.sh b/tools/testing/selftests/net/forwarding/local_termination.sh index c35548767756..ecd34f364125 100755 --- a/tools/testing/selftests/net/forwarding/local_termination.sh +++ b/tools/testing/selftests/net/forwarding/local_termination.sh @@ -7,7 +7,6 @@ ALL_TESTS="standalone vlan_unaware_bridge vlan_aware_bridge test_vlan \ NUM_NETIFS=2 PING_COUNT=1 REQUIRE_MTOOLS=yes -REQUIRE_MZ=no source lib.sh -- cgit v1.2.3 From 30b981796b94b083da8fdded7cb74cb493608760 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 18 Dec 2024 19:28:33 -0800 Subject: selftests: drv-net: test empty queue and NAPI responses in netlink Make sure kernel doesn't respond to GETs for queues and NAPIs when link is down. Not with valid data, or with empty message, we want a ENOENT. Link: https://patch.msgid.link/20241219032833.1165433-2-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/drivers/net/queues.py | 28 +++++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/drivers/net/queues.py b/tools/testing/selftests/drivers/net/queues.py index 9c5473abbd78..38303da957ee 100755 --- a/tools/testing/selftests/drivers/net/queues.py +++ b/tools/testing/selftests/drivers/net/queues.py @@ -1,10 +1,12 @@ #!/usr/bin/env python3 # SPDX-License-Identifier: GPL-2.0 -from lib.py import ksft_run, ksft_exit, ksft_eq, KsftSkipEx -from lib.py import EthtoolFamily, NetdevFamily +from lib.py import ksft_disruptive, ksft_exit, ksft_run +from lib.py import ksft_eq, ksft_raises, KsftSkipEx +from lib.py import EthtoolFamily, NetdevFamily, NlError from lib.py import NetDrvEnv -from lib.py import cmd +from lib.py import cmd, defer, ip +import errno import glob @@ -59,9 +61,27 @@ def addremove_queues(cfg, nl) -> None: ksft_eq(queues, expected) +@ksft_disruptive +def check_down(cfg, nl) -> None: + # Check the NAPI IDs before interface goes down and hides them + napis = nl.napi_get({'ifindex': cfg.ifindex}, dump=True) + + ip(f"link set dev {cfg.dev['ifname']} down") + defer(ip, f"link set dev {cfg.dev['ifname']} up") + + with ksft_raises(NlError) as cm: + nl.queue_get({'ifindex': cfg.ifindex, 'id': 0, 'type': 'rx'}) + ksft_eq(cm.exception.nl_msg.error, -errno.ENOENT) + + if napis: + with ksft_raises(NlError) as cm: + nl.napi_get({'id': napis[0]['id']}) + ksft_eq(cm.exception.nl_msg.error, -errno.ENOENT) + + def main() -> None: with NetDrvEnv(__file__, queue_count=100) as cfg: - ksft_run([get_queues, addremove_queues], args=(cfg, NetdevFamily())) + 
ksft_run([get_queues, addremove_queues, check_down], args=(cfg, NetdevFamily())) ksft_exit() -- cgit v1.2.3 From 976d248bd33356eecb958cdc1b0c37622fd5d595 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Wed, 18 Dec 2024 18:15:58 +0100 Subject: selftests: net: lib: Add a couple autodefer helpers Alongside the helper ip_link_set_up(), one to set the link down will be useful as well. Add a helper to determine the link state as well, ip_link_is_up(), and use it to short-circuit any changes if the state is already the desired one. Furthermore, add a helper bridge_vlan_add(). Signed-off-by: Petr Machata Reviewed-by: Ido Schimmel Acked-by: Nikolay Aleksandrov Link: https://patch.msgid.link/856d9e01725fdba21b7f6716358f645b19131af2.1734540770.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/lib.sh | 31 +++++++++++++++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/lib.sh b/tools/testing/selftests/net/lib.sh index 2cd5c743b2d9..0bd9a038a1f0 100644 --- a/tools/testing/selftests/net/lib.sh +++ b/tools/testing/selftests/net/lib.sh @@ -477,12 +477,33 @@ ip_link_set_addr() defer ip link set dev "$name" address "$old_addr" } +ip_link_is_up() +{ + local name=$1; shift + + local state=$(ip -j link show "$name" | + jq -r '(.[].flags[] | select(. == "UP")) // "DOWN"') + [[ $state == "UP" ]] +} + ip_link_set_up() { local name=$1; shift - ip link set dev "$name" up - defer ip link set dev "$name" down + if ! ip_link_is_up "$name"; then + ip link set dev "$name" up + defer ip link set dev "$name" down + fi +} + +ip_link_set_down() +{ + local name=$1; shift + + if ip_link_is_up "$name"; then + ip link set dev "$name" down + defer ip link set dev "$name" up + fi } ip_addr_add() @@ -498,3 +519,9 @@ ip_route_add() ip route add "$@" defer ip route del "$@" } + +bridge_vlan_add() +{ + bridge vlan add "$@" + defer bridge vlan del "$@" +} -- cgit v1.2.3 From dca12e9ab7603d94e47ded65080f750d6527c852 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Wed, 18 Dec 2024 18:15:59 +0100 Subject: selftests: net: Add a VLAN bridge binding selftest Add a test that exercises bridge binding. 
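The test builds on the autodefer helpers introduced in the previous patch; a minimal sketch of the setup pattern it relies on (mirroring setup_prepare() below):

  # each helper registers its own cleanup via defer, so the test needs no
  # explicit teardown path
  ip_link_add br up type bridge vlan_filtering 1
  ip_link_set_up d1
  bridge_vlan_add vid 11 dev d1 master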
Signed-off-by: Petr Machata Reviewed-by: Ido Schimmel Acked-by: Nikolay Aleksandrov Link: https://patch.msgid.link/baf7244fd1fe223a6d93e027584fa9f99dee982c.1734540770.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/Makefile | 1 + tools/testing/selftests/net/vlan_bridge_binding.sh | 256 +++++++++++++++++++++ 2 files changed, 257 insertions(+) create mode 100755 tools/testing/selftests/net/vlan_bridge_binding.sh (limited to 'tools') diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index f09bd96cc978..73ee88d6b043 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -96,6 +96,7 @@ TEST_PROGS += test_bridge_backup_port.sh TEST_PROGS += fdb_flush.sh fdb_notify.sh TEST_PROGS += fq_band_pktlimit.sh TEST_PROGS += vlan_hw_filter.sh +TEST_PROGS += vlan_bridge_binding.sh TEST_PROGS += bpf_offload.py TEST_PROGS += ipv6_route_update_soft_lockup.sh TEST_PROGS += busy_poll_test.sh diff --git a/tools/testing/selftests/net/vlan_bridge_binding.sh b/tools/testing/selftests/net/vlan_bridge_binding.sh new file mode 100755 index 000000000000..e7cb8c678bde --- /dev/null +++ b/tools/testing/selftests/net/vlan_bridge_binding.sh @@ -0,0 +1,256 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source lib.sh + +ALL_TESTS=" + test_binding_on + test_binding_off + test_binding_toggle_on + test_binding_toggle_off + test_binding_toggle_on_when_upper_down + test_binding_toggle_off_when_upper_down + test_binding_toggle_on_when_lower_down + test_binding_toggle_off_when_lower_down +" + +setup_prepare() +{ + local port + + ip_link_add br up type bridge vlan_filtering 1 + + for port in d1 d2 d3; do + ip_link_add $port type veth peer name r$port + ip_link_set_up $port + ip_link_set_up r$port + ip_link_set_master $port br + done + + bridge_vlan_add vid 11 dev br self + bridge_vlan_add vid 11 dev d1 master + + bridge_vlan_add vid 12 dev br self + bridge_vlan_add vid 12 dev d2 master + + bridge_vlan_add vid 13 dev br self + bridge_vlan_add vid 13 dev d1 master + bridge_vlan_add vid 13 dev d2 master + + bridge_vlan_add vid 14 dev br self + bridge_vlan_add vid 14 dev d1 master + bridge_vlan_add vid 14 dev d2 master + bridge_vlan_add vid 14 dev d3 master +} + +operstate_is() +{ + local dev=$1; shift + local expect=$1; shift + + local operstate=$(ip -j link show $dev | jq -r .[].operstate) + if [[ $operstate == UP ]]; then + operstate=1 + elif [[ $operstate == DOWN || $operstate == LOWERLAYERDOWN ]]; then + operstate=0 + fi + echo -n $operstate + [[ $operstate == $expect ]] +} + +check_operstate() +{ + local dev=$1; shift + local expect=$1; shift + local operstate + + operstate=$(busywait 1000 \ + operstate_is "$dev" "$expect") + check_err $? 
"Got operstate of $operstate, expected $expect" +} + +add_one_vlan() +{ + local link=$1; shift + local id=$1; shift + + ip_link_add $link.$id link $link type vlan id $id "$@" +} + +add_vlans() +{ + add_one_vlan br 11 "$@" + add_one_vlan br 12 "$@" + add_one_vlan br 13 "$@" + add_one_vlan br 14 "$@" +} + +set_vlans() +{ + ip link set dev br.11 "$@" + ip link set dev br.12 "$@" + ip link set dev br.13 "$@" + ip link set dev br.14 "$@" +} + +down_netdevs() +{ + local dev + + for dev in "$@"; do + ip_link_set_down $dev + done +} + +check_operstates() +{ + local opst_11=$1; shift + local opst_12=$1; shift + local opst_13=$1; shift + local opst_14=$1; shift + + check_operstate br.11 $opst_11 + check_operstate br.12 $opst_12 + check_operstate br.13 $opst_13 + check_operstate br.14 $opst_14 +} + +do_test_binding() +{ + local inject=$1; shift + local what=$1; shift + local opsts_d1=$1; shift + local opsts_d2=$1; shift + local opsts_d12=$1; shift + local opsts_d123=$1; shift + + RET=0 + + defer_scope_push + down_netdevs d1 + $inject + check_operstates $opsts_d1 + defer_scope_pop + + defer_scope_push + down_netdevs d2 + $inject + check_operstates $opsts_d2 + defer_scope_pop + + defer_scope_push + down_netdevs d1 d2 + $inject + check_operstates $opsts_d12 + defer_scope_pop + + defer_scope_push + down_netdevs d1 d2 d3 + $inject + check_operstates $opsts_d123 + defer_scope_pop + + log_test "Test bridge_binding $what" +} + +do_test_binding_on() +{ + local inject=$1; shift + local what=$1; shift + + do_test_binding "$inject" "$what" \ + "0 1 1 1" \ + "1 0 1 1" \ + "0 0 0 1" \ + "0 0 0 0" +} + +do_test_binding_off() +{ + local inject=$1; shift + local what=$1; shift + + do_test_binding "$inject" "$what" \ + "1 1 1 1" \ + "1 1 1 1" \ + "1 1 1 1" \ + "0 0 0 0" +} + +test_binding_on() +{ + add_vlans bridge_binding on + set_vlans up + do_test_binding_on : "on" +} + +test_binding_off() +{ + add_vlans bridge_binding off + set_vlans up + do_test_binding_off : "off" +} + +test_binding_toggle_on() +{ + add_vlans bridge_binding off + set_vlans up + set_vlans type vlan bridge_binding on + do_test_binding_on : "off->on" +} + +test_binding_toggle_off() +{ + add_vlans bridge_binding on + set_vlans up + set_vlans type vlan bridge_binding off + do_test_binding_off : "on->off" +} + +dfr_set_binding_on() +{ + set_vlans type vlan bridge_binding on + defer set_vlans type vlan bridge_binding off +} + +dfr_set_binding_off() +{ + set_vlans type vlan bridge_binding off + defer set_vlans type vlan bridge_binding on +} + +test_binding_toggle_on_when_lower_down() +{ + add_vlans bridge_binding off + set_vlans up + do_test_binding_on dfr_set_binding_on "off->on when lower down" +} + +test_binding_toggle_off_when_lower_down() +{ + add_vlans bridge_binding on + set_vlans up + do_test_binding_off dfr_set_binding_off "on->off when lower down" +} + +test_binding_toggle_on_when_upper_down() +{ + add_vlans bridge_binding off + set_vlans type vlan bridge_binding on + set_vlans up + do_test_binding_on : "off->on when upper down" +} + +test_binding_toggle_off_when_upper_down() +{ + add_vlans bridge_binding on + set_vlans type vlan bridge_binding off + set_vlans up + do_test_binding_off : "on->off when upper down" +} + +trap defer_scopes_cleanup EXIT +setup_prepare +tests_run + +exit $EXIT_STATUS -- cgit v1.2.3 From 9ee0c7b8654346d60c823babe4b3747357a30477 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Thu, 12 Dec 2024 19:40:55 -0800 Subject: selftests/bpf: Add a BPF selftest for bpf_skb_change_tail() As requested by Daniel, we need to add a 
selftest to cover bpf_skb_change_tail() cases in skb_verdict. Here we test trimming, growing and error cases, and validate its expected return values and the expected sizes of the payload. Signed-off-by: Cong Wang Signed-off-by: Daniel Borkmann Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20241213034057.246437-3-xiyou.wangcong@gmail.com --- .../selftests/bpf/prog_tests/sockmap_basic.c | 51 ++++++++++++++++++++++ .../selftests/bpf/progs/test_sockmap_change_tail.c | 40 +++++++++++++++++ 2 files changed, 91 insertions(+) create mode 100644 tools/testing/selftests/bpf/progs/test_sockmap_change_tail.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c index 248754296d97..884ad87783d5 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c @@ -12,6 +12,7 @@ #include "test_sockmap_progs_query.skel.h" #include "test_sockmap_pass_prog.skel.h" #include "test_sockmap_drop_prog.skel.h" +#include "test_sockmap_change_tail.skel.h" #include "bpf_iter_sockmap.skel.h" #include "sockmap_helpers.h" @@ -643,6 +644,54 @@ out: test_sockmap_drop_prog__destroy(drop); } +static void test_sockmap_skb_verdict_change_tail(void) +{ + struct test_sockmap_change_tail *skel; + int err, map, verdict; + int c1, p1, sent, recvd; + int zero = 0; + char buf[2]; + + skel = test_sockmap_change_tail__open_and_load(); + if (!ASSERT_OK_PTR(skel, "open_and_load")) + return; + verdict = bpf_program__fd(skel->progs.prog_skb_verdict); + map = bpf_map__fd(skel->maps.sock_map_rx); + + err = bpf_prog_attach(verdict, map, BPF_SK_SKB_STREAM_VERDICT, 0); + if (!ASSERT_OK(err, "bpf_prog_attach")) + goto out; + err = create_pair(AF_INET, SOCK_STREAM, &c1, &p1); + if (!ASSERT_OK(err, "create_pair()")) + goto out; + err = bpf_map_update_elem(map, &zero, &c1, BPF_NOEXIST); + if (!ASSERT_OK(err, "bpf_map_update_elem(c1)")) + goto out_close; + sent = xsend(p1, "Tr", 2, 0); + ASSERT_EQ(sent, 2, "xsend(p1)"); + recvd = recv(c1, buf, 2, 0); + ASSERT_EQ(recvd, 1, "recv(c1)"); + ASSERT_EQ(skel->data->change_tail_ret, 0, "change_tail_ret"); + + sent = xsend(p1, "G", 1, 0); + ASSERT_EQ(sent, 1, "xsend(p1)"); + recvd = recv(c1, buf, 2, 0); + ASSERT_EQ(recvd, 2, "recv(c1)"); + ASSERT_EQ(skel->data->change_tail_ret, 0, "change_tail_ret"); + + sent = xsend(p1, "E", 1, 0); + ASSERT_EQ(sent, 1, "xsend(p1)"); + recvd = recv(c1, buf, 1, 0); + ASSERT_EQ(recvd, 1, "recv(c1)"); + ASSERT_EQ(skel->data->change_tail_ret, -EINVAL, "change_tail_ret"); + +out_close: + close(c1); + close(p1); +out: + test_sockmap_change_tail__destroy(skel); +} + static void test_sockmap_skb_verdict_peek_helper(int map) { int err, c1, p1, zero = 0, sent, recvd, avail; @@ -1058,6 +1107,8 @@ void test_sockmap_basic(void) test_sockmap_skb_verdict_fionread(true); if (test__start_subtest("sockmap skb_verdict fionread on drop")) test_sockmap_skb_verdict_fionread(false); + if (test__start_subtest("sockmap skb_verdict change tail")) + test_sockmap_skb_verdict_change_tail(); if (test__start_subtest("sockmap skb_verdict msg_f_peek")) test_sockmap_skb_verdict_peek(); if (test__start_subtest("sockmap skb_verdict msg_f_peek with link")) diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_change_tail.c b/tools/testing/selftests/bpf/progs/test_sockmap_change_tail.c new file mode 100644 index 000000000000..2796dd8545eb --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_sockmap_change_tail.c @@ -0,0 +1,40 @@ +// 
SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2024 ByteDance */ +#include +#include + +struct { + __uint(type, BPF_MAP_TYPE_SOCKMAP); + __uint(max_entries, 1); + __type(key, int); + __type(value, int); +} sock_map_rx SEC(".maps"); + +long change_tail_ret = 1; + +SEC("sk_skb") +int prog_skb_verdict(struct __sk_buff *skb) +{ + char *data, *data_end; + + bpf_skb_pull_data(skb, 1); + data = (char *)(unsigned long)skb->data; + data_end = (char *)(unsigned long)skb->data_end; + + if (data + 1 > data_end) + return SK_PASS; + + if (data[0] == 'T') { /* Trim the packet */ + change_tail_ret = bpf_skb_change_tail(skb, skb->len - 1, 0); + return SK_PASS; + } else if (data[0] == 'G') { /* Grow the packet */ + change_tail_ret = bpf_skb_change_tail(skb, skb->len + 1, 0); + return SK_PASS; + } else if (data[0] == 'E') { /* Error */ + change_tail_ret = bpf_skb_change_tail(skb, 65535, 0); + return SK_PASS; + } + return SK_PASS; +} + +char _license[] SEC("license") = "GPL"; -- cgit v1.2.3 From 472759c9f5377912c7483cca5da847888a27cecc Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Thu, 12 Dec 2024 19:40:56 -0800 Subject: selftests/bpf: Introduce socket_helpers.h for TC tests Pull socket helpers out of sockmap_helpers.h so that they can be reused for TC tests as well. This prepares for the next patch. Signed-off-by: Cong Wang Signed-off-by: Daniel Borkmann Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20241213034057.246437-4-xiyou.wangcong@gmail.com --- .../selftests/bpf/prog_tests/socket_helpers.h | 394 +++++++++++++++++++++ .../selftests/bpf/prog_tests/sockmap_helpers.h | 385 +------------------- 2 files changed, 395 insertions(+), 384 deletions(-) create mode 100644 tools/testing/selftests/bpf/prog_tests/socket_helpers.h (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/socket_helpers.h b/tools/testing/selftests/bpf/prog_tests/socket_helpers.h new file mode 100644 index 000000000000..1bdfb79ef009 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/socket_helpers.h @@ -0,0 +1,394 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef __SOCKET_HELPERS__ +#define __SOCKET_HELPERS__ + +#include + +/* include/linux/net.h */ +#define SOCK_TYPE_MASK 0xf + +#define IO_TIMEOUT_SEC 30 +#define MAX_STRERR_LEN 256 + +/* workaround for older vm_sockets.h */ +#ifndef VMADDR_CID_LOCAL +#define VMADDR_CID_LOCAL 1 +#endif + +/* include/linux/cleanup.h */ +#define __get_and_null(p, nullvalue) \ + ({ \ + __auto_type __ptr = &(p); \ + __auto_type __val = *__ptr; \ + *__ptr = nullvalue; \ + __val; \ + }) + +#define take_fd(fd) __get_and_null(fd, -EBADF) + +/* Wrappers that fail the test on error and report it. */ + +#define _FAIL(errnum, fmt...) \ + ({ \ + error_at_line(0, (errnum), __func__, __LINE__, fmt); \ + CHECK_FAIL(true); \ + }) +#define FAIL(fmt...) _FAIL(0, fmt) +#define FAIL_ERRNO(fmt...) 
_FAIL(errno, fmt) +#define FAIL_LIBBPF(err, msg) \ + ({ \ + char __buf[MAX_STRERR_LEN]; \ + libbpf_strerror((err), __buf, sizeof(__buf)); \ + FAIL("%s: %s", (msg), __buf); \ + }) + + +#define xaccept_nonblock(fd, addr, len) \ + ({ \ + int __ret = \ + accept_timeout((fd), (addr), (len), IO_TIMEOUT_SEC); \ + if (__ret == -1) \ + FAIL_ERRNO("accept"); \ + __ret; \ + }) + +#define xbind(fd, addr, len) \ + ({ \ + int __ret = bind((fd), (addr), (len)); \ + if (__ret == -1) \ + FAIL_ERRNO("bind"); \ + __ret; \ + }) + +#define xclose(fd) \ + ({ \ + int __ret = close((fd)); \ + if (__ret == -1) \ + FAIL_ERRNO("close"); \ + __ret; \ + }) + +#define xconnect(fd, addr, len) \ + ({ \ + int __ret = connect((fd), (addr), (len)); \ + if (__ret == -1) \ + FAIL_ERRNO("connect"); \ + __ret; \ + }) + +#define xgetsockname(fd, addr, len) \ + ({ \ + int __ret = getsockname((fd), (addr), (len)); \ + if (__ret == -1) \ + FAIL_ERRNO("getsockname"); \ + __ret; \ + }) + +#define xgetsockopt(fd, level, name, val, len) \ + ({ \ + int __ret = getsockopt((fd), (level), (name), (val), (len)); \ + if (__ret == -1) \ + FAIL_ERRNO("getsockopt(" #name ")"); \ + __ret; \ + }) + +#define xlisten(fd, backlog) \ + ({ \ + int __ret = listen((fd), (backlog)); \ + if (__ret == -1) \ + FAIL_ERRNO("listen"); \ + __ret; \ + }) + +#define xsetsockopt(fd, level, name, val, len) \ + ({ \ + int __ret = setsockopt((fd), (level), (name), (val), (len)); \ + if (__ret == -1) \ + FAIL_ERRNO("setsockopt(" #name ")"); \ + __ret; \ + }) + +#define xsend(fd, buf, len, flags) \ + ({ \ + ssize_t __ret = send((fd), (buf), (len), (flags)); \ + if (__ret == -1) \ + FAIL_ERRNO("send"); \ + __ret; \ + }) + +#define xrecv_nonblock(fd, buf, len, flags) \ + ({ \ + ssize_t __ret = recv_timeout((fd), (buf), (len), (flags), \ + IO_TIMEOUT_SEC); \ + if (__ret == -1) \ + FAIL_ERRNO("recv"); \ + __ret; \ + }) + +#define xsocket(family, sotype, flags) \ + ({ \ + int __ret = socket(family, sotype, flags); \ + if (__ret == -1) \ + FAIL_ERRNO("socket"); \ + __ret; \ + }) + +static inline void close_fd(int *fd) +{ + if (*fd >= 0) + xclose(*fd); +} + +#define __close_fd __attribute__((cleanup(close_fd))) + +static inline struct sockaddr *sockaddr(struct sockaddr_storage *ss) +{ + return (struct sockaddr *)ss; +} + +static inline void init_addr_loopback4(struct sockaddr_storage *ss, + socklen_t *len) +{ + struct sockaddr_in *addr4 = memset(ss, 0, sizeof(*ss)); + + addr4->sin_family = AF_INET; + addr4->sin_port = 0; + addr4->sin_addr.s_addr = htonl(INADDR_LOOPBACK); + *len = sizeof(*addr4); +} + +static inline void init_addr_loopback6(struct sockaddr_storage *ss, + socklen_t *len) +{ + struct sockaddr_in6 *addr6 = memset(ss, 0, sizeof(*ss)); + + addr6->sin6_family = AF_INET6; + addr6->sin6_port = 0; + addr6->sin6_addr = in6addr_loopback; + *len = sizeof(*addr6); +} + +static inline void init_addr_loopback_vsock(struct sockaddr_storage *ss, + socklen_t *len) +{ + struct sockaddr_vm *addr = memset(ss, 0, sizeof(*ss)); + + addr->svm_family = AF_VSOCK; + addr->svm_port = VMADDR_PORT_ANY; + addr->svm_cid = VMADDR_CID_LOCAL; + *len = sizeof(*addr); +} + +static inline void init_addr_loopback(int family, struct sockaddr_storage *ss, + socklen_t *len) +{ + switch (family) { + case AF_INET: + init_addr_loopback4(ss, len); + return; + case AF_INET6: + init_addr_loopback6(ss, len); + return; + case AF_VSOCK: + init_addr_loopback_vsock(ss, len); + return; + default: + FAIL("unsupported address family %d", family); + } +} + +static inline int enable_reuseport(int s, int progfd) +{ + 
int err, one = 1; + + err = xsetsockopt(s, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one)); + if (err) + return -1; + err = xsetsockopt(s, SOL_SOCKET, SO_ATTACH_REUSEPORT_EBPF, &progfd, + sizeof(progfd)); + if (err) + return -1; + + return 0; +} + +static inline int socket_loopback_reuseport(int family, int sotype, int progfd) +{ + struct sockaddr_storage addr; + socklen_t len = 0; + int err, s; + + init_addr_loopback(family, &addr, &len); + + s = xsocket(family, sotype, 0); + if (s == -1) + return -1; + + if (progfd >= 0) + enable_reuseport(s, progfd); + + err = xbind(s, sockaddr(&addr), len); + if (err) + goto close; + + if (sotype & SOCK_DGRAM) + return s; + + err = xlisten(s, SOMAXCONN); + if (err) + goto close; + + return s; +close: + xclose(s); + return -1; +} + +static inline int socket_loopback(int family, int sotype) +{ + return socket_loopback_reuseport(family, sotype, -1); +} + +static inline int poll_connect(int fd, unsigned int timeout_sec) +{ + struct timeval timeout = { .tv_sec = timeout_sec }; + fd_set wfds; + int r, eval; + socklen_t esize = sizeof(eval); + + FD_ZERO(&wfds); + FD_SET(fd, &wfds); + + r = select(fd + 1, NULL, &wfds, NULL, &timeout); + if (r == 0) + errno = ETIME; + if (r != 1) + return -1; + + if (getsockopt(fd, SOL_SOCKET, SO_ERROR, &eval, &esize) < 0) + return -1; + if (eval != 0) { + errno = eval; + return -1; + } + + return 0; +} + +static inline int poll_read(int fd, unsigned int timeout_sec) +{ + struct timeval timeout = { .tv_sec = timeout_sec }; + fd_set rfds; + int r; + + FD_ZERO(&rfds); + FD_SET(fd, &rfds); + + r = select(fd + 1, &rfds, NULL, NULL, &timeout); + if (r == 0) + errno = ETIME; + + return r == 1 ? 0 : -1; +} + +static inline int accept_timeout(int fd, struct sockaddr *addr, socklen_t *len, + unsigned int timeout_sec) +{ + if (poll_read(fd, timeout_sec)) + return -1; + + return accept(fd, addr, len); +} + +static inline int recv_timeout(int fd, void *buf, size_t len, int flags, + unsigned int timeout_sec) +{ + if (poll_read(fd, timeout_sec)) + return -1; + + return recv(fd, buf, len, flags); +} + + +static inline int create_pair(int family, int sotype, int *p0, int *p1) +{ + __close_fd int s, c = -1, p = -1; + struct sockaddr_storage addr; + socklen_t len = sizeof(addr); + int err; + + s = socket_loopback(family, sotype); + if (s < 0) + return s; + + err = xgetsockname(s, sockaddr(&addr), &len); + if (err) + return err; + + c = xsocket(family, sotype, 0); + if (c < 0) + return c; + + err = connect(c, sockaddr(&addr), len); + if (err) { + if (errno != EINPROGRESS) { + FAIL_ERRNO("connect"); + return err; + } + + err = poll_connect(c, IO_TIMEOUT_SEC); + if (err) { + FAIL_ERRNO("poll_connect"); + return err; + } + } + + switch (sotype & SOCK_TYPE_MASK) { + case SOCK_DGRAM: + err = xgetsockname(c, sockaddr(&addr), &len); + if (err) + return err; + + err = xconnect(s, sockaddr(&addr), len); + if (err) + return err; + + *p0 = take_fd(s); + break; + case SOCK_STREAM: + case SOCK_SEQPACKET: + p = xaccept_nonblock(s, NULL, NULL); + if (p < 0) + return p; + + *p0 = take_fd(p); + break; + default: + FAIL("Unsupported socket type %#x", sotype); + return -EOPNOTSUPP; + } + + *p1 = take_fd(c); + return 0; +} + +static inline int create_socket_pairs(int family, int sotype, int *c0, int *c1, + int *p0, int *p1) +{ + int err; + + err = create_pair(family, sotype, c0, p0); + if (err) + return err; + + err = create_pair(family, sotype, c1, p1); + if (err) { + close(*c0); + close(*p0); + } + + return err; +} + +#endif // __SOCKET_HELPERS__ diff --git 
a/tools/testing/selftests/bpf/prog_tests/sockmap_helpers.h b/tools/testing/selftests/bpf/prog_tests/sockmap_helpers.h index 38e35c72bdaa..3e5571dd578d 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_helpers.h +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_helpers.h @@ -1,139 +1,12 @@ #ifndef __SOCKMAP_HELPERS__ #define __SOCKMAP_HELPERS__ -#include +#include "socket_helpers.h" -/* include/linux/net.h */ -#define SOCK_TYPE_MASK 0xf - -#define IO_TIMEOUT_SEC 30 -#define MAX_STRERR_LEN 256 #define MAX_TEST_NAME 80 -/* workaround for older vm_sockets.h */ -#ifndef VMADDR_CID_LOCAL -#define VMADDR_CID_LOCAL 1 -#endif - #define __always_unused __attribute__((__unused__)) -/* include/linux/cleanup.h */ -#define __get_and_null(p, nullvalue) \ - ({ \ - __auto_type __ptr = &(p); \ - __auto_type __val = *__ptr; \ - *__ptr = nullvalue; \ - __val; \ - }) - -#define take_fd(fd) __get_and_null(fd, -EBADF) - -#define _FAIL(errnum, fmt...) \ - ({ \ - error_at_line(0, (errnum), __func__, __LINE__, fmt); \ - CHECK_FAIL(true); \ - }) -#define FAIL(fmt...) _FAIL(0, fmt) -#define FAIL_ERRNO(fmt...) _FAIL(errno, fmt) -#define FAIL_LIBBPF(err, msg) \ - ({ \ - char __buf[MAX_STRERR_LEN]; \ - libbpf_strerror((err), __buf, sizeof(__buf)); \ - FAIL("%s: %s", (msg), __buf); \ - }) - -/* Wrappers that fail the test on error and report it. */ - -#define xaccept_nonblock(fd, addr, len) \ - ({ \ - int __ret = \ - accept_timeout((fd), (addr), (len), IO_TIMEOUT_SEC); \ - if (__ret == -1) \ - FAIL_ERRNO("accept"); \ - __ret; \ - }) - -#define xbind(fd, addr, len) \ - ({ \ - int __ret = bind((fd), (addr), (len)); \ - if (__ret == -1) \ - FAIL_ERRNO("bind"); \ - __ret; \ - }) - -#define xclose(fd) \ - ({ \ - int __ret = close((fd)); \ - if (__ret == -1) \ - FAIL_ERRNO("close"); \ - __ret; \ - }) - -#define xconnect(fd, addr, len) \ - ({ \ - int __ret = connect((fd), (addr), (len)); \ - if (__ret == -1) \ - FAIL_ERRNO("connect"); \ - __ret; \ - }) - -#define xgetsockname(fd, addr, len) \ - ({ \ - int __ret = getsockname((fd), (addr), (len)); \ - if (__ret == -1) \ - FAIL_ERRNO("getsockname"); \ - __ret; \ - }) - -#define xgetsockopt(fd, level, name, val, len) \ - ({ \ - int __ret = getsockopt((fd), (level), (name), (val), (len)); \ - if (__ret == -1) \ - FAIL_ERRNO("getsockopt(" #name ")"); \ - __ret; \ - }) - -#define xlisten(fd, backlog) \ - ({ \ - int __ret = listen((fd), (backlog)); \ - if (__ret == -1) \ - FAIL_ERRNO("listen"); \ - __ret; \ - }) - -#define xsetsockopt(fd, level, name, val, len) \ - ({ \ - int __ret = setsockopt((fd), (level), (name), (val), (len)); \ - if (__ret == -1) \ - FAIL_ERRNO("setsockopt(" #name ")"); \ - __ret; \ - }) - -#define xsend(fd, buf, len, flags) \ - ({ \ - ssize_t __ret = send((fd), (buf), (len), (flags)); \ - if (__ret == -1) \ - FAIL_ERRNO("send"); \ - __ret; \ - }) - -#define xrecv_nonblock(fd, buf, len, flags) \ - ({ \ - ssize_t __ret = recv_timeout((fd), (buf), (len), (flags), \ - IO_TIMEOUT_SEC); \ - if (__ret == -1) \ - FAIL_ERRNO("recv"); \ - __ret; \ - }) - -#define xsocket(family, sotype, flags) \ - ({ \ - int __ret = socket(family, sotype, flags); \ - if (__ret == -1) \ - FAIL_ERRNO("socket"); \ - __ret; \ - }) - #define xbpf_map_delete_elem(fd, key) \ ({ \ int __ret = bpf_map_delete_elem((fd), (key)); \ @@ -193,130 +66,6 @@ __ret; \ }) -static inline void close_fd(int *fd) -{ - if (*fd >= 0) - xclose(*fd); -} - -#define __close_fd __attribute__((cleanup(close_fd))) - -static inline int poll_connect(int fd, unsigned int timeout_sec) -{ - struct timeval 
timeout = { .tv_sec = timeout_sec }; - fd_set wfds; - int r, eval; - socklen_t esize = sizeof(eval); - - FD_ZERO(&wfds); - FD_SET(fd, &wfds); - - r = select(fd + 1, NULL, &wfds, NULL, &timeout); - if (r == 0) - errno = ETIME; - if (r != 1) - return -1; - - if (getsockopt(fd, SOL_SOCKET, SO_ERROR, &eval, &esize) < 0) - return -1; - if (eval != 0) { - errno = eval; - return -1; - } - - return 0; -} - -static inline int poll_read(int fd, unsigned int timeout_sec) -{ - struct timeval timeout = { .tv_sec = timeout_sec }; - fd_set rfds; - int r; - - FD_ZERO(&rfds); - FD_SET(fd, &rfds); - - r = select(fd + 1, &rfds, NULL, NULL, &timeout); - if (r == 0) - errno = ETIME; - - return r == 1 ? 0 : -1; -} - -static inline int accept_timeout(int fd, struct sockaddr *addr, socklen_t *len, - unsigned int timeout_sec) -{ - if (poll_read(fd, timeout_sec)) - return -1; - - return accept(fd, addr, len); -} - -static inline int recv_timeout(int fd, void *buf, size_t len, int flags, - unsigned int timeout_sec) -{ - if (poll_read(fd, timeout_sec)) - return -1; - - return recv(fd, buf, len, flags); -} - -static inline void init_addr_loopback4(struct sockaddr_storage *ss, - socklen_t *len) -{ - struct sockaddr_in *addr4 = memset(ss, 0, sizeof(*ss)); - - addr4->sin_family = AF_INET; - addr4->sin_port = 0; - addr4->sin_addr.s_addr = htonl(INADDR_LOOPBACK); - *len = sizeof(*addr4); -} - -static inline void init_addr_loopback6(struct sockaddr_storage *ss, - socklen_t *len) -{ - struct sockaddr_in6 *addr6 = memset(ss, 0, sizeof(*ss)); - - addr6->sin6_family = AF_INET6; - addr6->sin6_port = 0; - addr6->sin6_addr = in6addr_loopback; - *len = sizeof(*addr6); -} - -static inline void init_addr_loopback_vsock(struct sockaddr_storage *ss, - socklen_t *len) -{ - struct sockaddr_vm *addr = memset(ss, 0, sizeof(*ss)); - - addr->svm_family = AF_VSOCK; - addr->svm_port = VMADDR_PORT_ANY; - addr->svm_cid = VMADDR_CID_LOCAL; - *len = sizeof(*addr); -} - -static inline void init_addr_loopback(int family, struct sockaddr_storage *ss, - socklen_t *len) -{ - switch (family) { - case AF_INET: - init_addr_loopback4(ss, len); - return; - case AF_INET6: - init_addr_loopback6(ss, len); - return; - case AF_VSOCK: - init_addr_loopback_vsock(ss, len); - return; - default: - FAIL("unsupported address family %d", family); - } -} - -static inline struct sockaddr *sockaddr(struct sockaddr_storage *ss) -{ - return (struct sockaddr *)ss; -} - static inline int add_to_sockmap(int sock_mapfd, int fd1, int fd2) { u64 value; @@ -334,136 +83,4 @@ static inline int add_to_sockmap(int sock_mapfd, int fd1, int fd2) return xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST); } -static inline int enable_reuseport(int s, int progfd) -{ - int err, one = 1; - - err = xsetsockopt(s, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one)); - if (err) - return -1; - err = xsetsockopt(s, SOL_SOCKET, SO_ATTACH_REUSEPORT_EBPF, &progfd, - sizeof(progfd)); - if (err) - return -1; - - return 0; -} - -static inline int socket_loopback_reuseport(int family, int sotype, int progfd) -{ - struct sockaddr_storage addr; - socklen_t len = 0; - int err, s; - - init_addr_loopback(family, &addr, &len); - - s = xsocket(family, sotype, 0); - if (s == -1) - return -1; - - if (progfd >= 0) - enable_reuseport(s, progfd); - - err = xbind(s, sockaddr(&addr), len); - if (err) - goto close; - - if (sotype & SOCK_DGRAM) - return s; - - err = xlisten(s, SOMAXCONN); - if (err) - goto close; - - return s; -close: - xclose(s); - return -1; -} - -static inline int socket_loopback(int family, int 
sotype) -{ - return socket_loopback_reuseport(family, sotype, -1); -} - -static inline int create_pair(int family, int sotype, int *p0, int *p1) -{ - __close_fd int s, c = -1, p = -1; - struct sockaddr_storage addr; - socklen_t len = sizeof(addr); - int err; - - s = socket_loopback(family, sotype); - if (s < 0) - return s; - - err = xgetsockname(s, sockaddr(&addr), &len); - if (err) - return err; - - c = xsocket(family, sotype, 0); - if (c < 0) - return c; - - err = connect(c, sockaddr(&addr), len); - if (err) { - if (errno != EINPROGRESS) { - FAIL_ERRNO("connect"); - return err; - } - - err = poll_connect(c, IO_TIMEOUT_SEC); - if (err) { - FAIL_ERRNO("poll_connect"); - return err; - } - } - - switch (sotype & SOCK_TYPE_MASK) { - case SOCK_DGRAM: - err = xgetsockname(c, sockaddr(&addr), &len); - if (err) - return err; - - err = xconnect(s, sockaddr(&addr), len); - if (err) - return err; - - *p0 = take_fd(s); - break; - case SOCK_STREAM: - case SOCK_SEQPACKET: - p = xaccept_nonblock(s, NULL, NULL); - if (p < 0) - return p; - - *p0 = take_fd(p); - break; - default: - FAIL("Unsupported socket type %#x", sotype); - return -EOPNOTSUPP; - } - - *p1 = take_fd(c); - return 0; -} - -static inline int create_socket_pairs(int family, int sotype, int *c0, int *c1, - int *p0, int *p1) -{ - int err; - - err = create_pair(family, sotype, c0, p0); - if (err) - return err; - - err = create_pair(family, sotype, c1, p1); - if (err) { - close(*c0); - close(*p0); - } - - return err; -} - #endif // __SOCKMAP_HELPERS__ -- cgit v1.2.3 From 4a58963d10fa3cb654b859e3f9a8aecbcf9f4982 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Thu, 12 Dec 2024 19:40:57 -0800 Subject: selftests/bpf: Test bpf_skb_change_tail() in TC ingress Similarly to the previous test, we also need a test case to cover positive offsets as well, TC is an excellent hook for this. 
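The new test lands under prog_tests/, so it is driven by the standard BPF selftest runner. As a rough sketch of how to build and run just this case (the runner name and the -t name filter come from the usual selftests/bpf workflow rather than from this patch):

  $ cd tools/testing/selftests/bpf
  $ make
  $ sudo ./test_progs -t tc_change_tail

Root privileges are needed since the program is attached to loopback via tcx and the test then sends over a UDP socket pair on that interface.
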
Signed-off-by: Cong Wang Signed-off-by: Daniel Borkmann Tested-by: Zijian Zhang Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20241213034057.246437-5-xiyou.wangcong@gmail.com --- .../selftests/bpf/prog_tests/tc_change_tail.c | 62 ++++++++++++ .../selftests/bpf/progs/test_tc_change_tail.c | 106 +++++++++++++++++++++ 2 files changed, 168 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/tc_change_tail.c create mode 100644 tools/testing/selftests/bpf/progs/test_tc_change_tail.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/tc_change_tail.c b/tools/testing/selftests/bpf/prog_tests/tc_change_tail.c new file mode 100644 index 000000000000..74752233e779 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/tc_change_tail.c @@ -0,0 +1,62 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include + +#include "test_tc_change_tail.skel.h" +#include "socket_helpers.h" + +#define LO_IFINDEX 1 + +void test_tc_change_tail(void) +{ + LIBBPF_OPTS(bpf_tcx_opts, tcx_opts); + struct test_tc_change_tail *skel = NULL; + struct bpf_link *link; + int c1, p1; + char buf[2]; + int ret; + + skel = test_tc_change_tail__open_and_load(); + if (!ASSERT_OK_PTR(skel, "test_tc_change_tail__open_and_load")) + return; + + link = bpf_program__attach_tcx(skel->progs.change_tail, LO_IFINDEX, + &tcx_opts); + if (!ASSERT_OK_PTR(link, "bpf_program__attach_tcx")) + goto destroy; + + skel->links.change_tail = link; + ret = create_pair(AF_INET, SOCK_DGRAM, &c1, &p1); + if (!ASSERT_OK(ret, "create_pair")) + goto destroy; + + ret = xsend(p1, "Tr", 2, 0); + ASSERT_EQ(ret, 2, "xsend(p1)"); + ret = recv(c1, buf, 2, 0); + ASSERT_EQ(ret, 2, "recv(c1)"); + ASSERT_EQ(skel->data->change_tail_ret, 0, "change_tail_ret"); + + ret = xsend(p1, "G", 1, 0); + ASSERT_EQ(ret, 1, "xsend(p1)"); + ret = recv(c1, buf, 2, 0); + ASSERT_EQ(ret, 1, "recv(c1)"); + ASSERT_EQ(skel->data->change_tail_ret, 0, "change_tail_ret"); + + ret = xsend(p1, "E", 1, 0); + ASSERT_EQ(ret, 1, "xsend(p1)"); + ret = recv(c1, buf, 1, 0); + ASSERT_EQ(ret, 1, "recv(c1)"); + ASSERT_EQ(skel->data->change_tail_ret, -EINVAL, "change_tail_ret"); + + ret = xsend(p1, "Z", 1, 0); + ASSERT_EQ(ret, 1, "xsend(p1)"); + ret = recv(c1, buf, 1, 0); + ASSERT_EQ(ret, 1, "recv(c1)"); + ASSERT_EQ(skel->data->change_tail_ret, -EINVAL, "change_tail_ret"); + + close(c1); + close(p1); +destroy: + test_tc_change_tail__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/progs/test_tc_change_tail.c b/tools/testing/selftests/bpf/progs/test_tc_change_tail.c new file mode 100644 index 000000000000..28edafe803f0 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_tc_change_tail.c @@ -0,0 +1,106 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include +#include +#include +#include + +long change_tail_ret = 1; + +static __always_inline struct iphdr *parse_ip_header(struct __sk_buff *skb, int *ip_proto) +{ + void *data_end = (void *)(long)skb->data_end; + void *data = (void *)(long)skb->data; + struct ethhdr *eth = data; + struct iphdr *iph; + + /* Verify Ethernet header */ + if ((void *)(data + sizeof(*eth)) > data_end) + return NULL; + + /* Skip Ethernet header to get to IP header */ + iph = (void *)(data + sizeof(struct ethhdr)); + + /* Verify IP header */ + if ((void *)(data + sizeof(struct ethhdr) + sizeof(*iph)) > data_end) + return NULL; + + /* Basic IP header validation */ + if (iph->version != 4) /* Only support IPv4 */ + return NULL; + + if (iph->ihl < 5) /* Minimum IP header length */ + 
return NULL; + + *ip_proto = iph->protocol; + return iph; +} + +static __always_inline struct udphdr *parse_udp_header(struct __sk_buff *skb, struct iphdr *iph) +{ + void *data_end = (void *)(long)skb->data_end; + void *hdr = (void *)iph; + struct udphdr *udp; + + /* Calculate UDP header position */ + udp = hdr + (iph->ihl * 4); + hdr = (void *)udp; + + /* Verify UDP header bounds */ + if ((void *)(hdr + sizeof(*udp)) > data_end) + return NULL; + + return udp; +} + +SEC("tc/ingress") +int change_tail(struct __sk_buff *skb) +{ + int len = skb->len; + struct udphdr *udp; + struct iphdr *iph; + void *data_end; + char *payload; + int ip_proto; + + bpf_skb_pull_data(skb, len); + + data_end = (void *)(long)skb->data_end; + iph = parse_ip_header(skb, &ip_proto); + if (!iph) + return TCX_PASS; + + if (ip_proto != IPPROTO_UDP) + return TCX_PASS; + + udp = parse_udp_header(skb, iph); + if (!udp) + return TCX_PASS; + + payload = (char *)udp + (sizeof(struct udphdr)); + if (payload + 1 > (char *)data_end) + return TCX_PASS; + + if (payload[0] == 'T') { /* Trim the packet */ + change_tail_ret = bpf_skb_change_tail(skb, len - 1, 0); + if (!change_tail_ret) + bpf_skb_change_tail(skb, len, 0); + return TCX_PASS; + } else if (payload[0] == 'G') { /* Grow the packet */ + change_tail_ret = bpf_skb_change_tail(skb, len + 1, 0); + if (!change_tail_ret) + bpf_skb_change_tail(skb, len, 0); + return TCX_PASS; + } else if (payload[0] == 'E') { /* Error */ + change_tail_ret = bpf_skb_change_tail(skb, 65535, 0); + return TCX_PASS; + } else if (payload[0] == 'Z') { /* Zero */ + change_tail_ret = bpf_skb_change_tail(skb, 0, 0); + return TCX_PASS; + } + return TCX_DROP; +} + +char _license[] SEC("license") = "GPL"; -- cgit v1.2.3 From f63df61651be541cc5699083faa1bfbaa105ed44 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 19 Dec 2024 18:01:33 +0100 Subject: selftests: add pidfd bind-mount tests Link: https://lore.kernel.org/r/20241219-work-pidfs-mount-v1-2-dbc56198b839@kernel.org Signed-off-by: Christian Brauner --- tools/testing/selftests/pidfd/.gitignore | 1 + tools/testing/selftests/pidfd/Makefile | 2 +- tools/testing/selftests/pidfd/pidfd_bind_mount.c | 188 +++++++++++++++++++++++ 3 files changed, 190 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/pidfd/pidfd_bind_mount.c (limited to 'tools') diff --git a/tools/testing/selftests/pidfd/.gitignore b/tools/testing/selftests/pidfd/.gitignore index 224260e1a4a2..bf92481f925c 100644 --- a/tools/testing/selftests/pidfd/.gitignore +++ b/tools/testing/selftests/pidfd/.gitignore @@ -7,3 +7,4 @@ pidfd_fdinfo_test pidfd_getfd_test pidfd_setns_test pidfd_file_handle_test +pidfd_bind_mount diff --git a/tools/testing/selftests/pidfd/Makefile b/tools/testing/selftests/pidfd/Makefile index 3c16d8e77684..301343a11b62 100644 --- a/tools/testing/selftests/pidfd/Makefile +++ b/tools/testing/selftests/pidfd/Makefile @@ -3,7 +3,7 @@ CFLAGS += -g $(KHDR_INCLUDES) -pthread -Wall TEST_GEN_PROGS := pidfd_test pidfd_fdinfo_test pidfd_open_test \ pidfd_poll_test pidfd_wait pidfd_getfd_test pidfd_setns_test \ - pidfd_file_handle_test + pidfd_file_handle_test pidfd_bind_mount include ../lib.mk diff --git a/tools/testing/selftests/pidfd/pidfd_bind_mount.c b/tools/testing/selftests/pidfd/pidfd_bind_mount.c new file mode 100644 index 000000000000..7822dd080258 --- /dev/null +++ b/tools/testing/selftests/pidfd/pidfd_bind_mount.c @@ -0,0 +1,188 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +// Copyright (c) 2024 Christian Brauner + +#define _GNU_SOURCE 
+#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "pidfd.h" +#include "../kselftest_harness.h" + +#ifndef __NR_open_tree + #if defined __alpha__ + #define __NR_open_tree 538 + #elif defined _MIPS_SIM + #if _MIPS_SIM == _MIPS_SIM_ABI32 /* o32 */ + #define __NR_open_tree 4428 + #endif + #if _MIPS_SIM == _MIPS_SIM_NABI32 /* n32 */ + #define __NR_open_tree 6428 + #endif + #if _MIPS_SIM == _MIPS_SIM_ABI64 /* n64 */ + #define __NR_open_tree 5428 + #endif + #elif defined __ia64__ + #define __NR_open_tree (428 + 1024) + #else + #define __NR_open_tree 428 + #endif +#endif + +#ifndef __NR_move_mount + #if defined __alpha__ + #define __NR_move_mount 539 + #elif defined _MIPS_SIM + #if _MIPS_SIM == _MIPS_SIM_ABI32 /* o32 */ + #define __NR_move_mount 4429 + #endif + #if _MIPS_SIM == _MIPS_SIM_NABI32 /* n32 */ + #define __NR_move_mount 6429 + #endif + #if _MIPS_SIM == _MIPS_SIM_ABI64 /* n64 */ + #define __NR_move_mount 5429 + #endif + #elif defined __ia64__ + #define __NR_move_mount (428 + 1024) + #else + #define __NR_move_mount 429 + #endif +#endif + +#ifndef MOVE_MOUNT_F_EMPTY_PATH +#define MOVE_MOUNT_F_EMPTY_PATH 0x00000004 /* Empty from path permitted */ +#endif + +#ifndef MOVE_MOUNT_F_EMPTY_PATH +#define MOVE_MOUNT_T_EMPTY_PATH 0x00000040 /* Empty to path permitted */ +#endif + +static inline int sys_move_mount(int from_dfd, const char *from_pathname, + int to_dfd, const char *to_pathname, + unsigned int flags) +{ + return syscall(__NR_move_mount, from_dfd, from_pathname, to_dfd, + to_pathname, flags); +} + +#ifndef OPEN_TREE_CLONE +#define OPEN_TREE_CLONE 1 +#endif + +#ifndef OPEN_TREE_CLOEXEC +#define OPEN_TREE_CLOEXEC O_CLOEXEC +#endif + +#ifndef AT_RECURSIVE +#define AT_RECURSIVE 0x8000 /* Apply to the entire subtree */ +#endif + +static inline int sys_open_tree(int dfd, const char *filename, unsigned int flags) +{ + return syscall(__NR_open_tree, dfd, filename, flags); +} + +FIXTURE(pidfd_bind_mount) { + char template[PATH_MAX]; + int fd_tmp; + int pidfd; + struct stat st1; + struct stat st2; + __u32 gen1; + __u32 gen2; + bool must_unmount; +}; + +FIXTURE_SETUP(pidfd_bind_mount) +{ + self->fd_tmp = -EBADF; + self->must_unmount = false; + ASSERT_EQ(unshare(CLONE_NEWNS), 0); + ASSERT_LE(snprintf(self->template, PATH_MAX, "%s", P_tmpdir "/pidfd_bind_mount_XXXXXX"), PATH_MAX); + self->fd_tmp = mkstemp(self->template); + ASSERT_GE(self->fd_tmp, 0); + self->pidfd = sys_pidfd_open(getpid(), 0); + ASSERT_GE(self->pidfd, 0); + ASSERT_GE(fstat(self->pidfd, &self->st1), 0); + ASSERT_EQ(ioctl(self->pidfd, FS_IOC_GETVERSION, &self->gen1), 0); +} + +FIXTURE_TEARDOWN(pidfd_bind_mount) +{ + ASSERT_EQ(close(self->fd_tmp), 0); + if (self->must_unmount) + ASSERT_EQ(umount2(self->template, 0), 0); + ASSERT_EQ(unlink(self->template), 0); +} + +/* + * Test that a detached mount can be created for a pidfd and then + * attached to the filesystem hierarchy. + */ +TEST_F(pidfd_bind_mount, bind_mount) +{ + int fd_tree; + + fd_tree = sys_open_tree(self->pidfd, "", OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC | AT_EMPTY_PATH); + ASSERT_GE(fd_tree, 0); + + ASSERT_EQ(move_mount(fd_tree, "", self->fd_tmp, "", MOVE_MOUNT_F_EMPTY_PATH | MOVE_MOUNT_T_EMPTY_PATH), 0); + self->must_unmount = true; + + ASSERT_EQ(close(fd_tree), 0); +} + +/* Test that a pidfd can be reopened through procfs. 
*/ +TEST_F(pidfd_bind_mount, reopen) +{ + int pidfd; + char proc_path[PATH_MAX]; + + sprintf(proc_path, "/proc/self/fd/%d", self->pidfd); + pidfd = open(proc_path, O_RDONLY | O_NOCTTY | O_CLOEXEC); + ASSERT_GE(pidfd, 0); + + ASSERT_GE(fstat(self->pidfd, &self->st2), 0); + ASSERT_EQ(ioctl(self->pidfd, FS_IOC_GETVERSION, &self->gen2), 0); + + ASSERT_TRUE(self->st1.st_dev == self->st2.st_dev && self->st1.st_ino == self->st2.st_ino); + ASSERT_TRUE(self->gen1 == self->gen2); + + ASSERT_EQ(close(pidfd), 0); +} + +/* + * Test that a detached mount can be created for a pidfd and then + * attached to the filesystem hierarchy and reopened. + */ +TEST_F(pidfd_bind_mount, bind_mount_reopen) +{ + int fd_tree, fd_pidfd_mnt; + + fd_tree = sys_open_tree(self->pidfd, "", OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC | AT_EMPTY_PATH); + ASSERT_GE(fd_tree, 0); + + ASSERT_EQ(move_mount(fd_tree, "", self->fd_tmp, "", MOVE_MOUNT_F_EMPTY_PATH | MOVE_MOUNT_T_EMPTY_PATH), 0); + self->must_unmount = true; + + fd_pidfd_mnt = openat(-EBADF, self->template, O_RDONLY | O_NOCTTY | O_CLOEXEC); + ASSERT_GE(fd_pidfd_mnt, 0); + + ASSERT_GE(fstat(fd_tree, &self->st2), 0); + ASSERT_EQ(ioctl(fd_pidfd_mnt, FS_IOC_GETVERSION, &self->gen2), 0); + + ASSERT_TRUE(self->st1.st_dev == self->st2.st_dev && self->st1.st_ino == self->st2.st_ino); + ASSERT_TRUE(self->gen1 == self->gen2); + + ASSERT_EQ(close(fd_tree), 0); + ASSERT_EQ(close(fd_pidfd_mnt), 0); +} + +TEST_HARNESS_MAIN -- cgit v1.2.3 From f288c7a1ba268a9ed58a7971142a98a1e41a3c73 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 19 Dec 2024 16:31:16 -0800 Subject: selftests: drv-net: assume stats refresh is 0 if no ethtool -c support Tests using HW stats wait for them to stabilize, using data from ethtool -c as the delay. Not all drivers implement ethtool -c so handle the errors gracefully. Reviewed-by: Andrew Lunn Reviewed-by: Willem de Bruijn Link: https://patch.msgid.link/20241220003116.1458863-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/drivers/net/lib/py/env.py | 9 +++++++-- tools/testing/selftests/net/lib/py/utils.py | 6 ++++-- 2 files changed, 11 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/drivers/net/lib/py/env.py b/tools/testing/selftests/drivers/net/lib/py/env.py index 1ea9bb695e94..fea343f209ea 100644 --- a/tools/testing/selftests/drivers/net/lib/py/env.py +++ b/tools/testing/selftests/drivers/net/lib/py/env.py @@ -5,7 +5,7 @@ import time from pathlib import Path from lib.py import KsftSkipEx, KsftXfailEx from lib.py import ksft_setup -from lib.py import cmd, ethtool, ip +from lib.py import cmd, ethtool, ip, CmdExitFailure from lib.py import NetNS, NetdevSimDev from .remote import Remote @@ -234,7 +234,12 @@ class NetDrvEpEnv: Good drivers will tell us via ethtool what their sync period is. 
""" if self._stats_settle_time is None: - data = ethtool("-c " + self.ifname, json=True)[0] + data = {} + try: + data = ethtool("-c " + self.ifname, json=True)[0] + except CmdExitFailure as e: + if "Operation not supported" not in e.cmd.stderr: + raise self._stats_settle_time = 0.025 + \ data.get('stats-block-usecs', 0) / 1000 / 1000 diff --git a/tools/testing/selftests/net/lib/py/utils.py b/tools/testing/selftests/net/lib/py/utils.py index 72590c3f90f1..9e3bcddcf3e8 100644 --- a/tools/testing/selftests/net/lib/py/utils.py +++ b/tools/testing/selftests/net/lib/py/utils.py @@ -10,7 +10,9 @@ import time class CmdExitFailure(Exception): - pass + def __init__(self, msg, cmd_obj): + super().__init__(msg) + self.cmd = cmd_obj class cmd: @@ -48,7 +50,7 @@ class cmd: if len(stderr) > 0 and stderr[-1] == "\n": stderr = stderr[:-1] raise CmdExitFailure("Command failed: %s\nSTDOUT: %s\nSTDERR: %s" % - (self.proc.args, stdout, stderr)) + (self.proc.args, stdout, stderr), self) class bkg(cmd): -- cgit v1.2.3 From f3af3ba1083836d174ada619366783fa17272f66 Mon Sep 17 00:00:00 2001 From: Michal Luczaj Date: Thu, 19 Dec 2024 10:49:28 +0100 Subject: vsock/test: Use NSEC_PER_SEC Replace 1000000000ULL with NSEC_PER_SEC. No functional change intended. Reviewed-by: Luigi Leonardi Reviewed-by: Stefano Garzarella Signed-off-by: Michal Luczaj Link: https://patch.msgid.link/20241219-test-vsock-leaks-v4-1-a416e554d9d7@rbox.co Signed-off-by: Jakub Kicinski --- tools/testing/vsock/vsock_test.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/vsock/vsock_test.c b/tools/testing/vsock/vsock_test.c index 48f17641ca50..38fd8d96eb83 100644 --- a/tools/testing/vsock/vsock_test.c +++ b/tools/testing/vsock/vsock_test.c @@ -22,6 +22,7 @@ #include #include #include +#include #include "vsock_test_zerocopy.h" #include "timeout.h" @@ -559,7 +560,7 @@ static time_t current_nsec(void) exit(EXIT_FAILURE); } - return (ts.tv_sec * 1000000000ULL) + ts.tv_nsec; + return (ts.tv_sec * NSEC_PER_SEC) + ts.tv_nsec; } #define RCVTIMEO_TIMEOUT_SEC 1 @@ -599,7 +600,7 @@ static void test_seqpacket_timeout_client(const struct test_opts *opts) } read_overhead_ns = current_nsec() - read_enter_ns - - 1000000000ULL * RCVTIMEO_TIMEOUT_SEC; + NSEC_PER_SEC * RCVTIMEO_TIMEOUT_SEC; if (read_overhead_ns > READ_OVERHEAD_NSEC) { fprintf(stderr, -- cgit v1.2.3 From ef8bd18f475e969753b1b72588a4932195d420f3 Mon Sep 17 00:00:00 2001 From: Michal Luczaj Date: Thu, 19 Dec 2024 10:49:29 +0100 Subject: vsock/test: Introduce option to select tests Allow for selecting specific test IDs to be executed. 
Reviewed-by: Stefano Garzarella Signed-off-by: Michal Luczaj Link: https://patch.msgid.link/20241219-test-vsock-leaks-v4-2-a416e554d9d7@rbox.co Signed-off-by: Jakub Kicinski --- tools/testing/vsock/util.c | 29 +++++++++++++++++++++++++++-- tools/testing/vsock/util.h | 2 ++ tools/testing/vsock/vsock_test.c | 11 +++++++++++ 3 files changed, 40 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/vsock/util.c b/tools/testing/vsock/util.c index 34e9dac0a105..81b9a31059d8 100644 --- a/tools/testing/vsock/util.c +++ b/tools/testing/vsock/util.c @@ -486,8 +486,7 @@ void list_tests(const struct test_case *test_cases) exit(EXIT_FAILURE); } -void skip_test(struct test_case *test_cases, size_t test_cases_len, - const char *test_id_str) +static unsigned long parse_test_id(const char *test_id_str, size_t test_cases_len) { unsigned long test_id; char *endptr = NULL; @@ -505,9 +504,35 @@ void skip_test(struct test_case *test_cases, size_t test_cases_len, exit(EXIT_FAILURE); } + return test_id; +} + +void skip_test(struct test_case *test_cases, size_t test_cases_len, + const char *test_id_str) +{ + unsigned long test_id = parse_test_id(test_id_str, test_cases_len); test_cases[test_id].skip = true; } +void pick_test(struct test_case *test_cases, size_t test_cases_len, + const char *test_id_str) +{ + static bool skip_all = true; + unsigned long test_id; + + if (skip_all) { + unsigned long i; + + for (i = 0; i < test_cases_len; ++i) + test_cases[i].skip = true; + + skip_all = false; + } + + test_id = parse_test_id(test_id_str, test_cases_len); + test_cases[test_id].skip = false; +} + unsigned long hash_djb2(const void *data, size_t len) { unsigned long hash = 5381; diff --git a/tools/testing/vsock/util.h b/tools/testing/vsock/util.h index ba84d296d8b7..e62f46b2b92a 100644 --- a/tools/testing/vsock/util.h +++ b/tools/testing/vsock/util.h @@ -62,6 +62,8 @@ void run_tests(const struct test_case *test_cases, void list_tests(const struct test_case *test_cases); void skip_test(struct test_case *test_cases, size_t test_cases_len, const char *test_id_str); +void pick_test(struct test_case *test_cases, size_t test_cases_len, + const char *test_id_str); unsigned long hash_djb2(const void *data, size_t len); size_t iovec_bytes(const struct iovec *iov, size_t iovnum); unsigned long iovec_hash_djb2(const struct iovec *iov, size_t iovnum); diff --git a/tools/testing/vsock/vsock_test.c b/tools/testing/vsock/vsock_test.c index 38fd8d96eb83..8bb2ab41c55f 100644 --- a/tools/testing/vsock/vsock_test.c +++ b/tools/testing/vsock/vsock_test.c @@ -1644,6 +1644,11 @@ static const struct option longopts[] = { .has_arg = required_argument, .val = 's', }, + { + .name = "pick", + .has_arg = required_argument, + .val = 't', + }, { .name = "help", .has_arg = no_argument, @@ -1681,6 +1686,8 @@ static void usage(void) " --peer-cid CID of the other side\n" " --peer-port AF_VSOCK port used for the test [default: %d]\n" " --list List of tests that will be executed\n" + " --pick Test ID to execute selectively;\n" + " use multiple --pick options to select more tests\n" " --skip Test ID to skip;\n" " use multiple --skip options to skip more tests\n", DEFAULT_PEER_PORT @@ -1737,6 +1744,10 @@ int main(int argc, char **argv) skip_test(test_cases, ARRAY_SIZE(test_cases) - 1, optarg); break; + case 't': + pick_test(test_cases, ARRAY_SIZE(test_cases) - 1, + optarg); + break; case '?': default: usage(); -- cgit v1.2.3 From 50f9434463a0be5b972ee442ba6a9704c9afb02a Mon Sep 17 00:00:00 2001 From: Michal Luczaj Date: Thu, 19 Dec 
2024 10:49:30 +0100 Subject: vsock/test: Add README blurb about kmemleak usage Document the suggested use of kmemleak for memory leak detection. Suggested-by: Stefano Garzarella Reviewed-by: Stefano Garzarella Signed-off-by: Michal Luczaj Link: https://patch.msgid.link/20241219-test-vsock-leaks-v4-3-a416e554d9d7@rbox.co Signed-off-by: Jakub Kicinski --- tools/testing/vsock/README | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'tools') diff --git a/tools/testing/vsock/README b/tools/testing/vsock/README index 84ee217ba8ee..680ce666ceb5 100644 --- a/tools/testing/vsock/README +++ b/tools/testing/vsock/README @@ -36,6 +36,21 @@ Invoke test binaries in both directions as follows: --control-port=1234 \ --peer-cid=3 +Some tests are designed to produce kernel memory leaks. Leaks detection, +however, is deferred to Kernel Memory Leak Detector. It is recommended to enable +kmemleak (CONFIG_DEBUG_KMEMLEAK=y) and explicitly trigger a scan after each test +suite run, e.g. + + # echo clear > /sys/kernel/debug/kmemleak + # $TEST_BINARY ... + # echo "wait for any grace periods" && sleep 2 + # echo scan > /sys/kernel/debug/kmemleak + # echo "wait for kmemleak" && sleep 5 + # echo scan > /sys/kernel/debug/kmemleak + # cat /sys/kernel/debug/kmemleak + +For more information see Documentation/dev-tools/kmemleak.rst. + vsock_perf utility ------------------- 'vsock_perf' is a simple tool to measure vsock performance. It works in -- cgit v1.2.3 From f52e7f593b49344b9497c289cbb2ada213f60a7a Mon Sep 17 00:00:00 2001 From: Michal Luczaj Date: Thu, 19 Dec 2024 10:49:31 +0100 Subject: vsock/test: Adapt send_byte()/recv_byte() to handle MSG_ZEROCOPY For a zerocopy send(), buffer (always byte 'A') needs to be preserved (thus it can not be on the stack) or the data recv()ed check in recv_byte() might fail. While there, change the printf format to 0x%02x so the '\0' bytes can be seen. Reviewed-by: Stefano Garzarella Signed-off-by: Michal Luczaj Link: https://patch.msgid.link/20241219-test-vsock-leaks-v4-4-a416e554d9d7@rbox.co Signed-off-by: Jakub Kicinski --- tools/testing/vsock/util.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/vsock/util.c b/tools/testing/vsock/util.c index 81b9a31059d8..7058dc614c25 100644 --- a/tools/testing/vsock/util.c +++ b/tools/testing/vsock/util.c @@ -401,7 +401,7 @@ void recv_buf(int fd, void *buf, size_t len, int flags, ssize_t expected_ret) */ void send_byte(int fd, int expected_ret, int flags) { - const uint8_t byte = 'A'; + static const uint8_t byte = 'A'; send_buf(fd, &byte, sizeof(byte), flags, expected_ret); } @@ -420,7 +420,7 @@ void recv_byte(int fd, int expected_ret, int flags) recv_buf(fd, &byte, sizeof(byte), flags, expected_ret); if (byte != 'A') { - fprintf(stderr, "unexpected byte read %c\n", byte); + fprintf(stderr, "unexpected byte read 0x%02x\n", byte); exit(EXIT_FAILURE); } } -- cgit v1.2.3 From f66ef469a72d19764f943067307a570f83b00dca Mon Sep 17 00:00:00 2001 From: Michal Luczaj Date: Thu, 19 Dec 2024 10:49:32 +0100 Subject: vsock/test: Add test for accept_queue memory leak Attempt to enqueue a child after the queue was flushed, but before SOCK_DONE flag has been set. Test tries to produce a memory leak, kmemleak should be employed. Dealing with a race condition, test by its very nature may lead to a false negative. Fixed by commit d7b0ff5a8667 ("virtio/vsock: Fix accept_queue memory leak"). 
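Tying this together with the kmemleak workflow documented in the README and the --pick option introduced earlier in this series, a minimal way to exercise only this test might look as follows (the test ID is illustrative, --list reports the real one, and the connection flags mirror the README invocation):

  # echo clear > /sys/kernel/debug/kmemleak
  # ./vsock_test --control-port=1234 --peer-cid=3 --pick <id-of-leak-test>
  # echo scan > /sys/kernel/debug/kmemleak
  # cat /sys/kernel/debug/kmemleak

As in the README example, the peer side runs the matching binary, and a short sleep before the scan gives grace periods a chance to expire.
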
Reviewed-by: Stefano Garzarella Signed-off-by: Michal Luczaj Link: https://patch.msgid.link/20241219-test-vsock-leaks-v4-5-a416e554d9d7@rbox.co Signed-off-by: Jakub Kicinski --- tools/testing/vsock/vsock_test.c | 52 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) (limited to 'tools') diff --git a/tools/testing/vsock/vsock_test.c b/tools/testing/vsock/vsock_test.c index 8bb2ab41c55f..2a8fcb062d9d 100644 --- a/tools/testing/vsock/vsock_test.c +++ b/tools/testing/vsock/vsock_test.c @@ -29,6 +29,10 @@ #include "control.h" #include "util.h" +/* Basic messages for control_writeulong(), control_readulong() */ +#define CONTROL_CONTINUE 1 +#define CONTROL_DONE 0 + static void test_stream_connection_reset(const struct test_opts *opts) { union { @@ -1474,6 +1478,49 @@ static void test_stream_cred_upd_on_set_rcvlowat(const struct test_opts *opts) test_stream_credit_update_test(opts, false); } +/* The goal of test leak_acceptq is to stress the race between connect() and + * close(listener). Implementation of client/server loops boils down to: + * + * client server + * ------ ------ + * write(CONTINUE) + * expect(CONTINUE) + * listen() + * write(LISTENING) + * expect(LISTENING) + * connect() close() + */ +#define ACCEPTQ_LEAK_RACE_TIMEOUT 2 /* seconds */ + +static void test_stream_leak_acceptq_client(const struct test_opts *opts) +{ + time_t tout; + int fd; + + tout = current_nsec() + ACCEPTQ_LEAK_RACE_TIMEOUT * NSEC_PER_SEC; + do { + control_writeulong(CONTROL_CONTINUE); + + fd = vsock_stream_connect(opts->peer_cid, opts->peer_port); + if (fd >= 0) + close(fd); + } while (current_nsec() < tout); + + control_writeulong(CONTROL_DONE); +} + +/* Test for a memory leak. User is expected to run kmemleak scan, see README. */ +static void test_stream_leak_acceptq_server(const struct test_opts *opts) +{ + int fd; + + while (control_readulong() == CONTROL_CONTINUE) { + fd = vsock_stream_listen(VMADDR_CID_ANY, opts->peer_port); + control_writeln("LISTENING"); + close(fd); + } +} + static struct test_case test_cases[] = { { .name = "SOCK_STREAM connection reset", @@ -1604,6 +1651,11 @@ static struct test_case test_cases[] = { .run_client = test_seqpacket_unsent_bytes_client, .run_server = test_seqpacket_unsent_bytes_server, }, + { + .name = "SOCK_STREAM leak accept queue", + .run_client = test_stream_leak_acceptq_client, + .run_server = test_stream_leak_acceptq_server, + }, {}, }; -- cgit v1.2.3 From ec50efee8cf814035d82f3b42dad916144d98b38 Mon Sep 17 00:00:00 2001 From: Michal Luczaj Date: Thu, 19 Dec 2024 10:49:33 +0100 Subject: vsock/test: Add test for sk_error_queue memory leak Ask for MSG_ZEROCOPY completion notification, but do not recv() it. Test attempts to create a memory leak, kmemleak should be employed. Fixed by commit fbf7085b3ad1 ("vsock: Fix sk_error_queue memory leak"). Reviewed-by: Stefano Garzarella Signed-off-by: Michal Luczaj Link: https://patch.msgid.link/20241219-test-vsock-leaks-v4-6-a416e554d9d7@rbox.co Signed-off-by: Jakub Kicinski --- tools/testing/vsock/vsock_test.c | 45 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) (limited to 'tools') diff --git a/tools/testing/vsock/vsock_test.c b/tools/testing/vsock/vsock_test.c index 2a8fcb062d9d..2dec6290b075 100644 --- a/tools/testing/vsock/vsock_test.c +++ b/tools/testing/vsock/vsock_test.c @@ -1521,6 +1521,46 @@ static void test_stream_leak_acceptq_server(const struct test_opts *opts) } } +/* Test for a memory leak. User is expected to run kmemleak scan, see README. 
*/ +static void test_stream_msgzcopy_leak_errq_client(const struct test_opts *opts) +{ + struct pollfd fds = { 0 }; + int fd; + + fd = vsock_stream_connect(opts->peer_cid, opts->peer_port); + if (fd < 0) { + perror("connect"); + exit(EXIT_FAILURE); + } + + enable_so_zerocopy_check(fd); + send_byte(fd, 1, MSG_ZEROCOPY); + + fds.fd = fd; + fds.events = 0; + if (poll(&fds, 1, -1) < 0) { + perror("poll"); + exit(EXIT_FAILURE); + } + + close(fd); +} + +static void test_stream_msgzcopy_leak_errq_server(const struct test_opts *opts) +{ + int fd; + + fd = vsock_stream_accept(VMADDR_CID_ANY, opts->peer_port, NULL); + if (fd < 0) { + perror("accept"); + exit(EXIT_FAILURE); + } + + recv_byte(fd, 1, 0); + vsock_wait_remote_close(fd); + close(fd); +} + static struct test_case test_cases[] = { { .name = "SOCK_STREAM connection reset", @@ -1656,6 +1696,11 @@ static struct test_case test_cases[] = { .run_client = test_stream_leak_acceptq_client, .run_server = test_stream_leak_acceptq_server, }, + { + .name = "SOCK_STREAM MSG_ZEROCOPY leak MSG_ERRQUEUE", + .run_client = test_stream_msgzcopy_leak_errq_client, + .run_server = test_stream_msgzcopy_leak_errq_server, + }, {}, }; -- cgit v1.2.3 From d127ac8b1d4d3524d292b597100fef96dd909c9b Mon Sep 17 00:00:00 2001 From: Michal Luczaj Date: Thu, 19 Dec 2024 10:49:34 +0100 Subject: vsock/test: Add test for MSG_ZEROCOPY completion memory leak Exercise the ENOMEM error path by attempting to hit net.core.optmem_max limit on send(). Test aims to create a memory leak, kmemleak should be employed. Fixed by commit 60cf6206a1f5 ("virtio/vsock: Improve MSG_ZEROCOPY error handling"). Reviewed-by: Stefano Garzarella Signed-off-by: Michal Luczaj Link: https://patch.msgid.link/20241219-test-vsock-leaks-v4-7-a416e554d9d7@rbox.co Signed-off-by: Jakub Kicinski --- tools/testing/vsock/vsock_test.c | 152 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 152 insertions(+) (limited to 'tools') diff --git a/tools/testing/vsock/vsock_test.c b/tools/testing/vsock/vsock_test.c index 2dec6290b075..1eebbc0d5f61 100644 --- a/tools/testing/vsock/vsock_test.c +++ b/tools/testing/vsock/vsock_test.c @@ -1561,6 +1561,153 @@ static void test_stream_msgzcopy_leak_errq_server(const struct test_opts *opts) close(fd); } +/* Test msgzcopy_leak_zcskb is meant to exercise sendmsg() error handling path, + * that might leak an skb. The idea is to fail virtio_transport_init_zcopy_skb() + * by hitting net.core.optmem_max limit in sock_omalloc(), specifically + * + * vsock_connectible_sendmsg + * virtio_transport_stream_enqueue + * virtio_transport_send_pkt_info + * virtio_transport_init_zcopy_skb + * . msg_zerocopy_realloc + * . msg_zerocopy_alloc + * . sock_omalloc + * . sk_omem_alloc + size > sysctl_optmem_max + * return -ENOMEM + * + * We abuse the implementation detail of net/socket.c:____sys_sendmsg(). + * sk_omem_alloc can be precisely bumped by sock_kmalloc(), as it is used to + * fetch user-provided control data. + * + * While this approach works for now, it relies on assumptions regarding the + * implementation and configuration (for example, order of net.core.optmem_max + * can not exceed MAX_PAGE_ORDER), which may not hold in the future. A more + * resilient testing could be implemented by leveraging the Fault injection + * framework (CONFIG_FAULT_INJECTION), e.g. 
+ * + * client# echo N > /sys/kernel/debug/failslab/ignore-gfp-wait + * client# echo 0 > /sys/kernel/debug/failslab/verbose + * + * void client(const struct test_opts *opts) + * { + * char buf[16]; + * int f, s, i; + * + * f = open("/proc/self/fail-nth", O_WRONLY); + * + * for (i = 1; i < 32; i++) { + * control_writeulong(CONTROL_CONTINUE); + * + * s = vsock_stream_connect(opts->peer_cid, opts->peer_port); + * enable_so_zerocopy_check(s); + * + * sprintf(buf, "%d", i); + * write(f, buf, strlen(buf)); + * + * send(s, &(char){ 0 }, 1, MSG_ZEROCOPY); + * + * write(f, "0", 1); + * close(s); + * } + * + * control_writeulong(CONTROL_DONE); + * close(f); + * } + * + * void server(const struct test_opts *opts) + * { + * int fd; + * + * while (control_readulong() == CONTROL_CONTINUE) { + * fd = vsock_stream_accept(VMADDR_CID_ANY, opts->peer_port, NULL); + * vsock_wait_remote_close(fd); + * close(fd); + * } + * } + * + * Refer to Documentation/fault-injection/fault-injection.rst. + */ +#define MAX_PAGE_ORDER 10 /* usually */ +#define PAGE_SIZE 4096 + +/* Test for a memory leak. User is expected to run kmemleak scan, see README. */ +static void test_stream_msgzcopy_leak_zcskb_client(const struct test_opts *opts) +{ + size_t optmem_max, ctl_len, chunk_size; + struct msghdr msg = { 0 }; + struct iovec iov; + char *chunk; + int fd, res; + FILE *f; + + f = fopen("/proc/sys/net/core/optmem_max", "r"); + if (!f) { + perror("fopen(optmem_max)"); + exit(EXIT_FAILURE); + } + + if (fscanf(f, "%zu", &optmem_max) != 1) { + fprintf(stderr, "fscanf(optmem_max) failed\n"); + exit(EXIT_FAILURE); + } + + fclose(f); + + fd = vsock_stream_connect(opts->peer_cid, opts->peer_port); + if (fd < 0) { + perror("connect"); + exit(EXIT_FAILURE); + } + + enable_so_zerocopy_check(fd); + + ctl_len = optmem_max - 1; + if (ctl_len > PAGE_SIZE << MAX_PAGE_ORDER) { + fprintf(stderr, "Try with net.core.optmem_max = 100000\n"); + exit(EXIT_FAILURE); + } + + chunk_size = CMSG_SPACE(ctl_len); + chunk = malloc(chunk_size); + if (!chunk) { + perror("malloc"); + exit(EXIT_FAILURE); + } + memset(chunk, 0, chunk_size); + + iov.iov_base = &(char){ 0 }; + iov.iov_len = 1; + + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + msg.msg_control = chunk; + msg.msg_controllen = ctl_len; + + errno = 0; + res = sendmsg(fd, &msg, MSG_ZEROCOPY); + if (res >= 0 || errno != ENOMEM) { + fprintf(stderr, "Expected ENOMEM, got errno=%d res=%d\n", + errno, res); + exit(EXIT_FAILURE); + } + + close(fd); +} + +static void test_stream_msgzcopy_leak_zcskb_server(const struct test_opts *opts) +{ + int fd; + + fd = vsock_stream_accept(VMADDR_CID_ANY, opts->peer_port, NULL); + if (fd < 0) { + perror("accept"); + exit(EXIT_FAILURE); + } + + vsock_wait_remote_close(fd); + close(fd); +} + static struct test_case test_cases[] = { { .name = "SOCK_STREAM connection reset", @@ -1701,6 +1848,11 @@ static struct test_case test_cases[] = { .run_client = test_stream_msgzcopy_leak_errq_client, .run_server = test_stream_msgzcopy_leak_errq_server, }, + { + .name = "SOCK_STREAM MSG_ZEROCOPY leak completion skb", + .run_client = test_stream_msgzcopy_leak_zcskb_client, + .run_server = test_stream_msgzcopy_leak_zcskb_server, + }, {}, }; -- cgit v1.2.3 From ce2b93fc1dfa1c82f2576aa571731c4e5dcc8dd7 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 24 Dec 2024 14:09:15 -1000 Subject: sched_ext: Fix dsq_local_on selftest The dsp_local_on selftest expects the scheduler to fail by trying to schedule an e.g. CPU-affine task to the wrong CPU. 
However, this isn't guaranteed to happen in the 1 second window that the test is running. Besides, it's odd to have this particular exception path tested when there are no other tests that verify that the interface is working at all - e.g. the test would pass if dsp_local_on interface is completely broken and fails on any attempt. Flip the test so that it verifies that the feature works. While at it, fix a typo in the info message. Signed-off-by: Tejun Heo Reported-by: Ihor Solodrai Link: http://lkml.kernel.org/r/Z1n9v7Z6iNJ-wKmq@slm.duckdns.org Signed-off-by: Tejun Heo --- tools/testing/selftests/sched_ext/dsp_local_on.bpf.c | 5 ++++- tools/testing/selftests/sched_ext/dsp_local_on.c | 5 +++-- 2 files changed, 7 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/sched_ext/dsp_local_on.bpf.c b/tools/testing/selftests/sched_ext/dsp_local_on.bpf.c index 6325bf76f47e..fbda6bf54671 100644 --- a/tools/testing/selftests/sched_ext/dsp_local_on.bpf.c +++ b/tools/testing/selftests/sched_ext/dsp_local_on.bpf.c @@ -43,7 +43,10 @@ void BPF_STRUCT_OPS(dsp_local_on_dispatch, s32 cpu, struct task_struct *prev) if (!p) return; - target = bpf_get_prandom_u32() % nr_cpus; + if (p->nr_cpus_allowed == nr_cpus) + target = bpf_get_prandom_u32() % nr_cpus; + else + target = scx_bpf_task_cpu(p); scx_bpf_dsq_insert(p, SCX_DSQ_LOCAL_ON | target, SCX_SLICE_DFL, 0); bpf_task_release(p); diff --git a/tools/testing/selftests/sched_ext/dsp_local_on.c b/tools/testing/selftests/sched_ext/dsp_local_on.c index 472851b56854..0ff27e57fe43 100644 --- a/tools/testing/selftests/sched_ext/dsp_local_on.c +++ b/tools/testing/selftests/sched_ext/dsp_local_on.c @@ -34,9 +34,10 @@ static enum scx_test_status run(void *ctx) /* Just sleeping is fine, plenty of scheduling events happening */ sleep(1); - SCX_EQ(skel->data->uei.kind, EXIT_KIND(SCX_EXIT_ERROR)); bpf_link__destroy(link); + SCX_EQ(skel->data->uei.kind, EXIT_KIND(SCX_EXIT_UNREG)); + return SCX_TEST_PASS; } @@ -50,7 +51,7 @@ static void cleanup(void *ctx) struct scx_test dsp_local_on = { .name = "dsp_local_on", .description = "Verify we can directly dispatch tasks to a local DSQs " - "from osp.dispatch()", + "from ops.dispatch()", .setup = setup, .run = run, .cleanup = cleanup, -- cgit v1.2.3 From 91fce23a08f6f8cc827b865d1870b7c39bf10455 Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Thu, 26 Dec 2024 14:14:43 +0900 Subject: selftests: ftrace: Remove obsolate maxactive syntax check Since the fprobe event does not support maxactive anymore, stop testing the maxactive syntax error checking. 
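For context, a small sketch of the fprobe dynamic event syntax the remaining check covers, run from the tracefs directory (the event and function names are illustrative):

  # echo 'f:myevent vfs_read' >> dynamic_events    # plain fprobe, accepted
  # echo 'f100 vfs_read' >> dynamic_events         # maxactive count, rejected

Only one maxactive form is still exercised, and it is now expected to fail with BAD_MAXACT rather than with the three maxactive-specific errors checked before.
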
Cc: Alexei Starovoitov Cc: Florent Revest Cc: Martin KaFai Lau Cc: bpf Cc: Alexei Starovoitov Cc: Jiri Olsa Cc: Alan Maguire Cc: Mark Rutland Link: https://lore.kernel.org/173519008333.391279.10184048816208739987.stgit@devnote2 Signed-off-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (Google) --- .../testing/selftests/ftrace/test.d/dynevent/fprobe_syntax_errors.tc | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/fprobe_syntax_errors.tc b/tools/testing/selftests/ftrace/test.d/dynevent/fprobe_syntax_errors.tc index 61877d166451..c9425a34fae3 100644 --- a/tools/testing/selftests/ftrace/test.d/dynevent/fprobe_syntax_errors.tc +++ b/tools/testing/selftests/ftrace/test.d/dynevent/fprobe_syntax_errors.tc @@ -16,9 +16,7 @@ aarch64) REG=%r0 ;; esac -check_error 'f^100 vfs_read' # MAXACT_NO_KPROBE -check_error 'f^1a111 vfs_read' # BAD_MAXACT -check_error 'f^100000 vfs_read' # MAXACT_TOO_BIG +check_error 'f^100 vfs_read' # BAD_MAXACT check_error 'f ^non_exist_func' # BAD_PROBE_ADDR (enoent) check_error 'f ^vfs_read+10' # BAD_PROBE_ADDR -- cgit v1.2.3 From 0c2dd44d3f9b1e6959cd201e2192cd55636d7bbb Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Thu, 26 Dec 2024 14:14:54 +0900 Subject: selftests/ftrace: Add a test case for repeating register/unregister fprobe This test case repeats define and undefine the fprobe dynamic event to ensure that the fprobe does not cause any issue with such operations. Cc: Alexei Starovoitov Cc: Florent Revest Cc: Martin KaFai Lau Cc: bpf Cc: Alexei Starovoitov Cc: Jiri Olsa Cc: Alan Maguire Cc: Mark Rutland Link: https://lore.kernel.org/173519009398.391279.4625924605120064761.stgit@devnote2 Signed-off-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (Google) --- .../test.d/dynevent/add_remove_fprobe_repeat.tc | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100644 tools/testing/selftests/ftrace/test.d/dynevent/add_remove_fprobe_repeat.tc (limited to 'tools') diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_fprobe_repeat.tc b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_fprobe_repeat.tc new file mode 100644 index 000000000000..b4ad09237e2a --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_fprobe_repeat.tc @@ -0,0 +1,19 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# description: Generic dynamic event - Repeating add/remove fprobe events +# requires: dynamic_events "f[:[/][]] [%return] []":README + +echo 0 > events/enable +echo > dynamic_events + +PLACE=$FUNCTION_FORK +REPEAT_TIMES=64 + +for i in `seq 1 $REPEAT_TIMES`; do + echo "f:myevent $PLACE" >> dynamic_events + grep -q myevent dynamic_events + test -d events/fprobes/myevent + echo > dynamic_events +done + +clear_trace -- cgit v1.2.3 From 8d097444982d7b23a5396169dc9d2923a59b5a79 Mon Sep 17 00:00:00 2001 From: "John B. Wyatt IV" Date: Tue, 24 Dec 2024 01:23:28 -0500 Subject: pm: cpupower: Add header changes for cpufreq.h to SWIG bindings "cpupower: Add support for showing energy performance preference" added two new functions to cpufreq.h. This patch adds them to the bindings. Link: https://lore.kernel.org/linux-pm/8dc731c3-6586-4265-ae6a-d93ed219a963@linuxfoundation.org/T/#t Tested by compiling both libcpupower and the headers; running the test script that does not use the functions as a basic sanity test. Link: https://lore.kernel.org/r/20241224062329.39606-1-jwyatt@redhat.com Signed-off-by: "John B. 
Wyatt IV" Signed-off-by: "John B. Wyatt IV" Signed-off-by: Shuah Khan --- tools/power/cpupower/bindings/python/raw_pylibcpupower.swg | 3 +++ 1 file changed, 3 insertions(+) (limited to 'tools') diff --git a/tools/power/cpupower/bindings/python/raw_pylibcpupower.swg b/tools/power/cpupower/bindings/python/raw_pylibcpupower.swg index 96556d87a745..a8226c79cfea 100644 --- a/tools/power/cpupower/bindings/python/raw_pylibcpupower.swg +++ b/tools/power/cpupower/bindings/python/raw_pylibcpupower.swg @@ -134,6 +134,9 @@ void cpufreq_put_stats(struct cpufreq_stats *stats); unsigned long cpufreq_get_transitions(unsigned int cpu); +char *cpufreq_get_energy_performance_preference(unsigned int cpu); +void cpufreq_put_energy_performance_preference(char *ptr); + int cpufreq_set_policy(unsigned int cpu, struct cpufreq_policy *policy); int cpufreq_modify_policy_min(unsigned int cpu, unsigned long min_freq); -- cgit v1.2.3 From 6cc45f8c1f898570916044f606be9890d295e129 Mon Sep 17 00:00:00 2001 From: Tomas Glozar Date: Wed, 27 Nov 2024 14:41:30 +0100 Subject: rtla/timerlat: Fix histogram ALL for zero samples rtla timerlat hist currently computers the minimum, maximum and average latency even in cases when there are zero samples. This leads to nonsensical values being calculated for maximum and minimum, and to divide by zero for average. A similar bug is fixed by 01b05fc0e5f3 ("rtla/timerlat: Fix histogram report when a cpu count is 0") but the bug still remains for printing the sum over all CPUs in timerlat_print_stats_all. The issue can be reproduced with this command: $ rtla timerlat hist -U -d 1s Index over: count: min: avg: max: Floating point exception (core dumped) (There are always no samples with -U unless the user workload is created.) Fix the bug by omitting max/min/avg when sample count is zero, displaying a dash instead, just like we already do for the individual CPUs. The logic is moved into a new function called format_summary_value, which is used for both the individual CPUs and for the overall summary. Cc: stable@vger.kernel.org Link: https://lore.kernel.org/20241127134130.51171-1-tglozar@redhat.com Fixes: 1462501c7a8 ("rtla/timerlat: Add a summary for hist mode") Signed-off-by: Tomas Glozar Signed-off-by: Steven Rostedt (Google) --- tools/tracing/rtla/src/timerlat_hist.c | 177 ++++++++++++++++++--------------- 1 file changed, 96 insertions(+), 81 deletions(-) (limited to 'tools') diff --git a/tools/tracing/rtla/src/timerlat_hist.c b/tools/tracing/rtla/src/timerlat_hist.c index 8b66387e5f35..4403cc4eba30 100644 --- a/tools/tracing/rtla/src/timerlat_hist.c +++ b/tools/tracing/rtla/src/timerlat_hist.c @@ -281,6 +281,21 @@ static void timerlat_hist_header(struct osnoise_tool *tool) trace_seq_reset(s); } +/* + * format_summary_value - format a line of summary value (min, max or avg) + * of hist data + */ +static void format_summary_value(struct trace_seq *seq, + int count, + unsigned long long val, + bool avg) +{ + if (count) + trace_seq_printf(seq, "%9llu ", avg ? 
val / count : val); + else + trace_seq_printf(seq, "%9c ", '-'); +} + /* * timerlat_print_summary - print the summary of the hist data to the output */ @@ -328,29 +343,23 @@ timerlat_print_summary(struct timerlat_hist_params *params, if (!data->hist[cpu].irq_count && !data->hist[cpu].thread_count) continue; - if (!params->no_irq) { - if (data->hist[cpu].irq_count) - trace_seq_printf(trace->seq, "%9llu ", - data->hist[cpu].min_irq); - else - trace_seq_printf(trace->seq, " - "); - } + if (!params->no_irq) + format_summary_value(trace->seq, + data->hist[cpu].irq_count, + data->hist[cpu].min_irq, + false); - if (!params->no_thread) { - if (data->hist[cpu].thread_count) - trace_seq_printf(trace->seq, "%9llu ", - data->hist[cpu].min_thread); - else - trace_seq_printf(trace->seq, " - "); - } + if (!params->no_thread) + format_summary_value(trace->seq, + data->hist[cpu].thread_count, + data->hist[cpu].min_thread, + false); - if (params->user_hist) { - if (data->hist[cpu].user_count) - trace_seq_printf(trace->seq, "%9llu ", - data->hist[cpu].min_user); - else - trace_seq_printf(trace->seq, " - "); - } + if (params->user_hist) + format_summary_value(trace->seq, + data->hist[cpu].user_count, + data->hist[cpu].min_user, + false); } trace_seq_printf(trace->seq, "\n"); @@ -364,29 +373,23 @@ timerlat_print_summary(struct timerlat_hist_params *params, if (!data->hist[cpu].irq_count && !data->hist[cpu].thread_count) continue; - if (!params->no_irq) { - if (data->hist[cpu].irq_count) - trace_seq_printf(trace->seq, "%9llu ", - data->hist[cpu].sum_irq / data->hist[cpu].irq_count); - else - trace_seq_printf(trace->seq, " - "); - } + if (!params->no_irq) + format_summary_value(trace->seq, + data->hist[cpu].irq_count, + data->hist[cpu].sum_irq, + true); - if (!params->no_thread) { - if (data->hist[cpu].thread_count) - trace_seq_printf(trace->seq, "%9llu ", - data->hist[cpu].sum_thread / data->hist[cpu].thread_count); - else - trace_seq_printf(trace->seq, " - "); - } + if (!params->no_thread) + format_summary_value(trace->seq, + data->hist[cpu].thread_count, + data->hist[cpu].sum_thread, + true); - if (params->user_hist) { - if (data->hist[cpu].user_count) - trace_seq_printf(trace->seq, "%9llu ", - data->hist[cpu].sum_user / data->hist[cpu].user_count); - else - trace_seq_printf(trace->seq, " - "); - } + if (params->user_hist) + format_summary_value(trace->seq, + data->hist[cpu].user_count, + data->hist[cpu].sum_user, + true); } trace_seq_printf(trace->seq, "\n"); @@ -400,29 +403,23 @@ timerlat_print_summary(struct timerlat_hist_params *params, if (!data->hist[cpu].irq_count && !data->hist[cpu].thread_count) continue; - if (!params->no_irq) { - if (data->hist[cpu].irq_count) - trace_seq_printf(trace->seq, "%9llu ", - data->hist[cpu].max_irq); - else - trace_seq_printf(trace->seq, " - "); - } + if (!params->no_irq) + format_summary_value(trace->seq, + data->hist[cpu].irq_count, + data->hist[cpu].max_irq, + false); - if (!params->no_thread) { - if (data->hist[cpu].thread_count) - trace_seq_printf(trace->seq, "%9llu ", - data->hist[cpu].max_thread); - else - trace_seq_printf(trace->seq, " - "); - } + if (!params->no_thread) + format_summary_value(trace->seq, + data->hist[cpu].thread_count, + data->hist[cpu].max_thread, + false); - if (params->user_hist) { - if (data->hist[cpu].user_count) - trace_seq_printf(trace->seq, "%9llu ", - data->hist[cpu].max_user); - else - trace_seq_printf(trace->seq, " - "); - } + if (params->user_hist) + format_summary_value(trace->seq, + data->hist[cpu].user_count, + 
data->hist[cpu].max_user, + false); } trace_seq_printf(trace->seq, "\n"); trace_seq_do_printf(trace->seq); @@ -506,16 +503,22 @@ timerlat_print_stats_all(struct timerlat_hist_params *params, trace_seq_printf(trace->seq, "min: "); if (!params->no_irq) - trace_seq_printf(trace->seq, "%9llu ", - sum.min_irq); + format_summary_value(trace->seq, + sum.irq_count, + sum.min_irq, + false); if (!params->no_thread) - trace_seq_printf(trace->seq, "%9llu ", - sum.min_thread); + format_summary_value(trace->seq, + sum.thread_count, + sum.min_thread, + false); if (params->user_hist) - trace_seq_printf(trace->seq, "%9llu ", - sum.min_user); + format_summary_value(trace->seq, + sum.user_count, + sum.min_user, + false); trace_seq_printf(trace->seq, "\n"); @@ -523,16 +526,22 @@ timerlat_print_stats_all(struct timerlat_hist_params *params, trace_seq_printf(trace->seq, "avg: "); if (!params->no_irq) - trace_seq_printf(trace->seq, "%9llu ", - sum.sum_irq / sum.irq_count); + format_summary_value(trace->seq, + sum.irq_count, + sum.sum_irq, + true); if (!params->no_thread) - trace_seq_printf(trace->seq, "%9llu ", - sum.sum_thread / sum.thread_count); + format_summary_value(trace->seq, + sum.thread_count, + sum.sum_thread, + true); if (params->user_hist) - trace_seq_printf(trace->seq, "%9llu ", - sum.sum_user / sum.user_count); + format_summary_value(trace->seq, + sum.user_count, + sum.sum_user, + true); trace_seq_printf(trace->seq, "\n"); @@ -540,16 +549,22 @@ timerlat_print_stats_all(struct timerlat_hist_params *params, trace_seq_printf(trace->seq, "max: "); if (!params->no_irq) - trace_seq_printf(trace->seq, "%9llu ", - sum.max_irq); + format_summary_value(trace->seq, + sum.irq_count, + sum.max_irq, + false); if (!params->no_thread) - trace_seq_printf(trace->seq, "%9llu ", - sum.max_thread); + format_summary_value(trace->seq, + sum.thread_count, + sum.max_thread, + false); if (params->user_hist) - trace_seq_printf(trace->seq, "%9llu ", - sum.max_user); + format_summary_value(trace->seq, + sum.user_count, + sum.max_user, + false); trace_seq_printf(trace->seq, "\n"); trace_seq_do_printf(trace->seq); -- cgit v1.2.3 From 6c432b56a16a0727561211a137f37ec47f96f1d0 Mon Sep 17 00:00:00 2001 From: Gabriele Monaco Date: Fri, 27 Dec 2024 15:47:45 +0100 Subject: verification/dot2k: Fix template directory detection dot2k can be run as installed (e.g. make install) or from the kernel tree. In the former case it looks for templates in a known location; in the latter, the PWD has to be `/tools/verification` to properly import python modules. The current version looks for the template in a wrong directory in this latter case. This patch adjusts the directory where dot2k looks for templates if run from the kernel tree (i.e. not installed). Additionally we fix a few simple pylint warnings in boolean expressions. 
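For illustration, the lookup order described above boils down to a few path probes. This is a minimal standalone sketch, not the dot2k code itself (the function name is made up for the example; the three paths are the ones used in the patch below):

  import os
  import platform

  def find_templates_dir():
      # 1) running from the kernel tree with PWD == tools/verification
      candidates = ["dot2/dot2k_templates/"]
      # 2) kernel build directory installed for the running kernel
      candidates.append("/lib/modules/%s/build/tools/verification/dot2/dot2k_templates/"
                        % platform.release())
      # 3) system-wide installation (make install)
      candidates.append("/usr/share/dot2/dot2k_templates/")
      for path in candidates:
          if os.path.exists(path):
              return path
      raise FileNotFoundError("could not find the dot2k template directory")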
Cc: Juri Lelli Cc: Thomas Gleixner Cc: John Kacur Link: https://lore.kernel.org/20241227144752.362911-2-gmonaco@redhat.com Signed-off-by: Gabriele Monaco Signed-off-by: Steven Rostedt (Google) --- tools/verification/dot2/dot2k.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'tools') diff --git a/tools/verification/dot2/dot2k.py b/tools/verification/dot2/dot2k.py index 016550fccf1f..f6d02e3406a3 100644 --- a/tools/verification/dot2/dot2k.py +++ b/tools/verification/dot2/dot2k.py @@ -14,14 +14,14 @@ import os class dot2k(Dot2c): monitor_types = { "global" : 1, "per_cpu" : 2, "per_task" : 3 } - monitor_templates_dir = "dot2k/rv_templates/" + monitor_templates_dir = "dot2/dot2k_templates/" monitor_type = "per_cpu" def __init__(self, file_path, MonitorType): super().__init__(file_path) self.monitor_type = self.monitor_types.get(MonitorType) - if self.monitor_type == None: + if self.monitor_type is None: raise Exception("Unknown monitor type: %s" % MonitorType) self.monitor_type = MonitorType @@ -31,7 +31,7 @@ class dot2k(Dot2c): def __fill_rv_templates_dir(self): - if os.path.exists(self.monitor_templates_dir) == True: + if os.path.exists(self.monitor_templates_dir): return if platform.system() != "Linux": @@ -39,11 +39,11 @@ class dot2k(Dot2c): kernel_path = "/lib/modules/%s/build/tools/verification/dot2/dot2k_templates/" % (platform.release()) - if os.path.exists(kernel_path) == True: + if os.path.exists(kernel_path): self.monitor_templates_dir = kernel_path return - if os.path.exists("/usr/share/dot2/dot2k_templates/") == True: + if os.path.exists("/usr/share/dot2/dot2k_templates/"): self.monitor_templates_dir = "/usr/share/dot2/dot2k_templates/" return @@ -98,7 +98,7 @@ class dot2k(Dot2c): def fill_main_c(self): main_c = self.main_c min_type = self.get_minimun_type() - nr_events = self.events.__len__() + nr_events = len(self.events) tracepoint_handlers = self.fill_tracepoint_handlers_skel() tracepoint_attach = self.fill_tracepoint_attach_probe() tracepoint_detach = self.fill_tracepoint_detach_helper() @@ -160,8 +160,8 @@ class dot2k(Dot2c): def __get_main_name(self): path = "%s/%s" % (self.name, "main.c") - if os.path.exists(path) == False: - return "main.c" + if not os.path.exists(path): + return "main.c" return "__main.c" def print_files(self): -- cgit v1.2.3 From ca08e071c59d96cb1db19b20ba70e9db7b9d5791 Mon Sep 17 00:00:00 2001 From: Gabriele Monaco Date: Fri, 27 Dec 2024 15:47:46 +0100 Subject: verification/dot2k: Unify main.c templates dot2k has 3 templates, one per monitor type, but the only difference among them is the `DECLARE_DA_MON_*` call, keeping 3 almost identical templates requires more work whenever we introduce a change. This patch removes the 3 dot2k templates and replaces them with a generic one, we then adjust the model type from the script. 
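As a rough sketch of what the unified template relies on, the generator only has to expand the monitor type into the DECLARE_DA_MON_*() macro name before doing the usual name substitutions (the model name "wip" and the min type here are example values, not part of this patch):

  def fill_monitor_type(monitor_type):
      # "per_cpu" -> "PER_CPU", which completes DECLARE_DA_MON_PER_CPU(...)
      return monitor_type.upper()

  line = "DECLARE_DA_MON_MONITOR_TYPE(MODEL_NAME, MIN_TYPE);"
  line = line.replace("MONITOR_TYPE", fill_monitor_type("per_cpu"))
  line = line.replace("MODEL_NAME", "wip").replace("MIN_TYPE", "unsigned char")
  # -> 'DECLARE_DA_MON_PER_CPU(wip, unsigned char);'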
Cc: Juri Lelli Cc: Thomas Gleixner Cc: John Kacur Link: https://lore.kernel.org/20241227144752.362911-3-gmonaco@redhat.com Signed-off-by: Gabriele Monaco Signed-off-by: Steven Rostedt (Google) --- tools/verification/dot2/dot2k.py | 7 +- tools/verification/dot2/dot2k_templates/main.c | 91 ++++++++++++++++++++++ .../dot2/dot2k_templates/main_global.c | 91 ---------------------- .../dot2/dot2k_templates/main_per_cpu.c | 91 ---------------------- .../dot2/dot2k_templates/main_per_task.c | 91 ---------------------- 5 files changed, 97 insertions(+), 274 deletions(-) create mode 100644 tools/verification/dot2/dot2k_templates/main.c delete mode 100644 tools/verification/dot2/dot2k_templates/main_global.c delete mode 100644 tools/verification/dot2/dot2k_templates/main_per_cpu.c delete mode 100644 tools/verification/dot2/dot2k_templates/main_per_task.c (limited to 'tools') diff --git a/tools/verification/dot2/dot2k.py b/tools/verification/dot2/dot2k.py index f6d02e3406a3..15d6f7048f8d 100644 --- a/tools/verification/dot2/dot2k.py +++ b/tools/verification/dot2/dot2k.py @@ -26,7 +26,7 @@ class dot2k(Dot2c): self.monitor_type = MonitorType self.__fill_rv_templates_dir() - self.main_c = self.__open_file(self.monitor_templates_dir + "main_" + MonitorType + ".c") + self.main_c = self.__open_file(self.monitor_templates_dir + "main.c") self.enum_suffix = "_%s" % self.name def __fill_rv_templates_dir(self): @@ -69,6 +69,9 @@ class dot2k(Dot2c): # cut off the last \n return string[:-1] + def fill_monitor_type(self): + return self.monitor_type.upper() + def fill_tracepoint_handlers_skel(self): buff = [] for event in self.events: @@ -97,12 +100,14 @@ class dot2k(Dot2c): def fill_main_c(self): main_c = self.main_c + monitor_type = self.fill_monitor_type() min_type = self.get_minimun_type() nr_events = len(self.events) tracepoint_handlers = self.fill_tracepoint_handlers_skel() tracepoint_attach = self.fill_tracepoint_attach_probe() tracepoint_detach = self.fill_tracepoint_detach_helper() + main_c = main_c.replace("MONITOR_TYPE", monitor_type) main_c = main_c.replace("MIN_TYPE", min_type) main_c = main_c.replace("MODEL_NAME", self.name) main_c = main_c.replace("NR_EVENTS", str(nr_events)) diff --git a/tools/verification/dot2/dot2k_templates/main.c b/tools/verification/dot2/dot2k_templates/main.c new file mode 100644 index 000000000000..2419a6f89cd8 --- /dev/null +++ b/tools/verification/dot2/dot2k_templates/main.c @@ -0,0 +1,91 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include +#include +#include +#include +#include + +#define MODULE_NAME "MODEL_NAME" + +/* + * XXX: include required tracepoint headers, e.g., + * #include + */ +#include + +/* + * This is the self-generated part of the monitor. Generally, there is no need + * to touch this section. + */ +#include "MODEL_NAME.h" + +/* + * Declare the deterministic automata monitor. + * + * The rv monitor reference is needed for the monitor declaration. + */ +static struct rv_monitor rv_MODEL_NAME; +DECLARE_DA_MON_MONITOR_TYPE(MODEL_NAME, MIN_TYPE); + +/* + * This is the instrumentation part of the monitor. + * + * This is the section where manual work is required. Here the kernel events + * are translated into model's event. 
+ * + */ +TRACEPOINT_HANDLERS_SKEL +static int enable_MODEL_NAME(void) +{ + int retval; + + retval = da_monitor_init_MODEL_NAME(); + if (retval) + return retval; + +TRACEPOINT_ATTACH + + return 0; +} + +static void disable_MODEL_NAME(void) +{ + rv_MODEL_NAME.enabled = 0; + +TRACEPOINT_DETACH + + da_monitor_destroy_MODEL_NAME(); +} + +/* + * This is the monitor register section. + */ +static struct rv_monitor rv_MODEL_NAME = { + .name = "MODEL_NAME", + .description = "auto-generated MODEL_NAME", + .enable = enable_MODEL_NAME, + .disable = disable_MODEL_NAME, + .reset = da_monitor_reset_all_MODEL_NAME, + .enabled = 0, +}; + +static int __init register_MODEL_NAME(void) +{ + rv_register_monitor(&rv_MODEL_NAME); + return 0; +} + +static void __exit unregister_MODEL_NAME(void) +{ + rv_unregister_monitor(&rv_MODEL_NAME); +} + +module_init(register_MODEL_NAME); +module_exit(unregister_MODEL_NAME); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("dot2k: auto-generated"); +MODULE_DESCRIPTION("MODEL_NAME"); diff --git a/tools/verification/dot2/dot2k_templates/main_global.c b/tools/verification/dot2/dot2k_templates/main_global.c deleted file mode 100644 index a5658bfb9044..000000000000 --- a/tools/verification/dot2/dot2k_templates/main_global.c +++ /dev/null @@ -1,91 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include -#include -#include -#include -#include -#include -#include -#include - -#define MODULE_NAME "MODEL_NAME" - -/* - * XXX: include required tracepoint headers, e.g., - * #include - */ -#include - -/* - * This is the self-generated part of the monitor. Generally, there is no need - * to touch this section. - */ -#include "MODEL_NAME.h" - -/* - * Declare the deterministic automata monitor. - * - * The rv monitor reference is needed for the monitor declaration. - */ -static struct rv_monitor rv_MODEL_NAME; -DECLARE_DA_MON_GLOBAL(MODEL_NAME, MIN_TYPE); - -/* - * This is the instrumentation part of the monitor. - * - * This is the section where manual work is required. Here the kernel events - * are translated into model's event. - * - */ -TRACEPOINT_HANDLERS_SKEL -static int enable_MODEL_NAME(void) -{ - int retval; - - retval = da_monitor_init_MODEL_NAME(); - if (retval) - return retval; - -TRACEPOINT_ATTACH - - return 0; -} - -static void disable_MODEL_NAME(void) -{ - rv_MODEL_NAME.enabled = 0; - -TRACEPOINT_DETACH - - da_monitor_destroy_MODEL_NAME(); -} - -/* - * This is the monitor register section. 
- */ -static struct rv_monitor rv_MODEL_NAME = { - .name = "MODEL_NAME", - .description = "auto-generated MODEL_NAME", - .enable = enable_MODEL_NAME, - .disable = disable_MODEL_NAME, - .reset = da_monitor_reset_all_MODEL_NAME, - .enabled = 0, -}; - -static int __init register_MODEL_NAME(void) -{ - rv_register_monitor(&rv_MODEL_NAME); - return 0; -} - -static void __exit unregister_MODEL_NAME(void) -{ - rv_unregister_monitor(&rv_MODEL_NAME); -} - -module_init(register_MODEL_NAME); -module_exit(unregister_MODEL_NAME); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("dot2k: auto-generated"); -MODULE_DESCRIPTION("MODEL_NAME"); diff --git a/tools/verification/dot2/dot2k_templates/main_per_cpu.c b/tools/verification/dot2/dot2k_templates/main_per_cpu.c deleted file mode 100644 index 03539a97633f..000000000000 --- a/tools/verification/dot2/dot2k_templates/main_per_cpu.c +++ /dev/null @@ -1,91 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include -#include -#include -#include -#include -#include -#include -#include - -#define MODULE_NAME "MODEL_NAME" - -/* - * XXX: include required tracepoint headers, e.g., - * #include - */ -#include - -/* - * This is the self-generated part of the monitor. Generally, there is no need - * to touch this section. - */ -#include "MODEL_NAME.h" - -/* - * Declare the deterministic automata monitor. - * - * The rv monitor reference is needed for the monitor declaration. - */ -static struct rv_monitor rv_MODEL_NAME; -DECLARE_DA_MON_PER_CPU(MODEL_NAME, MIN_TYPE); - -/* - * This is the instrumentation part of the monitor. - * - * This is the section where manual work is required. Here the kernel events - * are translated into model's event. - * - */ -TRACEPOINT_HANDLERS_SKEL -static int enable_MODEL_NAME(void) -{ - int retval; - - retval = da_monitor_init_MODEL_NAME(); - if (retval) - return retval; - -TRACEPOINT_ATTACH - - return 0; -} - -static void disable_MODEL_NAME(void) -{ - rv_MODEL_NAME.enabled = 0; - -TRACEPOINT_DETACH - - da_monitor_destroy_MODEL_NAME(); -} - -/* - * This is the monitor register section. - */ -static struct rv_monitor rv_MODEL_NAME = { - .name = "MODEL_NAME", - .description = "auto-generated MODEL_NAME", - .enable = enable_MODEL_NAME, - .disable = disable_MODEL_NAME, - .reset = da_monitor_reset_all_MODEL_NAME, - .enabled = 0, -}; - -static int __init register_MODEL_NAME(void) -{ - rv_register_monitor(&rv_MODEL_NAME); - return 0; -} - -static void __exit unregister_MODEL_NAME(void) -{ - rv_unregister_monitor(&rv_MODEL_NAME); -} - -module_init(register_MODEL_NAME); -module_exit(unregister_MODEL_NAME); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("dot2k: auto-generated"); -MODULE_DESCRIPTION("MODEL_NAME"); diff --git a/tools/verification/dot2/dot2k_templates/main_per_task.c b/tools/verification/dot2/dot2k_templates/main_per_task.c deleted file mode 100644 index ffd92af87a86..000000000000 --- a/tools/verification/dot2/dot2k_templates/main_per_task.c +++ /dev/null @@ -1,91 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include -#include -#include -#include -#include -#include -#include -#include - -#define MODULE_NAME "MODEL_NAME" - -/* - * XXX: include required tracepoint headers, e.g., - * #include - */ -#include - -/* - * This is the self-generated part of the monitor. Generally, there is no need - * to touch this section. - */ -#include "MODEL_NAME.h" - -/* - * Declare the deterministic automata monitor. - * - * The rv monitor reference is needed for the monitor declaration. 
- */ -static struct rv_monitor rv_MODEL_NAME; -DECLARE_DA_MON_PER_TASK(MODEL_NAME, MIN_TYPE); - -/* - * This is the instrumentation part of the monitor. - * - * This is the section where manual work is required. Here the kernel events - * are translated into model's event. - * - */ -TRACEPOINT_HANDLERS_SKEL -static int enable_MODEL_NAME(void) -{ - int retval; - - retval = da_monitor_init_MODEL_NAME(); - if (retval) - return retval; - -TRACEPOINT_ATTACH - - return 0; -} - -static void disable_MODEL_NAME(void) -{ - rv_MODEL_NAME.enabled = 0; - -TRACEPOINT_DETACH - - da_monitor_destroy_MODEL_NAME(); -} - -/* - * This is the monitor register section. - */ -static struct rv_monitor rv_MODEL_NAME = { - .name = "MODEL_NAME", - .description = "auto-generated MODEL_NAME", - .enable = enable_MODEL_NAME, - .disable = disable_MODEL_NAME, - .reset = da_monitor_reset_all_MODEL_NAME, - .enabled = 0, -}; - -static int __init register_MODEL_NAME(void) -{ - rv_register_monitor(&rv_MODEL_NAME); - return 0; -} - -static void __exit unregister_MODEL_NAME(void) -{ - rv_unregister_monitor(&rv_MODEL_NAME); -} - -module_init(register_MODEL_NAME); -module_exit(unregister_MODEL_NAME); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("dot2k: auto-generated"); -MODULE_DESCRIPTION("MODEL_NAME"); -- cgit v1.2.3 From 91f3407e13b89b7391ebc5b6143fd22edd901041 Mon Sep 17 00:00:00 2001 From: Gabriele Monaco Date: Fri, 27 Dec 2024 15:47:47 +0100 Subject: verification/dot2k: More robust template variables The dot2k templates currently have variables that are automatically filled by the script marked as an uppercase VARIABLE. This requires some care while adding new variables to avoid using valid keywords and get them unexpectedly substituted. This patch switches the variables to the %%VARIABLE%% notation to make the pattern substitution more robust. 
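A small illustration of the failure mode the delimiters avoid, assuming a hypothetical variable named DESCRIPTION (the MODULE_DESCRIPTION() line does exist in the template; the replacement text is made up):

  # A bare uppercase variable can match unrelated text in the template;
  # substituting a variable called DESCRIPTION would also hit MODULE_DESCRIPTION:
  "MODULE_DESCRIPTION(\"MODEL_NAME\");".replace("DESCRIPTION", "my monitor")
  # -> 'MODULE_my monitor("MODEL_NAME");'

  # The delimited form only matches where intended:
  "MODULE_DESCRIPTION(\"%%DESCRIPTION%%\");".replace("%%DESCRIPTION%%", "my monitor")
  # -> 'MODULE_DESCRIPTION("my monitor");'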
Cc: Juri Lelli Cc: Thomas Gleixner Cc: John Kacur Link: https://lore.kernel.org/20241227144752.362911-4-gmonaco@redhat.com Signed-off-by: Gabriele Monaco Signed-off-by: Steven Rostedt (Google) --- tools/verification/dot2/dot2k.py | 14 ++++---- tools/verification/dot2/dot2k_templates/main.c | 50 +++++++++++++------------- 2 files changed, 32 insertions(+), 32 deletions(-) (limited to 'tools') diff --git a/tools/verification/dot2/dot2k.py b/tools/verification/dot2/dot2k.py index 15d6f7048f8d..c88b3c011706 100644 --- a/tools/verification/dot2/dot2k.py +++ b/tools/verification/dot2/dot2k.py @@ -107,13 +107,13 @@ class dot2k(Dot2c): tracepoint_attach = self.fill_tracepoint_attach_probe() tracepoint_detach = self.fill_tracepoint_detach_helper() - main_c = main_c.replace("MONITOR_TYPE", monitor_type) - main_c = main_c.replace("MIN_TYPE", min_type) - main_c = main_c.replace("MODEL_NAME", self.name) - main_c = main_c.replace("NR_EVENTS", str(nr_events)) - main_c = main_c.replace("TRACEPOINT_HANDLERS_SKEL", tracepoint_handlers) - main_c = main_c.replace("TRACEPOINT_ATTACH", tracepoint_attach) - main_c = main_c.replace("TRACEPOINT_DETACH", tracepoint_detach) + main_c = main_c.replace("%%MONITOR_TYPE%%", monitor_type) + main_c = main_c.replace("%%MIN_TYPE%%", min_type) + main_c = main_c.replace("%%MODEL_NAME%%", self.name) + main_c = main_c.replace("%%NR_EVENTS%%", str(nr_events)) + main_c = main_c.replace("%%TRACEPOINT_HANDLERS_SKEL%%", tracepoint_handlers) + main_c = main_c.replace("%%TRACEPOINT_ATTACH%%", tracepoint_attach) + main_c = main_c.replace("%%TRACEPOINT_DETACH%%", tracepoint_detach) return main_c diff --git a/tools/verification/dot2/dot2k_templates/main.c b/tools/verification/dot2/dot2k_templates/main.c index 2419a6f89cd8..4a05fef7f3c7 100644 --- a/tools/verification/dot2/dot2k_templates/main.c +++ b/tools/verification/dot2/dot2k_templates/main.c @@ -8,7 +8,7 @@ #include #include -#define MODULE_NAME "MODEL_NAME" +#define MODULE_NAME "%%MODEL_NAME%%" /* * XXX: include required tracepoint headers, e.g., @@ -20,15 +20,15 @@ * This is the self-generated part of the monitor. Generally, there is no need * to touch this section. */ -#include "MODEL_NAME.h" +#include "%%MODEL_NAME%%.h" /* * Declare the deterministic automata monitor. * * The rv monitor reference is needed for the monitor declaration. */ -static struct rv_monitor rv_MODEL_NAME; -DECLARE_DA_MON_MONITOR_TYPE(MODEL_NAME, MIN_TYPE); +static struct rv_monitor rv_%%MODEL_NAME%%; +DECLARE_DA_MON_%%MONITOR_TYPE%%(%%MODEL_NAME%%, %%MIN_TYPE%%); /* * This is the instrumentation part of the monitor. @@ -37,55 +37,55 @@ DECLARE_DA_MON_MONITOR_TYPE(MODEL_NAME, MIN_TYPE); * are translated into model's event. * */ -TRACEPOINT_HANDLERS_SKEL -static int enable_MODEL_NAME(void) +%%TRACEPOINT_HANDLERS_SKEL%% +static int enable_%%MODEL_NAME%%(void) { int retval; - retval = da_monitor_init_MODEL_NAME(); + retval = da_monitor_init_%%MODEL_NAME%%(); if (retval) return retval; -TRACEPOINT_ATTACH +%%TRACEPOINT_ATTACH%% return 0; } -static void disable_MODEL_NAME(void) +static void disable_%%MODEL_NAME%%(void) { - rv_MODEL_NAME.enabled = 0; + rv_%%MODEL_NAME%%.enabled = 0; -TRACEPOINT_DETACH +%%TRACEPOINT_DETACH%% - da_monitor_destroy_MODEL_NAME(); + da_monitor_destroy_%%MODEL_NAME%%(); } /* * This is the monitor register section. 
*/ -static struct rv_monitor rv_MODEL_NAME = { - .name = "MODEL_NAME", - .description = "auto-generated MODEL_NAME", - .enable = enable_MODEL_NAME, - .disable = disable_MODEL_NAME, - .reset = da_monitor_reset_all_MODEL_NAME, +static struct rv_monitor rv_%%MODEL_NAME%% = { + .name = "%%MODEL_NAME%%", + .description = "auto-generated %%MODEL_NAME%%", + .enable = enable_%%MODEL_NAME%%, + .disable = disable_%%MODEL_NAME%%, + .reset = da_monitor_reset_all_%%MODEL_NAME%%, .enabled = 0, }; -static int __init register_MODEL_NAME(void) +static int __init register_%%MODEL_NAME%%(void) { - rv_register_monitor(&rv_MODEL_NAME); + rv_register_monitor(&rv_%%MODEL_NAME%%); return 0; } -static void __exit unregister_MODEL_NAME(void) +static void __exit unregister_%%MODEL_NAME%%(void) { - rv_unregister_monitor(&rv_MODEL_NAME); + rv_unregister_monitor(&rv_%%MODEL_NAME%%); } -module_init(register_MODEL_NAME); -module_exit(unregister_MODEL_NAME); +module_init(register_%%MODEL_NAME%%); +module_exit(unregister_%%MODEL_NAME%%); MODULE_LICENSE("GPL"); MODULE_AUTHOR("dot2k: auto-generated"); -MODULE_DESCRIPTION("MODEL_NAME"); +MODULE_DESCRIPTION("%%MODEL_NAME%%"); -- cgit v1.2.3 From 64b3e5f0d45329bc593e13b64dcdcf836da006cd Mon Sep 17 00:00:00 2001 From: Gabriele Monaco Date: Fri, 27 Dec 2024 15:47:48 +0100 Subject: verification/dot2k: Add support for name and description options The dot2k command includes options to set a model name with -n and a description with -D, however those are not used in practice. This patch allows to specify a custom model name (by default the name of the dot file without extension) and a description which overrides the one in the C file. Cc: Juri Lelli Cc: Thomas Gleixner Cc: John Kacur Link: https://lore.kernel.org/20241227144752.362911-5-gmonaco@redhat.com Signed-off-by: Gabriele Monaco Signed-off-by: Steven Rostedt (Google) --- tools/verification/dot2/automata.py | 4 ++-- tools/verification/dot2/dot2c.py | 4 ++-- tools/verification/dot2/dot2k | 6 +----- tools/verification/dot2/dot2k.py | 8 +++++--- tools/verification/dot2/dot2k_templates/main.c | 4 ++-- 5 files changed, 12 insertions(+), 14 deletions(-) (limited to 'tools') diff --git a/tools/verification/dot2/automata.py b/tools/verification/dot2/automata.py index bdeb98baa8b0..f6921cf3c914 100644 --- a/tools/verification/dot2/automata.py +++ b/tools/verification/dot2/automata.py @@ -19,9 +19,9 @@ class Automata: invalid_state_str = "INVALID_STATE" - def __init__(self, file_path): + def __init__(self, file_path, model_name=None): self.__dot_path = file_path - self.name = self.__get_model_name() + self.name = model_name or self.__get_model_name() self.__dot_lines = self.__open_dot() self.states, self.initial_state, self.final_states = self.__get_state_variables() self.events = self.__get_event_variables() diff --git a/tools/verification/dot2/dot2c.py b/tools/verification/dot2/dot2c.py index 87d8a1e1470c..fa2816ac7b61 100644 --- a/tools/verification/dot2/dot2c.py +++ b/tools/verification/dot2/dot2c.py @@ -22,8 +22,8 @@ class Dot2c(Automata): struct_automaton_def = "automaton" var_automaton_def = "aut" - def __init__(self, file_path): - super().__init__(file_path) + def __init__(self, file_path, model_name=None): + super().__init__(file_path, model_name) self.line_length = 100 def __buff_to_string(self, buff): diff --git a/tools/verification/dot2/dot2k b/tools/verification/dot2/dot2k index d4d7e52d549e..827b62b8d5e1 100644 --- a/tools/verification/dot2/dot2k +++ b/tools/verification/dot2/dot2k @@ -25,16 +25,12 @@ if __name__ == 
'__main__': print("Opening and parsing the dot file %s" % params.dot_file) try: - monitor=dot2k(params.dot_file, params.monitor_type) + monitor=dot2k(params.dot_file, params.monitor_type, vars(params)) except Exception as e: print('Error: '+ str(e)) print("Sorry : :-(") sys.exit(1) - # easier than using argparse action. - if params.model_name != None: - print(params.model_name) - print("Writing the monitor into the directory %s" % monitor.name) monitor.print_files() print("Almost done, checklist") diff --git a/tools/verification/dot2/dot2k.py b/tools/verification/dot2/dot2k.py index c88b3c011706..d48ad86a035a 100644 --- a/tools/verification/dot2/dot2k.py +++ b/tools/verification/dot2/dot2k.py @@ -17,17 +17,18 @@ class dot2k(Dot2c): monitor_templates_dir = "dot2/dot2k_templates/" monitor_type = "per_cpu" - def __init__(self, file_path, MonitorType): - super().__init__(file_path) + def __init__(self, file_path, MonitorType, extra_params={}): + super().__init__(file_path, extra_params.get("model_name")) self.monitor_type = self.monitor_types.get(MonitorType) if self.monitor_type is None: - raise Exception("Unknown monitor type: %s" % MonitorType) + raise ValueError("Unknown monitor type: %s" % MonitorType) self.monitor_type = MonitorType self.__fill_rv_templates_dir() self.main_c = self.__open_file(self.monitor_templates_dir + "main.c") self.enum_suffix = "_%s" % self.name + self.description = extra_params.get("description", self.name) or "auto-generated" def __fill_rv_templates_dir(self): @@ -114,6 +115,7 @@ class dot2k(Dot2c): main_c = main_c.replace("%%TRACEPOINT_HANDLERS_SKEL%%", tracepoint_handlers) main_c = main_c.replace("%%TRACEPOINT_ATTACH%%", tracepoint_attach) main_c = main_c.replace("%%TRACEPOINT_DETACH%%", tracepoint_detach) + main_c = main_c.replace("%%DESCRIPTION%%", self.description) return main_c diff --git a/tools/verification/dot2/dot2k_templates/main.c b/tools/verification/dot2/dot2k_templates/main.c index 4a05fef7f3c7..704617168578 100644 --- a/tools/verification/dot2/dot2k_templates/main.c +++ b/tools/verification/dot2/dot2k_templates/main.c @@ -65,7 +65,7 @@ static void disable_%%MODEL_NAME%%(void) */ static struct rv_monitor rv_%%MODEL_NAME%% = { .name = "%%MODEL_NAME%%", - .description = "auto-generated %%MODEL_NAME%%", + .description = "%%DESCRIPTION%%", .enable = enable_%%MODEL_NAME%%, .disable = disable_%%MODEL_NAME%%, .reset = da_monitor_reset_all_%%MODEL_NAME%%, @@ -88,4 +88,4 @@ module_exit(unregister_%%MODEL_NAME%%); MODULE_LICENSE("GPL"); MODULE_AUTHOR("dot2k: auto-generated"); -MODULE_DESCRIPTION("%%MODEL_NAME%%"); +MODULE_DESCRIPTION("%%MODEL_NAME%%: %%DESCRIPTION%%"); -- cgit v1.2.3 From 9c6cfe80980056042f1f80d65c74806021708989 Mon Sep 17 00:00:00 2001 From: Gabriele Monaco Date: Fri, 27 Dec 2024 15:47:50 +0100 Subject: verification/dot2k: Simplify manual steps in monitor creation This patch reduces and simplifies the manual steps still needed in creating a new RV monitor. It extends the dot2k script to create a tracepoint snippet and a Kconfig file for the newly generated monitor. Those files can be kept in the monitor's directory but shall be included in the main tracepoint header and Kconfig. 
Together with the checklist, dot2k now suggests the lines to add to those files for inclusion and the Makefile line to compile the new monitor: Writing the monitor into the directory monitor_name Almost done, checklist - Edit the monitor_name/monitor_name.c to add the instrumentation - Edit kernel/trace/rv/rv_trace.h: Add this line where other tracepoints are included and DA_MON_EVENTS_ID is defined: #include - Edit kernel/trace/rv/Makefile: Add this line where other monitors are included: obj-$(CONFIG_RV_MON_MONITOR_NAME) += monitors/monitor_name/monitor_name.o - Edit kernel/trace/rv/Kconfig: Add this line where other monitors are included: source "kernel/trace/rv/monitors/monitor_name/Kconfig" - Move monitor_name/ to the kernel's monitor directory (kernel/trace/rv/monitors) Cc: Juri Lelli Cc: Thomas Gleixner Cc: John Kacur Link: https://lore.kernel.org/20241227144752.362911-7-gmonaco@redhat.com Signed-off-by: Gabriele Monaco Signed-off-by: Steven Rostedt (Google) --- tools/verification/dot2/dot2k | 8 +-- tools/verification/dot2/dot2k.py | 86 +++++++++++++++++++++++++ tools/verification/dot2/dot2k_templates/Kconfig | 6 ++ tools/verification/dot2/dot2k_templates/main.c | 2 +- tools/verification/dot2/dot2k_templates/trace.h | 13 ++++ 5 files changed, 110 insertions(+), 5 deletions(-) create mode 100644 tools/verification/dot2/dot2k_templates/Kconfig create mode 100644 tools/verification/dot2/dot2k_templates/trace.h (limited to 'tools') diff --git a/tools/verification/dot2/dot2k b/tools/verification/dot2/dot2k index 827b62b8d5e1..190c974edd0a 100644 --- a/tools/verification/dot2/dot2k +++ b/tools/verification/dot2/dot2k @@ -35,7 +35,7 @@ if __name__ == '__main__': monitor.print_files() print("Almost done, checklist") print(" - Edit the %s/%s.c to add the instrumentation" % (monitor.name, monitor.name)) - print(" - Edit include/trace/events/rv.h to add the tracepoint entry") - print(" - Move it to the kernel's monitor directory") - print(" - Edit kernel/trace/rv/Makefile") - print(" - Edit kernel/trace/rv/Kconfig") + print(monitor.fill_tracepoint_tooltip()) + print(monitor.fill_makefile_tooltip()) + print(monitor.fill_kconfig_tooltip()) + print(" - Move %s/ to the kernel's monitor directory (%s/monitors)" % (monitor.name, monitor.rv_dir)) diff --git a/tools/verification/dot2/dot2k.py b/tools/verification/dot2/dot2k.py index d48ad86a035a..dc56cd1fb0b4 100644 --- a/tools/verification/dot2/dot2k.py +++ b/tools/verification/dot2/dot2k.py @@ -15,6 +15,7 @@ import os class dot2k(Dot2c): monitor_types = { "global" : 1, "per_cpu" : 2, "per_task" : 3 } monitor_templates_dir = "dot2/dot2k_templates/" + rv_dir = "kernel/trace/rv" monitor_type = "per_cpu" def __init__(self, file_path, MonitorType, extra_params={}): @@ -27,6 +28,8 @@ class dot2k(Dot2c): self.monitor_type = MonitorType self.__fill_rv_templates_dir() self.main_c = self.__open_file(self.monitor_templates_dir + "main.c") + self.trace_h = self.__open_file(self.monitor_templates_dir + "trace.h") + self.kconfig = self.__open_file(self.monitor_templates_dir + "Kconfig") self.enum_suffix = "_%s" % self.name self.description = extra_params.get("description", self.name) or "auto-generated" @@ -144,6 +147,82 @@ class dot2k(Dot2c): return self.__buff_to_string(buff) + def fill_monitor_class_type(self): + if self.monitor_type == "per_task": + return "DA_MON_EVENTS_ID" + return "DA_MON_EVENTS_IMPLICIT" + + def fill_monitor_class(self): + if self.monitor_type == "per_task": + return "da_monitor_id" + return "da_monitor" + + def 
fill_tracepoint_args_skel(self, tp_type): + buff = [] + tp_args_event = [ + ("char *", "state"), + ("char *", "event"), + ("char *", "next_state"), + ("bool ", "final_state"), + ] + tp_args_error = [ + ("char *", "state"), + ("char *", "event"), + ] + tp_args_id = ("int ", "id") + tp_args = tp_args_event if tp_type == "event" else tp_args_error + if self.monitor_type == "per_task": + tp_args.insert(0, tp_args_id) + tp_proto_c = ", ".join([a+b for a,b in tp_args]) + tp_args_c = ", ".join([b for a,b in tp_args]) + buff.append(" TP_PROTO(%s)," % tp_proto_c) + buff.append(" TP_ARGS(%s)" % tp_args_c) + return self.__buff_to_string(buff) + + def fill_trace_h(self): + trace_h = self.trace_h + monitor_class = self.fill_monitor_class() + monitor_class_type = self.fill_monitor_class_type() + tracepoint_args_skel_event = self.fill_tracepoint_args_skel("event") + tracepoint_args_skel_error = self.fill_tracepoint_args_skel("error") + trace_h = trace_h.replace("%%MODEL_NAME%%", self.name) + trace_h = trace_h.replace("%%MODEL_NAME_UP%%", self.name.upper()) + trace_h = trace_h.replace("%%MONITOR_CLASS%%", monitor_class) + trace_h = trace_h.replace("%%MONITOR_CLASS_TYPE%%", monitor_class_type) + trace_h = trace_h.replace("%%TRACEPOINT_ARGS_SKEL_EVENT%%", tracepoint_args_skel_event) + trace_h = trace_h.replace("%%TRACEPOINT_ARGS_SKEL_ERROR%%", tracepoint_args_skel_error) + return trace_h + + def fill_kconfig(self): + kconfig = self.kconfig + monitor_class_type = self.fill_monitor_class_type() + kconfig = kconfig.replace("%%MODEL_NAME%%", self.name) + kconfig = kconfig.replace("%%MODEL_NAME_UP%%", self.name.upper()) + kconfig = kconfig.replace("%%MONITOR_CLASS_TYPE%%", monitor_class_type) + kconfig = kconfig.replace("%%DESCRIPTION%%", self.description) + return kconfig + + def fill_tracepoint_tooltip(self): + monitor_class_type = self.fill_monitor_class_type() + return """ - Edit %s/rv_trace.h: +Add this line where other tracepoints are included and %s is defined: +#include +""" % (self.rv_dir, monitor_class_type, self.name, self.name) + + def fill_kconfig_tooltip(self): + return """ - Edit %s/Kconfig: +Add this line where other monitors are included: +source \"kernel/trace/rv/monitors/%s/Kconfig\" +""" % (self.rv_dir, self.name) + + def fill_makefile_tooltip(self): + name = self.name + name_up = name.upper() + return """ - Edit %s/Makefile: +Add this line where other monitors are included: +obj-$(CONFIG_RV_MON_%s) += monitors/%s/%s.o +""" % (self.rv_dir, name_up, name, name) + def __create_directory(self): try: os.mkdir(self.name) @@ -182,3 +261,10 @@ class dot2k(Dot2c): path = "%s.h" % self.name self.__create_file(path, model_h) + + trace_h = self.fill_trace_h() + path = "%s_trace.h" % self.name + self.__create_file(path, trace_h) + + kconfig = self.fill_kconfig() + self.__create_file("Kconfig", kconfig) diff --git a/tools/verification/dot2/dot2k_templates/Kconfig b/tools/verification/dot2/dot2k_templates/Kconfig new file mode 100644 index 000000000000..90cdc1e9379e --- /dev/null +++ b/tools/verification/dot2/dot2k_templates/Kconfig @@ -0,0 +1,6 @@ +config RV_MON_%%MODEL_NAME_UP%% + depends on RV + select %%MONITOR_CLASS_TYPE%% + bool "%%MODEL_NAME%% monitor" + help + %%DESCRIPTION%% diff --git a/tools/verification/dot2/dot2k_templates/main.c b/tools/verification/dot2/dot2k_templates/main.c index 704617168578..9605ca994416 100644 --- a/tools/verification/dot2/dot2k_templates/main.c +++ b/tools/verification/dot2/dot2k_templates/main.c @@ -14,7 +14,7 @@ * XXX: include required tracepoint headers, e.g., * 
#include */ -#include +#include /* * This is the self-generated part of the monitor. Generally, there is no need diff --git a/tools/verification/dot2/dot2k_templates/trace.h b/tools/verification/dot2/dot2k_templates/trace.h new file mode 100644 index 000000000000..87d3a1308926 --- /dev/null +++ b/tools/verification/dot2/dot2k_templates/trace.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +/* + * Snippet to be included in rv_trace.h + */ + +#ifdef CONFIG_RV_MON_%%MODEL_NAME_UP%% +DEFINE_EVENT(event_%%MONITOR_CLASS%%, event_%%MODEL_NAME%%, +%%TRACEPOINT_ARGS_SKEL_EVENT%%); + +DEFINE_EVENT(error_%%MONITOR_CLASS%%, error_%%MODEL_NAME%%, +%%TRACEPOINT_ARGS_SKEL_ERROR%%); +#endif /* CONFIG_RV_MON_%%MODEL_NAME_UP%% */ -- cgit v1.2.3 From de6f45c2dd226269fe9886290a139533c817c5bc Mon Sep 17 00:00:00 2001 From: Gabriele Monaco Date: Fri, 27 Dec 2024 15:47:51 +0100 Subject: verification/dot2k: Auto patch current kernel source dot2k suggests a list of changes to the kernel tree while adding a monitor: edit tracepoints header, Makefile, Kconfig and moving the monitor folder. Those changes can be easily run automatically. Add a flag to dot2k to alter the kernel source. The kernel source directory can be either assumed from the PWD, or from the running kernel, if installed. This feature works best if the kernel tree is a git repository, so that its easier to make sure there are no unintended changes. The main RV files (e.g. Makefile) have now a comment placeholder that can be useful for manual editing (e.g. to know where to add new monitors) and it is used by the script to append the required lines. We also slightly adapt the file handling functions in dot2k: __open_file is now called __read_file and also closes the file before returning the content; __create_file is now a more general __write_file, we no longer return on FileExistsError (not thrown while opening), a new __create_file simply calls __write_file specifying the monitor folder in the path. 
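The in-place patching is a simple marker-based edit. A minimal sketch of the idea, simplified from the __patch_file() helper in the diff below (the "wip" monitor name is only an example):

  def patch_file(path, marker, line):
      # insert `line` just above the placeholder comment kept in the RV files
      with open(path) as f:
          content = f.read()
      with open(path, "w") as f:
          f.write(content.replace(marker, line + "\n" + marker))

  patch_file("kernel/trace/rv/Makefile",
             "# Add new monitors here",
             "obj-$(CONFIG_RV_MON_WIP) += monitors/wip/wip.o")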
Cc: Juri Lelli Cc: Thomas Gleixner Cc: John Kacur Link: https://lore.kernel.org/20241227144752.362911-8-gmonaco@redhat.com Signed-off-by: Gabriele Monaco Signed-off-by: Steven Rostedt (Google) --- tools/verification/dot2/dot2k | 5 ++- tools/verification/dot2/dot2k.py | 92 ++++++++++++++++++++++++++++++++++------ 2 files changed, 82 insertions(+), 15 deletions(-) (limited to 'tools') diff --git a/tools/verification/dot2/dot2k b/tools/verification/dot2/dot2k index 190c974edd0a..559ba191a1f6 100644 --- a/tools/verification/dot2/dot2k +++ b/tools/verification/dot2/dot2k @@ -21,6 +21,9 @@ if __name__ == '__main__': parser.add_argument('-t', "--monitor_type", dest="monitor_type", required=True) parser.add_argument('-n', "--model_name", dest="model_name", required=False) parser.add_argument("-D", "--description", dest="description", required=False) + parser.add_argument("-a", "--auto_patch", dest="auto_patch", + action="store_true", required=False, + help="Patch the kernel in place") params = parser.parse_args() print("Opening and parsing the dot file %s" % params.dot_file) @@ -38,4 +41,4 @@ if __name__ == '__main__': print(monitor.fill_tracepoint_tooltip()) print(monitor.fill_makefile_tooltip()) print(monitor.fill_kconfig_tooltip()) - print(" - Move %s/ to the kernel's monitor directory (%s/monitors)" % (monitor.name, monitor.rv_dir)) + print(monitor.fill_monitor_tooltip()) diff --git a/tools/verification/dot2/dot2k.py b/tools/verification/dot2/dot2k.py index dc56cd1fb0b4..83f4d49853a2 100644 --- a/tools/verification/dot2/dot2k.py +++ b/tools/verification/dot2/dot2k.py @@ -27,11 +27,14 @@ class dot2k(Dot2c): self.monitor_type = MonitorType self.__fill_rv_templates_dir() - self.main_c = self.__open_file(self.monitor_templates_dir + "main.c") - self.trace_h = self.__open_file(self.monitor_templates_dir + "trace.h") - self.kconfig = self.__open_file(self.monitor_templates_dir + "Kconfig") + self.main_c = self.__read_file(self.monitor_templates_dir + "main.c") + self.trace_h = self.__read_file(self.monitor_templates_dir + "trace.h") + self.kconfig = self.__read_file(self.monitor_templates_dir + "Kconfig") self.enum_suffix = "_%s" % self.name self.description = extra_params.get("description", self.name) or "auto-generated" + self.auto_patch = extra_params.get("auto_patch") + if self.auto_patch: + self.__fill_rv_kernel_dir() def __fill_rv_templates_dir(self): @@ -39,7 +42,7 @@ class dot2k(Dot2c): return if platform.system() != "Linux": - raise Exception("I can only run on Linux.") + raise OSError("I can only run on Linux.") kernel_path = "/lib/modules/%s/build/tools/verification/dot2/dot2k_templates/" % (platform.release()) @@ -51,17 +54,43 @@ class dot2k(Dot2c): self.monitor_templates_dir = "/usr/share/dot2/dot2k_templates/" return - raise Exception("Could not find the template directory, do you have the kernel source installed?") + raise FileNotFoundError("Could not find the template directory, do you have the kernel source installed?") + def __fill_rv_kernel_dir(self): - def __open_file(self, path): + # first try if we are running in the kernel tree root + if os.path.exists(self.rv_dir): + return + + # offset if we are running inside the kernel tree from verification/dot2 + kernel_path = os.path.join("../..", self.rv_dir) + + if os.path.exists(kernel_path): + self.rv_dir = kernel_path + return + + if platform.system() != "Linux": + raise OSError("I can only run on Linux.") + + kernel_path = os.path.join("/lib/modules/%s/build" % platform.release(), self.rv_dir) + + # if the current kernel is from a 
distro this may not be a full kernel tree + # verify that one of the files we are going to modify is available + if os.path.exists(os.path.join(kernel_path, "rv_trace.h")): + self.rv_dir = kernel_path + return + + raise FileNotFoundError("Could not find the rv directory, do you have the kernel source installed?") + + def __read_file(self, path): try: - fd = open(path) + fd = open(path, 'r') except OSError: raise Exception("Cannot open the file: %s" % path) content = fd.read() + fd.close() return content def __buff_to_string(self, buff): @@ -202,14 +231,32 @@ class dot2k(Dot2c): kconfig = kconfig.replace("%%DESCRIPTION%%", self.description) return kconfig + def __patch_file(self, file, marker, line): + file_to_patch = os.path.join(self.rv_dir, file) + content = self.__read_file(file_to_patch) + content = content.replace(marker, line + "\n" + marker) + self.__write_file(file_to_patch, content) + def fill_tracepoint_tooltip(self): monitor_class_type = self.fill_monitor_class_type() + if self.auto_patch: + self.__patch_file("rv_trace.h", + "// Add new monitors based on CONFIG_%s here" % monitor_class_type, + "#include " % (self.name, self.name)) + return " - Patching %s/rv_trace.h, double check the result" % self.rv_dir + return """ - Edit %s/rv_trace.h: Add this line where other tracepoints are included and %s is defined: #include """ % (self.rv_dir, monitor_class_type, self.name, self.name) def fill_kconfig_tooltip(self): + if self.auto_patch: + self.__patch_file("Kconfig", + "# Add new monitors here", + "source \"kernel/trace/rv/monitors/%s/Kconfig\"" % (self.name)) + return " - Patching %s/Kconfig, double check the result" % self.rv_dir + return """ - Edit %s/Kconfig: Add this line where other monitors are included: source \"kernel/trace/rv/monitors/%s/Kconfig\" @@ -218,32 +265,49 @@ source \"kernel/trace/rv/monitors/%s/Kconfig\" def fill_makefile_tooltip(self): name = self.name name_up = name.upper() + if self.auto_patch: + self.__patch_file("Makefile", + "# Add new monitors here", + "obj-$(CONFIG_RV_MON_%s) += monitors/%s/%s.o" % (name_up, name, name)) + return " - Patching %s/Makefile, double check the result" % self.rv_dir + return """ - Edit %s/Makefile: Add this line where other monitors are included: obj-$(CONFIG_RV_MON_%s) += monitors/%s/%s.o """ % (self.rv_dir, name_up, name, name) + def fill_monitor_tooltip(self): + if self.auto_patch: + return " - Monitor created in %s/monitors/%s" % (self.rv_dir, self. 
name) + return " - Move %s/ to the kernel's monitor directory (%s/monitors)" % (self.name, self.rv_dir) + def __create_directory(self): + path = self.name + if self.auto_patch: + path = os.path.join(self.rv_dir, "monitors", path) try: - os.mkdir(self.name) + os.mkdir(path) except FileExistsError: return except: print("Fail creating the output dir: %s" % self.name) - def __create_file(self, file_name, content): - path = "%s/%s" % (self.name, file_name) + def __write_file(self, file_name, content): try: - file = open(path, 'w') - except FileExistsError: - return + file = open(file_name, 'w') except: - print("Fail creating file: %s" % path) + print("Fail writing to file: %s" % file_name) file.write(content) file.close() + def __create_file(self, file_name, content): + path = "%s/%s" % (self.name, file_name) + if self.auto_patch: + path = os.path.join(self.rv_dir, "monitors", path) + self.__write_file(path, content) + def __get_main_name(self): path = "%s/%s" % (self.name, "main.c") if not os.path.exists(path): -- cgit v1.2.3 From 87c5d7f5e5938f713bde4e7435e6b207372a7f8e Mon Sep 17 00:00:00 2001 From: Gabriele Monaco Date: Fri, 27 Dec 2024 15:47:52 +0100 Subject: verification/dot2k: Implement event type detection Currently dot2k treats all events equally and registers them with a general da_handle_event. This is however just part of the work because some events are necessary to understand when the monitor is entering the initial state. Specifically, the da_handle_start_event takes care of setting the monitor in the initial state and da_handle_start_run_event also registers the current event in the newly enabled monitor. da_handle_start_event can be used on events that only lead to the initial state (as it is currently done in the example monitors), while da_handle_start_run_event could be used on events that are only valid from the initial one. Failing to set at least one of those functions to handle events makes the monitor useless, since it will never be activated. This patch adapts dot2k to parse the events that surely lead to the initial state and set da_handle_start_event for those, if no such event is found but some events are only valid in the initial event, we instead set da_handle_start_run_event (it isn't necessary to set both). We still add a comment to warn the user to make sure this change is matching the model definition. 
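Conceptually, the classification is two checks per event over the transition matrix. A simplified sketch, assuming the first matrix row corresponds to the initial state (this is not the exact dot2k code):

  def pick_handler(matrix, initial_state, invalid, col):
      used = [row[col] for row in matrix if row[col] != invalid]
      # every state where the event is valid transitions to the initial state
      if used and all(nxt == initial_state for nxt in used):
          return "da_handle_start_event"
      # the event is only valid in the initial state
      if len(used) == 1 and matrix[0][col] != invalid:
          return "da_handle_start_run_event"
      return "da_handle_event"

In the generator the preference is global: da_handle_start_run_event is only suggested when no event qualifies for da_handle_start_event.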
Cc: Juri Lelli Cc: Thomas Gleixner Cc: John Kacur Link: https://lore.kernel.org/20241227144752.362911-9-gmonaco@redhat.com Signed-off-by: Gabriele Monaco Signed-off-by: Steven Rostedt (Google) --- tools/verification/dot2/automata.py | 32 ++++++++++++++++++++++++++++++++ tools/verification/dot2/dot2k.py | 11 +++++++++-- 2 files changed, 41 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/verification/dot2/automata.py b/tools/verification/dot2/automata.py index f6921cf3c914..d9a3fe2b74bf 100644 --- a/tools/verification/dot2/automata.py +++ b/tools/verification/dot2/automata.py @@ -26,6 +26,7 @@ class Automata: self.states, self.initial_state, self.final_states = self.__get_state_variables() self.events = self.__get_event_variables() self.function = self.__create_matrix() + self.events_start, self.events_start_run = self.__store_init_events() def __get_model_name(self): basename = ntpath.basename(self.__dot_path) @@ -172,3 +173,34 @@ class Automata: cursor += 1 return matrix + + def __store_init_events(self): + events_start = [False] * len(self.events) + events_start_run = [False] * len(self.events) + for i, _ in enumerate(self.events): + curr_event_will_init = 0 + curr_event_from_init = False + curr_event_used = 0 + for j, _ in enumerate(self.states): + if self.function[j][i] != self.invalid_state_str: + curr_event_used += 1 + if self.function[j][i] == self.initial_state: + curr_event_will_init += 1 + if self.function[0][i] != self.invalid_state_str: + curr_event_from_init = True + # this event always leads to init + if curr_event_will_init and curr_event_used == curr_event_will_init: + events_start[i] = True + # this event is only called from init + if curr_event_from_init and curr_event_used == 1: + events_start_run[i] = True + return events_start, events_start_run + + def is_start_event(self, event): + return self.events_start[self.events.index(event)] + + def is_start_run_event(self, event): + # prefer handle_start_event if there + if any(self.events_start): + return False + return self.events_start_run[self.events.index(event)] diff --git a/tools/verification/dot2/dot2k.py b/tools/verification/dot2/dot2k.py index 83f4d49853a2..7547eb290b7d 100644 --- a/tools/verification/dot2/dot2k.py +++ b/tools/verification/dot2/dot2k.py @@ -110,11 +110,18 @@ class dot2k(Dot2c): for event in self.events: buff.append("static void handle_%s(void *data, /* XXX: fill header */)" % event) buff.append("{") + handle = "handle_event" + if self.is_start_event(event): + buff.append("\t/* XXX: validate that this event always leads to the initial state */") + handle = "handle_start_event" + elif self.is_start_run_event(event): + buff.append("\t/* XXX: validate that this event is only valid in the initial state */") + handle = "handle_start_run_event" if self.monitor_type == "per_task": buff.append("\tstruct task_struct *p = /* XXX: how do I get p? 
*/;"); - buff.append("\tda_handle_event_%s(p, %s%s);" % (self.name, event, self.enum_suffix)); + buff.append("\tda_%s_%s(p, %s%s);" % (handle, self.name, event, self.enum_suffix)); else: - buff.append("\tda_handle_event_%s(%s%s);" % (self.name, event, self.enum_suffix)); + buff.append("\tda_%s_%s(%s%s);" % (handle, self.name, event, self.enum_suffix)); buff.append("}") buff.append("") return self.__buff_to_string(buff) -- cgit v1.2.3 From 31ad36a271290648e7c2288a03d7b933d20254d6 Mon Sep 17 00:00:00 2001 From: chenchangcheng Date: Fri, 20 Dec 2024 15:48:47 +0800 Subject: objtool: Add bch2_trans_unlocked_error() to bcachefs noreturns Fix the following objtool warning during build time: fs/bcachefs/btree_trans_commit.o: warning: objtool: bch2_trans_commit_write_locked.isra.0() falls through to next function do_bch2_trans_commit.isra.0() fs/bcachefs/btree_trans_commit.o: warning: objtool: .text: unexpected end of section ...... fs/bcachefs/btree_update.o: warning: objtool: bch2_trans_update_get_key_cache() falls through to next function flush_new_cached_update() fs/bcachefs/btree_update.o: warning: objtool: flush_new_cached_update() falls through to next function bch2_trans_update_by_path() bch2_trans_unlocked_error() is an Obviously Correct (tm) panic() wrapper, add it to the list of known noreturns. [ mingo: Improved the changelog ] Fixes: fd104e2967b7 ("bcachefs: bch2_trans_verify_not_unlocked()") Signed-off-by: chenchangcheng Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Link: https://lkml.kernel.org/r/20241220074847.3418134-1-ccc194101@163.com --- tools/objtool/noreturns.h | 1 + 1 file changed, 1 insertion(+) (limited to 'tools') diff --git a/tools/objtool/noreturns.h b/tools/objtool/noreturns.h index f37614cc2c1b..b2174894f9f7 100644 --- a/tools/objtool/noreturns.h +++ b/tools/objtool/noreturns.h @@ -19,6 +19,7 @@ NORETURN(__x64_sys_exit_group) NORETURN(arch_cpu_idle_dead) NORETURN(bch2_trans_in_restart_error) NORETURN(bch2_trans_restart_error) +NORETURN(bch2_trans_unlocked_error) NORETURN(cpu_bringup_and_idle) NORETURN(cpu_startup_entry) NORETURN(do_exit) -- cgit v1.2.3 From cc57f6cbef65c796a5661decaeffe3f5de397d19 Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Thu, 17 Oct 2024 09:45:41 +0200 Subject: KVM: riscv: selftests: Add SBI SUSP to get-reg-list test KVM supports SBI SUSP, so add it to the get-reg-list test. 
Signed-off-by: Andrew Jones Reviewed-by: Anup Patel Link: https://lore.kernel.org/r/20241017074538.18867-6-ajones@ventanamicro.com Signed-off-by: Anup Patel --- tools/testing/selftests/kvm/riscv/get-reg-list.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/kvm/riscv/get-reg-list.c b/tools/testing/selftests/kvm/riscv/get-reg-list.c index 4bc1051848e5..ea4f31660bc7 100644 --- a/tools/testing/selftests/kvm/riscv/get-reg-list.c +++ b/tools/testing/selftests/kvm/riscv/get-reg-list.c @@ -112,6 +112,7 @@ bool filter_reg(__u64 reg) case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_HSM: case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_PMU: case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_DBCN: + case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_SUSP: case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_STA: case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_EXPERIMENTAL: case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_VENDOR: @@ -535,10 +536,11 @@ static const char *sbi_ext_single_id_to_str(__u64 reg_off) KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_SRST), KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_HSM), KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_PMU), + KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_DBCN), + KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_SUSP), KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_STA), KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_EXPERIMENTAL), KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_VENDOR), - KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_DBCN), }; if (reg_off >= ARRAY_SIZE(kvm_sbi_ext_reg_name)) @@ -949,6 +951,7 @@ KVM_SBI_EXT_SUBLIST_CONFIG(base, BASE); KVM_SBI_EXT_SUBLIST_CONFIG(sta, STA); KVM_SBI_EXT_SIMPLE_CONFIG(pmu, PMU); KVM_SBI_EXT_SIMPLE_CONFIG(dbcn, DBCN); +KVM_SBI_EXT_SIMPLE_CONFIG(susp, SUSP); KVM_ISA_EXT_SUBLIST_CONFIG(aia, AIA); KVM_ISA_EXT_SUBLIST_CONFIG(fp_f, FP_F); @@ -1017,6 +1020,7 @@ struct vcpu_reg_list *vcpu_configs[] = { &config_sbi_sta, &config_sbi_pmu, &config_sbi_dbcn, + &config_sbi_susp, &config_aia, &config_fp_f, &config_fp_d, -- cgit v1.2.3 From 144dfe4017bfe13cc2d459c2c4a7a4dc832c100c Mon Sep 17 00:00:00 2001 From: Quan Zhou Date: Mon, 2 Dec 2024 11:22:12 +0800 Subject: KVM: riscv: selftests: Add Svvptc/Zabha/Ziccrse exts to get-reg-list test The KVM RISC-V allows Svvptc/Zabha/Ziccrse extensions for Guest/VM so add them to get-reg-list test. 
Signed-off-by: Quan Zhou Reviewed-by: Andrew Jones Link: https://lore.kernel.org/r/35163f0443993a942e0a021c6006bc5d2f0f5d5f.1732854096.git.zhouquan@iscas.ac.cn Signed-off-by: Anup Patel --- tools/testing/selftests/kvm/riscv/get-reg-list.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/kvm/riscv/get-reg-list.c b/tools/testing/selftests/kvm/riscv/get-reg-list.c index ea4f31660bc7..8515921dfdbf 100644 --- a/tools/testing/selftests/kvm/riscv/get-reg-list.c +++ b/tools/testing/selftests/kvm/riscv/get-reg-list.c @@ -52,6 +52,8 @@ bool filter_reg(__u64 reg) case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SVINVAL: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SVNAPOT: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SVPBMT: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SVVPTC: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZABHA: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZACAS: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZAWRS: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZBA: @@ -71,6 +73,7 @@ bool filter_reg(__u64 reg) case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZFHMIN: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZICBOM: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZICBOZ: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZICCRSE: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZICNTR: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZICOND: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZICSR: @@ -430,6 +433,8 @@ static const char *isa_ext_single_id_to_str(__u64 reg_off) KVM_ISA_EXT_ARR(SVINVAL), KVM_ISA_EXT_ARR(SVNAPOT), KVM_ISA_EXT_ARR(SVPBMT), + KVM_ISA_EXT_ARR(SVVPTC), + KVM_ISA_EXT_ARR(ZABHA), KVM_ISA_EXT_ARR(ZACAS), KVM_ISA_EXT_ARR(ZAWRS), KVM_ISA_EXT_ARR(ZBA), @@ -449,6 +454,7 @@ static const char *isa_ext_single_id_to_str(__u64 reg_off) KVM_ISA_EXT_ARR(ZFHMIN), KVM_ISA_EXT_ARR(ZICBOM), KVM_ISA_EXT_ARR(ZICBOZ), + KVM_ISA_EXT_ARR(ZICCRSE), KVM_ISA_EXT_ARR(ZICNTR), KVM_ISA_EXT_ARR(ZICOND), KVM_ISA_EXT_ARR(ZICSR), @@ -967,6 +973,8 @@ KVM_ISA_EXT_SIMPLE_CONFIG(svadu, SVADU); KVM_ISA_EXT_SIMPLE_CONFIG(svinval, SVINVAL); KVM_ISA_EXT_SIMPLE_CONFIG(svnapot, SVNAPOT); KVM_ISA_EXT_SIMPLE_CONFIG(svpbmt, SVPBMT); +KVM_ISA_EXT_SIMPLE_CONFIG(svvptc, SVVPTC); +KVM_ISA_EXT_SIMPLE_CONFIG(zabha, ZABHA); KVM_ISA_EXT_SIMPLE_CONFIG(zacas, ZACAS); KVM_ISA_EXT_SIMPLE_CONFIG(zawrs, ZAWRS); KVM_ISA_EXT_SIMPLE_CONFIG(zba, ZBA); @@ -986,6 +994,7 @@ KVM_ISA_EXT_SIMPLE_CONFIG(zfh, ZFH); KVM_ISA_EXT_SIMPLE_CONFIG(zfhmin, ZFHMIN); KVM_ISA_EXT_SUBLIST_CONFIG(zicbom, ZICBOM); KVM_ISA_EXT_SUBLIST_CONFIG(zicboz, ZICBOZ); +KVM_ISA_EXT_SIMPLE_CONFIG(ziccrse, ZICCRSE); KVM_ISA_EXT_SIMPLE_CONFIG(zicntr, ZICNTR); KVM_ISA_EXT_SIMPLE_CONFIG(zicond, ZICOND); KVM_ISA_EXT_SIMPLE_CONFIG(zicsr, ZICSR); @@ -1035,6 +1044,8 @@ struct vcpu_reg_list *vcpu_configs[] = { &config_svinval, &config_svnapot, &config_svpbmt, + &config_svvptc, + &config_zabha, &config_zacas, &config_zawrs, &config_zba, @@ -1054,6 +1065,7 @@ struct vcpu_reg_list *vcpu_configs[] = { &config_zfhmin, &config_zicbom, &config_zicboz, + &config_ziccrse, &config_zicntr, &config_zicond, 
&config_zicsr, -- cgit v1.2.3 From 1846dd8e3a3e28f58e72cadbf4d81f374e63a085 Mon Sep 17 00:00:00 2001 From: Daniel Xu Date: Mon, 30 Dec 2024 14:31:22 -0700 Subject: libbpf: Set MFD_NOEXEC_SEAL when creating memfd Starting from 105ff5339f49 ("mm/memfd: add MFD_NOEXEC_SEAL and MFD_EXEC") and until 1717449b4417 ("memfd: drop warning for missing exec-related flags"), the kernel would print a warning if neither MFD_NOEXEC_SEAL nor MFD_EXEC is set in memfd_create(). If libbpf runs on on a kernel between these two commits (eg. on an improperly backported system), it'll trigger this warning. To avoid this warning (and also be more secure), explicitly set MFD_NOEXEC_SEAL. But since libbpf can be run on potentially very old kernels, leave a fallback for kernels without MFD_NOEXEC_SEAL support. Signed-off-by: Daniel Xu Link: https://lore.kernel.org/r/6e62c2421ad7eb1da49cbf16da95aaaa7f94d394.1735594195.git.dxu@dxuuu.xyz Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/libbpf.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 66173ddb5a2d..46492cc0927d 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -1731,12 +1731,24 @@ static int sys_memfd_create(const char *name, unsigned flags) #ifndef MFD_CLOEXEC #define MFD_CLOEXEC 0x0001U #endif +#ifndef MFD_NOEXEC_SEAL +#define MFD_NOEXEC_SEAL 0x0008U +#endif static int create_placeholder_fd(void) { + unsigned int flags = MFD_CLOEXEC | MFD_NOEXEC_SEAL; + const char *name = "libbpf-placeholder-fd"; int fd; - fd = ensure_good_fd(sys_memfd_create("libbpf-placeholder-fd", MFD_CLOEXEC)); + fd = ensure_good_fd(sys_memfd_create(name, flags)); + if (fd >= 0) + return fd; + else if (errno != EINVAL) + return -errno; + + /* Possibly running on kernel without MFD_NOEXEC_SEAL */ + fd = ensure_good_fd(sys_memfd_create(name, flags & ~MFD_NOEXEC_SEAL)); if (fd < 0) return -errno; return fd; -- cgit v1.2.3 From 75137d9ebe9e75358e859fda37fa1ca9f05c1a59 Mon Sep 17 00:00:00 2001 From: Matan Shachnai Date: Tue, 17 Dec 2024 22:23:35 -0500 Subject: selftests/bpf: Add testcases for BPF_MUL The previous commit improves precision of BPF_MUL. Add tests to exercise updated BPF_MUL. Signed-off-by: Matan Shachnai Link: https://lore.kernel.org/r/20241218032337.12214-3-m.shachnai@gmail.com Signed-off-by: Alexei Starovoitov --- .../testing/selftests/bpf/progs/verifier_bounds.c | 134 +++++++++++++++++++++ 1 file changed, 134 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/verifier_bounds.c b/tools/testing/selftests/bpf/progs/verifier_bounds.c index a0bb7fb40ea5..0eb33bb801b5 100644 --- a/tools/testing/selftests/bpf/progs/verifier_bounds.c +++ b/tools/testing/selftests/bpf/progs/verifier_bounds.c @@ -1200,4 +1200,138 @@ l0_%=: r0 = 0; \ : __clobber_all); } +SEC("tc") +__description("multiply mixed sign bounds. test 1") +__success __log_level(2) +__msg("r6 *= r7 {{.*}}; R6_w=scalar(smin=umin=0x1bc16d5cd4927ee1,smax=umax=0x1bc16d674ec80000,smax32=0x7ffffeff,umax32=0xfffffeff,var_off=(0x1bc16d4000000000; 0x3ffffffeff))") +__naked void mult_mixed0_sign(void) +{ + asm volatile ( + "call %[bpf_get_prandom_u32];" + "r6 = r0;" + "call %[bpf_get_prandom_u32];" + "r7 = r0;" + "r6 &= 0xf;" + "r6 -= 1000000000;" + "r7 &= 0xf;" + "r7 -= 2000000000;" + "r6 *= r7;" + "exit" + : + : __imm(bpf_get_prandom_u32), + __imm(bpf_skb_store_bytes) + : __clobber_all); +} + +SEC("tc") +__description("multiply mixed sign bounds. 
test 2") +__success __log_level(2) +__msg("r6 *= r7 {{.*}}; R6_w=scalar(smin=smin32=-100,smax=smax32=200)") +__naked void mult_mixed1_sign(void) +{ + asm volatile ( + "call %[bpf_get_prandom_u32];" + "r6 = r0;" + "call %[bpf_get_prandom_u32];" + "r7 = r0;" + "r6 &= 0xf;" + "r6 -= 0xa;" + "r7 &= 0xf;" + "r7 -= 0x14;" + "r6 *= r7;" + "exit" + : + : __imm(bpf_get_prandom_u32), + __imm(bpf_skb_store_bytes) + : __clobber_all); +} + +SEC("tc") +__description("multiply negative bounds") +__success __log_level(2) +__msg("r6 *= r7 {{.*}}; R6_w=scalar(smin=umin=smin32=umin32=0x3ff280b0,smax=umax=smax32=umax32=0x3fff0001,var_off=(0x3ff00000; 0xf81ff))") +__naked void mult_sign_bounds(void) +{ + asm volatile ( + "r8 = 0x7fff;" + "call %[bpf_get_prandom_u32];" + "r6 = r0;" + "call %[bpf_get_prandom_u32];" + "r7 = r0;" + "r6 &= 0xa;" + "r6 -= r8;" + "r7 &= 0xf;" + "r7 -= r8;" + "r6 *= r7;" + "exit" + : + : __imm(bpf_get_prandom_u32), + __imm(bpf_skb_store_bytes) + : __clobber_all); +} + +SEC("tc") +__description("multiply bounds that don't cross signed boundary") +__success __log_level(2) +__msg("r8 *= r6 {{.*}}; R6_w=scalar(smin=smin32=0,smax=umax=smax32=umax32=11,var_off=(0x0; 0xb)) R8_w=scalar(smin=0,smax=umax=0x7b96bb0a94a3a7cd,var_off=(0x0; 0x7fffffffffffffff))") +__naked void mult_no_sign_crossing(void) +{ + asm volatile ( + "r6 = 0xb;" + "r8 = 0xb3c3f8c99262687 ll;" + "call %[bpf_get_prandom_u32];" + "r7 = r0;" + "r6 &= r7;" + "r8 *= r6;" + "exit" + : + : __imm(bpf_get_prandom_u32), + __imm(bpf_skb_store_bytes) + : __clobber_all); +} + +SEC("tc") +__description("multiplication overflow, result in unbounded reg. test 1") +__success __log_level(2) +__msg("r6 *= r7 {{.*}}; R6_w=scalar()") +__naked void mult_unsign_ovf(void) +{ + asm volatile ( + "r8 = 0x7ffffffffff ll;" + "call %[bpf_get_prandom_u32];" + "r6 = r0;" + "call %[bpf_get_prandom_u32];" + "r7 = r0;" + "r6 &= 0x7fffffff;" + "r7 &= r8;" + "r6 *= r7;" + "exit" + : + : __imm(bpf_get_prandom_u32), + __imm(bpf_skb_store_bytes) + : __clobber_all); +} + +SEC("tc") +__description("multiplication overflow, result in unbounded reg. test 2") +__success __log_level(2) +__msg("r6 *= r7 {{.*}}; R6_w=scalar()") +__naked void mult_sign_ovf(void) +{ + asm volatile ( + "r8 = 0x7ffffffff ll;" + "call %[bpf_get_prandom_u32];" + "r6 = r0;" + "call %[bpf_get_prandom_u32];" + "r7 = r0;" + "r6 &= 0xa;" + "r6 -= r8;" + "r7 &= 0x7fffffff;" + "r6 *= r7;" + "exit" + : + : __imm(bpf_get_prandom_u32), + __imm(bpf_skb_store_bytes) + : __clobber_all); +} char _license[] SEC("license") = "GPL"; -- cgit v1.2.3 From 9468f39ba478d001f2603ce5bf0e1ab4b97452b8 Mon Sep 17 00:00:00 2001 From: Mahe Tardy Date: Fri, 20 Dec 2024 15:22:18 +0000 Subject: selftests/bpf: fix veristat comp mode with new stats Commit 82c1f13de315 ("selftests/bpf: Add more stats into veristat") introduced new stats, added by default in the CSV output, that were not added to parse_stat_value, used in parse_stats_csv which is used in comparison mode. Thus it broke comparison mode altogether making it fail with "Unrecognized stat #7" and EINVAL. One quirk is that PROG_TYPE and ATTACH_TYPE have been transformed to strings using libbpf_bpf_prog_type_str and libbpf_bpf_attach_type_str respectively. Since we might not want to compare those string values, we just skip the parsing in this patch. We might want to translate it back to the enum value or compare the string value directly. 
Fixes: 82c1f13de315 ("selftests/bpf: Add more stats into veristat") Signed-off-by: Mahe Tardy Tested-by: Mykyta Yatsenko Link: https://lore.kernel.org/r/20241220152218.28405-1-mahe.tardy@gmail.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/veristat.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/veristat.c b/tools/testing/selftests/bpf/veristat.c index 9d17b4dfc170..476bf95cf684 100644 --- a/tools/testing/selftests/bpf/veristat.c +++ b/tools/testing/selftests/bpf/veristat.c @@ -1672,7 +1672,10 @@ static int parse_stat_value(const char *str, enum stat_id id, struct verif_stats case TOTAL_STATES: case PEAK_STATES: case MAX_STATES_PER_INSN: - case MARK_READ_MAX_LEN: { + case MARK_READ_MAX_LEN: + case SIZE: + case JITED_SIZE: + case STACK: { long val; int err, n; @@ -1685,6 +1688,9 @@ static int parse_stat_value(const char *str, enum stat_id id, struct verif_stats st->stats[id] = val; break; } + case PROG_TYPE: + case ATTACH_TYPE: + break; default: fprintf(stderr, "Unrecognized stat #%d\n", id); return -EINVAL; -- cgit v1.2.3 From ea0916e01d0b0f2cce1369ac1494239a79827270 Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Thu, 28 Nov 2024 15:06:18 +0000 Subject: selftests/memfd: add test for mapping write-sealed memfd read-only Now we have reinstated the ability to map F_SEAL_WRITE mappings read-only, assert that we are able to do this in a test to ensure that we do not regress this again. Link: https://lkml.kernel.org/r/a6377ec470b14c0539b4600cf8fa24bf2e4858ae.1732804776.git.lorenzo.stoakes@oracle.com Signed-off-by: Lorenzo Stoakes Cc: Jann Horn Cc: Julian Orth Cc: Liam R. Howlett Cc: Linus Torvalds Cc: Shuah Khan Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- tools/testing/selftests/memfd/memfd_test.c | 43 ++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/memfd/memfd_test.c b/tools/testing/selftests/memfd/memfd_test.c index 0a0b55516028..c0c53451a16d 100644 --- a/tools/testing/selftests/memfd/memfd_test.c +++ b/tools/testing/selftests/memfd/memfd_test.c @@ -282,6 +282,24 @@ static void *mfd_assert_mmap_shared(int fd) return p; } +static void *mfd_assert_mmap_read_shared(int fd) +{ + void *p; + + p = mmap(NULL, + mfd_def_size, + PROT_READ, + MAP_SHARED, + fd, + 0); + if (p == MAP_FAILED) { + printf("mmap() failed: %m\n"); + abort(); + } + + return p; +} + static void *mfd_assert_mmap_private(int fd) { void *p; @@ -980,6 +998,30 @@ static void test_seal_future_write(void) close(fd); } +static void test_seal_write_map_read_shared(void) +{ + int fd; + void *p; + + printf("%s SEAL-WRITE-MAP-READ\n", memfd_str); + + fd = mfd_assert_new("kern_memfd_seal_write_map_read", + mfd_def_size, + MFD_CLOEXEC | MFD_ALLOW_SEALING); + + mfd_assert_add_seals(fd, F_SEAL_WRITE); + mfd_assert_has_seals(fd, F_SEAL_WRITE); + + p = mfd_assert_mmap_read_shared(fd); + + mfd_assert_read(fd); + mfd_assert_read_shared(fd); + mfd_fail_write(fd); + + munmap(p, mfd_def_size); + close(fd); +} + /* * Test SEAL_SHRINK * Test whether SEAL_SHRINK actually prevents shrinking @@ -1593,6 +1635,7 @@ int main(int argc, char **argv) test_seal_write(); test_seal_future_write(); + test_seal_write_map_read_shared(); test_seal_shrink(); test_seal_grow(); test_seal_resize(); -- cgit v1.2.3 From 2f84d072bdcb7d6ec66cc4d0de9f37a3dc394cd2 Mon Sep 17 00:00:00 2001 From: Alejandro Lucero Date: Tue, 3 Dec 2024 16:21:12 +0000 Subject: cxl/pci: Add CXL Type 1/2 support to 
cxl_dvsec_rr_decode() In cxl_dvsec_rr_decode() the pci driver expects to retrieve a cxlds, struct cxl_dev_state, from the driver_data field of struct device. While that works for Type 3, drivers for Type 1/2 devices may not put a cxlds in the driver_data field. In preparation for supporting Type 1/2 devices, replace parameter 'struct device' with 'struct cxl_dev_state' in cxl_dvsec_rr_decode(). Remove the unused parameter 'cxl_port' in cxl_dvsec_rr_decode(). Signed-off-by: Alejandro Lucero Reviewed-by: Jonathan Cameron Reviewed-by: Alison Schofield Link: https://patch.msgid.link/20241203162112.5088-1-alucerop@amd.com Signed-off-by: Dave Jiang --- tools/testing/cxl/test/mock.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/testing/cxl/test/mock.c b/tools/testing/cxl/test/mock.c index 450c7566c33f..af2594e4f35d 100644 --- a/tools/testing/cxl/test/mock.c +++ b/tools/testing/cxl/test/mock.c @@ -228,16 +228,16 @@ int __wrap_cxl_hdm_decode_init(struct cxl_dev_state *cxlds, } EXPORT_SYMBOL_NS_GPL(__wrap_cxl_hdm_decode_init, "CXL"); -int __wrap_cxl_dvsec_rr_decode(struct device *dev, struct cxl_port *port, +int __wrap_cxl_dvsec_rr_decode(struct cxl_dev_state *cxlds, struct cxl_endpoint_dvsec_info *info) { int rc = 0, index; struct cxl_mock_ops *ops = get_cxl_mock_ops(&index); - if (ops && ops->is_mock_dev(dev)) + if (ops && ops->is_mock_dev(cxlds->dev)) rc = 0; else - rc = cxl_dvsec_rr_decode(dev, port, info); + rc = cxl_dvsec_rr_decode(cxlds, info); put_cxl_mock_ops(index); return rc; -- cgit v1.2.3 From f1e8bf56320a7fb32095b6c51b707459361b403b Mon Sep 17 00:00:00 2001 From: Zijun Hu Date: Tue, 24 Dec 2024 21:05:03 +0800 Subject: driver core: Constify API device_find_child() and adapt for various usages MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Constify the following API: struct device *device_find_child(struct device *dev, void *data, int (*match)(struct device *dev, void *data)); To : struct device *device_find_child(struct device *dev, const void *data, device_match_t match); typedef int (*device_match_t)(struct device *dev, const void *data); with the following reasons: - Protect caller's match data @*data which is for comparison and lookup and the API does not actually need to modify @*data. - Make the API's parameters (@match)() and @data have the same type as all of other device finding APIs (bus|class|driver)_find_device(). - All kinds of existing device match functions can be directly taken as the API's argument, they were exported by driver core. Constify the API and adapt for various existing usages. BTW, various subsystem changes are squashed into this commit to meet 'git bisect' requirement, and this commit has the minimal and simplest changes to complement squashing shortcoming, and that may bring extra code improvement. 
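A usage sketch with the constified API (assuming a parent device pointer and a lookup by name via the stock device_match_name() helper):

  struct device *child;

  child = device_find_child(parent, "foo", device_match_name);
  if (child) {
  	/* ... use the child ... */
  	put_device(child);
  }

Custom callbacks work the same way; they simply take a const void * now, as in the cxl test change below.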
Reviewed-by: Alison Schofield Reviewed-by: Takashi Sakamoto Acked-by: Uwe Kleine-König # for drivers/pwm Signed-off-by: Zijun Hu Reviewed-by: Jonathan Cameron Reviewed-by: Mathieu Poirier Link: https://lore.kernel.org/r/20241224-const_dfc_done-v5-4-6623037414d4@quicinc.com Signed-off-by: Greg Kroah-Hartman --- tools/testing/cxl/test/cxl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/cxl/test/cxl.c b/tools/testing/cxl/test/cxl.c index d0337c11f9ee..cc8948f49117 100644 --- a/tools/testing/cxl/test/cxl.c +++ b/tools/testing/cxl/test/cxl.c @@ -725,7 +725,7 @@ static void default_mock_decoder(struct cxl_decoder *cxld) cxld->reset = mock_decoder_reset; } -static int first_decoder(struct device *dev, void *data) +static int first_decoder(struct device *dev, const void *data) { struct cxl_decoder *cxld; -- cgit v1.2.3 From 991c8aacfb6e3088027b8c776ad31d2c093905b8 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Wed, 20 Nov 2024 08:26:15 -0800 Subject: tools/power/x86/intel-speed-select: Fix TRL restore after SST-TF disable When SST-TF is disabled, the TRL (Turbo Ratio Limit) of config level 0 is getting restored. But the TRL of current level should be restored which may not be config level 0. This is caused by a bug in treating config level as TRL level. So arguments needs to be swapped. Signed-off-by: Srinivas Pandruvada --- tools/power/x86/intel-speed-select/isst-core-tpmi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/power/x86/intel-speed-select/isst-core-tpmi.c b/tools/power/x86/intel-speed-select/isst-core-tpmi.c index 32ea70c7dbd8..da53aaa27fc9 100644 --- a/tools/power/x86/intel-speed-select/isst-core-tpmi.c +++ b/tools/power/x86/intel-speed-select/isst-core-tpmi.c @@ -329,7 +329,7 @@ static int tpmi_get_get_trls(struct isst_id *id, int config_index, return 0; } -static int tpmi_get_get_trl(struct isst_id *id, int level, int config_index, +static int tpmi_get_get_trl(struct isst_id *id, int config_index, int level, int *trl) { struct isst_pkg_ctdp_level_info ctdp_level; -- cgit v1.2.3 From 600c8f24319cebe671a70722df99b8006daebe21 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Wed, 20 Nov 2024 08:35:40 -0800 Subject: tools/power/x86/intel-speed-select: v1.21 release This version has one fix: - Fix restoring TRL after SST-TF disable Signed-off-by: Srinivas Pandruvada --- tools/power/x86/intel-speed-select/isst-config.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/power/x86/intel-speed-select/isst-config.c b/tools/power/x86/intel-speed-select/isst-config.c index 5127be34869e..fadfb02b8611 100644 --- a/tools/power/x86/intel-speed-select/isst-config.c +++ b/tools/power/x86/intel-speed-select/isst-config.c @@ -16,7 +16,7 @@ struct process_cmd_struct { int arg; }; -static const char *version_str = "v1.20"; +static const char *version_str = "v1.21"; static const int supported_api_ver = 3; static struct isst_if_platform_info isst_platform_info; -- cgit v1.2.3 From 15858da53542360931a457f32bcdc4287d13731f Mon Sep 17 00:00:00 2001 From: Nam Cao Date: Thu, 2 Jan 2025 09:22:57 +0100 Subject: selftests: coredump: Add stackdump test Add a test which checks that the kstkesp field in /proc/pid/stat can be read for all threads of a coredumping process. For full details including the motivation for this test and how it works, see the README file added by this commit. 
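For reference, what the helper script does for each thread amounts to reading the 29th field of /proc/<tid>/stat; a rough C equivalent (naive whitespace split, which is also what the awk one-liner below relies on):

  unsigned long long read_kstkesp(int tid)
  {
  	char path[64], buf[4096], *tok;
  	unsigned long long esp = 0;
  	FILE *f;
  	int i;

  	snprintf(path, sizeof(path), "/proc/%d/stat", tid);
  	f = fopen(path, "r");
  	if (!f)
  		return 0;
  	if (fgets(buf, sizeof(buf), f)) {
  		tok = strtok(buf, " ");
  		for (i = 1; tok && i < 29; i++)	/* field 29 is kstkesp, see proc(5) */
  			tok = strtok(NULL, " ");
  		if (tok)
  			esp = strtoull(tok, NULL, 10);
  	}
  	fclose(f);
  	return esp;
  }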
Reviewed-by: John Ogness Signed-off-by: Nam Cao Link: https://lore.kernel.org/r/50e737b6576208566d14efcf1934fe840de6b1f4.1735805772.git.namcao@linutronix.de Signed-off-by: Christian Brauner --- tools/testing/selftests/coredump/Makefile | 7 + tools/testing/selftests/coredump/README.rst | 50 +++++++ tools/testing/selftests/coredump/stackdump | 14 ++ tools/testing/selftests/coredump/stackdump_test.c | 151 ++++++++++++++++++++++ 4 files changed, 222 insertions(+) create mode 100644 tools/testing/selftests/coredump/Makefile create mode 100644 tools/testing/selftests/coredump/README.rst create mode 100755 tools/testing/selftests/coredump/stackdump create mode 100644 tools/testing/selftests/coredump/stackdump_test.c (limited to 'tools') diff --git a/tools/testing/selftests/coredump/Makefile b/tools/testing/selftests/coredump/Makefile new file mode 100644 index 000000000000..ed210037b29d --- /dev/null +++ b/tools/testing/selftests/coredump/Makefile @@ -0,0 +1,7 @@ +# SPDX-License-Identifier: GPL-2.0-only +CFLAGS = $(KHDR_INCLUDES) + +TEST_GEN_PROGS := stackdump_test +TEST_FILES := stackdump + +include ../lib.mk diff --git a/tools/testing/selftests/coredump/README.rst b/tools/testing/selftests/coredump/README.rst new file mode 100644 index 000000000000..164a7aa181c8 --- /dev/null +++ b/tools/testing/selftests/coredump/README.rst @@ -0,0 +1,50 @@ +coredump selftest +================= + +Background context +------------------ + +`coredump` is a feature which dumps a process's memory space when the process terminates +unexpectedly (e.g. due to segmentation fault), which can be useful for debugging. By default, +`coredump` dumps the memory to the file named `core`, but this behavior can be changed by writing a +different file name to `/proc/sys/kernel/core_pattern`. Furthermore, `coredump` can be piped to a +user-space program by writing the pipe symbol (`|`) followed by the command to be executed to +`/proc/sys/kernel/core_pattern`. For the full description, see `man 5 core`. + +The piped user program may be interested in reading the stack pointers of the crashed process. The +crashed process's stack pointers can be read from `procfs`: it is the `kstkesp` field in +`/proc/$PID/stat`. See `man 5 proc` for all the details. + +The problem +----------- +While a thread is active, the stack pointer is unsafe to read and therefore the `kstkesp` field +reads zero. But when the thread is dead (e.g. during a coredump), this field should have valid +value. + +However, this was broken in the past and `kstkesp` was zero even during coredump: + +* commit 0a1eb2d474ed ("fs/proc: Stop reporting eip and esp in /proc/PID/stat") changed kstkesp to + always be zero + +* commit fd7d56270b52 ("fs/proc: Report eip/esp in /prod/PID/stat for coredumping") fixed it for the + coredumping thread. However, other threads in a coredumping process still had the problem. + +* commit cb8f381f1613 ("fs/proc/array.c: allow reporting eip/esp for all coredumping threads") fixed + for all threads in a coredumping process. + +* commit 92307383082d ("coredump: Don't perform any cleanups before dumping core") broke it again + for the other threads in a coredumping process. + +The problem has been fixed now, but considering the history, it may appear again in the future. + +The goal of this test +--------------------- +This test detects problem with reading `kstkesp` during coredump by doing the following: + +#. Tell the kernel to execute the "stackdump" script when a coredump happens. 
This script + reads the stack pointers of all threads of crashed processes. + +#. Spawn a child process who creates some threads and then crashes. + +#. Read the output from the "stackdump" script, and make sure all stack pointer values are + non-zero. diff --git a/tools/testing/selftests/coredump/stackdump b/tools/testing/selftests/coredump/stackdump new file mode 100755 index 000000000000..96714ce42d12 --- /dev/null +++ b/tools/testing/selftests/coredump/stackdump @@ -0,0 +1,14 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 + +CRASH_PROGRAM_ID=$1 +STACKDUMP_FILE=$2 + +TMP=$(mktemp) + +for t in /proc/$CRASH_PROGRAM_ID/task/*; do + tid=$(basename $t) + cat /proc/$tid/stat | awk '{print $29}' >> $TMP +done + +mv $TMP $STACKDUMP_FILE diff --git a/tools/testing/selftests/coredump/stackdump_test.c b/tools/testing/selftests/coredump/stackdump_test.c new file mode 100644 index 000000000000..137b2364a082 --- /dev/null +++ b/tools/testing/selftests/coredump/stackdump_test.c @@ -0,0 +1,151 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include +#include +#include +#include +#include +#include + +#include "../kselftest_harness.h" + +#define STACKDUMP_FILE "stack_values" +#define STACKDUMP_SCRIPT "stackdump" +#define NUM_THREAD_SPAWN 128 + +static void *do_nothing(void *) +{ + while (1) + pause(); +} + +static void crashing_child(void) +{ + pthread_t thread; + int i; + + for (i = 0; i < NUM_THREAD_SPAWN; ++i) + pthread_create(&thread, NULL, do_nothing, NULL); + + /* crash on purpose */ + i = *(int *)NULL; +} + +FIXTURE(coredump) +{ + char original_core_pattern[256]; +}; + +FIXTURE_SETUP(coredump) +{ + char buf[PATH_MAX]; + FILE *file; + char *dir; + int ret; + + file = fopen("/proc/sys/kernel/core_pattern", "r"); + ASSERT_NE(NULL, file); + + ret = fread(self->original_core_pattern, 1, sizeof(self->original_core_pattern), file); + ASSERT_TRUE(ret || feof(file)); + ASSERT_LT(ret, sizeof(self->original_core_pattern)); + + self->original_core_pattern[ret] = '\0'; + + ret = fclose(file); + ASSERT_EQ(0, ret); +} + +FIXTURE_TEARDOWN(coredump) +{ + const char *reason; + FILE *file; + int ret; + + unlink(STACKDUMP_FILE); + + file = fopen("/proc/sys/kernel/core_pattern", "w"); + if (!file) { + reason = "Unable to open core_pattern"; + goto fail; + } + + ret = fprintf(file, "%s", self->original_core_pattern); + if (ret < 0) { + reason = "Unable to write to core_pattern"; + goto fail; + } + + ret = fclose(file); + if (ret) { + reason = "Unable to close core_pattern"; + goto fail; + } + + return; +fail: + /* This should never happen */ + fprintf(stderr, "Failed to cleanup stackdump test: %s\n", reason); +} + +TEST_F(coredump, stackdump) +{ + struct sigaction action = {}; + unsigned long long stack; + char *test_dir, *line; + size_t line_length; + char buf[PATH_MAX]; + int ret, i; + FILE *file; + pid_t pid; + + /* + * Step 1: Setup core_pattern so that the stackdump script is executed when the child + * process crashes + */ + ret = readlink("/proc/self/exe", buf, sizeof(buf)); + ASSERT_NE(-1, ret); + ASSERT_LT(ret, sizeof(buf)); + buf[ret] = '\0'; + + test_dir = dirname(buf); + + file = fopen("/proc/sys/kernel/core_pattern", "w"); + ASSERT_NE(NULL, file); + + ret = fprintf(file, "|%1$s/%2$s %%P %1$s/%3$s", test_dir, STACKDUMP_SCRIPT, STACKDUMP_FILE); + ASSERT_LT(0, ret); + + ret = fclose(file); + ASSERT_EQ(0, ret); + + /* Step 2: Create a process who spawns some threads then crashes */ + pid = fork(); + ASSERT_TRUE(pid >= 0); + if (pid == 0) + crashing_child(); + + /* + * Step 3: Wait for the stackdump 
script to write the stack pointers to the stackdump file + */ + for (i = 0; i < 10; ++i) { + file = fopen(STACKDUMP_FILE, "r"); + if (file) + break; + sleep(1); + } + ASSERT_NE(file, NULL); + + /* Step 4: Make sure all stack pointer values are non-zero */ + for (i = 0; -1 != getline(&line, &line_length, file); ++i) { + stack = strtoull(line, NULL, 10); + ASSERT_NE(stack, 0); + } + + ASSERT_EQ(i, 1 + NUM_THREAD_SPAWN); + + fclose(file); +} + +TEST_HARNESS_MAIN -- cgit v1.2.3 From e95274dfe86490ec2a5633035c24b2de6722841f Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 3 Jan 2025 10:24:58 -0800 Subject: selftests: tc-testing: reduce rshift value After previous change rshift >= 32 is no longer allowed. Modify the test to use 31, the test doesn't seem to send any traffic so the exact value shouldn't matter. Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20250103182458.1213486-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/tc-testing/tc-tests/filters/flow.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/flow.json b/tools/testing/selftests/tc-testing/tc-tests/filters/flow.json index 996448afe31b..91d120548bf5 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/filters/flow.json +++ b/tools/testing/selftests/tc-testing/tc-tests/filters/flow.json @@ -78,10 +78,10 @@ "setup": [ "$TC qdisc add dev $DEV1 ingress" ], - "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 protocol ip flow map key dst rshift 0xff", + "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 protocol ip flow map key dst rshift 0x1f", "expExitCode": "0", "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 protocol ip prio 1 flow", - "matchPattern": "filter parent ffff: protocol ip pref 1 flow chain [0-9]+ handle 0x1 map keys dst rshift 255 baseclass", + "matchPattern": "filter parent ffff: protocol ip pref 1 flow chain [0-9]+ handle 0x1 map keys dst rshift 31 baseclass", "matchCount": "1", "teardown": [ "$TC qdisc del dev $DEV1 ingress" -- cgit v1.2.3 From b9ed315d3c4c0c294a4348edb6874d489bac47fa Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sat, 21 Dec 2024 00:46:56 +0100 Subject: netkit: Allow for configuring needed_{head,tail}room Allow the user to configure needed_{head,tail}room for both netkit devices. The idea is similar to 163e529200af ("veth: implement ndo_set_rx_headroom") with the difference that the two parameters can be specified upon device creation. By default the current behavior stays as is which is needed_{head,tail}room is 0. In case of Cilium, for example, the netkit devices are not enslaved into a bridge or openvswitch device (rather, BPF-based redirection is used out of tcx), and as such these parameters are not propagated into the Pod's netns via peer device. Given Cilium can run in vxlan/geneve tunneling mode (needed_headroom) and/or be used in combination with WireGuard (needed_{head,tail}room), allow the Cilium CNI plugin to specify these two upon netkit device creation. 
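A minimal sketch of the creating side, reusing the iproute2-style helpers and request layout of the selftest below (the 32/8 byte values are illustrative):

  /* inside the IFLA_INFO_DATA nest of the RTM_NEWLINK request */
  addattr32(&req.n, sizeof(req), IFLA_NETKIT_MODE, NETKIT_L2);
  addattr16(&req.n, sizeof(req), IFLA_NETKIT_HEADROOM, 32);
  addattr16(&req.n, sizeof(req), IFLA_NETKIT_TAILROOM, 8);

Omitting the two new attributes keeps today's behaviour, i.e. needed_{head,tail}room of 0.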
Signed-off-by: Daniel Borkmann Reviewed-by: Jakub Kicinski Acked-by: Nikolay Aleksandrov Link: https://lore.kernel.org/bpf/20241220234658.490686-1-daniel@iogearbox.net --- tools/include/uapi/linux/if_link.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'tools') diff --git a/tools/include/uapi/linux/if_link.h b/tools/include/uapi/linux/if_link.h index 8516c1ccd57a..7e46ca4cd31b 100644 --- a/tools/include/uapi/linux/if_link.h +++ b/tools/include/uapi/linux/if_link.h @@ -1315,6 +1315,8 @@ enum { IFLA_NETKIT_MODE, IFLA_NETKIT_SCRUB, IFLA_NETKIT_PEER_SCRUB, + IFLA_NETKIT_HEADROOM, + IFLA_NETKIT_TAILROOM, __IFLA_NETKIT_MAX, }; #define IFLA_NETKIT_MAX (__IFLA_NETKIT_MAX - 1) -- cgit v1.2.3 From 058268e23fcadc2bdb9297c6dff3a010c70f9762 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sat, 21 Dec 2024 00:46:58 +0100 Subject: selftests/bpf: Extend netkit tests to validate set {head,tail}room Extend the netkit selftests to specify and validate the {head,tail}room on the netdevice: # ./vmtest.sh -- ./test_progs -t netkit [...] ./test_progs -t netkit [ 1.174147] bpf_testmod: loading out-of-tree module taints kernel. [ 1.174585] bpf_testmod: module verification failed: signature and/or required key missing - tainting kernel [ 1.422307] tsc: Refined TSC clocksource calibration: 3407.983 MHz [ 1.424511] clocksource: tsc: mask: 0xffffffffffffffff max_cycles: 0x311fc3e5084, max_idle_ns: 440795359833 ns [ 1.428092] clocksource: Switched to clocksource tsc #363 tc_netkit_basic:OK #364 tc_netkit_device:OK #365 tc_netkit_multi_links:OK #366 tc_netkit_multi_opts:OK #367 tc_netkit_neigh_links:OK #368 tc_netkit_pkt_type:OK #369 tc_netkit_scrub:OK Summary: 7/0 PASSED, 0 SKIPPED, 0 FAILED Signed-off-by: Daniel Borkmann Acked-by: Nikolay Aleksandrov Link: https://lore.kernel.org/bpf/20241220234658.490686-3-daniel@iogearbox.net --- tools/testing/selftests/bpf/prog_tests/tc_netkit.c | 49 ++++++++++++++-------- tools/testing/selftests/bpf/progs/test_tc_link.c | 15 +++++++ 2 files changed, 46 insertions(+), 18 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/tc_netkit.c b/tools/testing/selftests/bpf/prog_tests/tc_netkit.c index 151a4210028f..2461d183dee5 100644 --- a/tools/testing/selftests/bpf/prog_tests/tc_netkit.c +++ b/tools/testing/selftests/bpf/prog_tests/tc_netkit.c @@ -14,10 +14,16 @@ #include "netlink_helpers.h" #include "tc_helpers.h" +#define NETKIT_HEADROOM 32 +#define NETKIT_TAILROOM 8 + #define MARK 42 #define PRIO 0xeb9f #define ICMP_ECHO 8 +#define FLAG_ADJUST_ROOM (1 << 0) +#define FLAG_SAME_NETNS (1 << 1) + struct icmphdr { __u8 type; __u8 code; @@ -35,7 +41,7 @@ struct iplink_req { }; static int create_netkit(int mode, int policy, int peer_policy, int *ifindex, - bool same_netns, int scrub, int peer_scrub) + int scrub, int peer_scrub, __u32 flags) { struct rtnl_handle rth = { .fd = -1 }; struct iplink_req req = {}; @@ -63,6 +69,10 @@ static int create_netkit(int mode, int policy, int peer_policy, int *ifindex, addattr32(&req.n, sizeof(req), IFLA_NETKIT_SCRUB, scrub); addattr32(&req.n, sizeof(req), IFLA_NETKIT_PEER_SCRUB, peer_scrub); addattr32(&req.n, sizeof(req), IFLA_NETKIT_MODE, mode); + if (flags & FLAG_ADJUST_ROOM) { + addattr16(&req.n, sizeof(req), IFLA_NETKIT_HEADROOM, NETKIT_HEADROOM); + addattr16(&req.n, sizeof(req), IFLA_NETKIT_TAILROOM, NETKIT_TAILROOM); + } addattr_nest_end(&req.n, data); addattr_nest_end(&req.n, linkinfo); @@ -87,7 +97,7 @@ static int create_netkit(int mode, int policy, int peer_policy, int *ifindex, " addr ee:ff:bb:cc:aa:dd"), 
"set hwaddress"); } - if (same_netns) { + if (flags & FLAG_SAME_NETNS) { ASSERT_OK(system("ip link set dev " netkit_peer " up"), "up peer"); ASSERT_OK(system("ip addr add dev " netkit_peer " 10.0.0.2/24"), @@ -184,8 +194,8 @@ void serial_test_tc_netkit_basic(void) int err, ifindex; err = create_netkit(NETKIT_L2, NETKIT_PASS, NETKIT_PASS, - &ifindex, false, NETKIT_SCRUB_DEFAULT, - NETKIT_SCRUB_DEFAULT); + &ifindex, NETKIT_SCRUB_DEFAULT, + NETKIT_SCRUB_DEFAULT, 0); if (err) return; @@ -299,8 +309,8 @@ static void serial_test_tc_netkit_multi_links_target(int mode, int target) int err, ifindex; err = create_netkit(mode, NETKIT_PASS, NETKIT_PASS, - &ifindex, false, NETKIT_SCRUB_DEFAULT, - NETKIT_SCRUB_DEFAULT); + &ifindex, NETKIT_SCRUB_DEFAULT, + NETKIT_SCRUB_DEFAULT, 0); if (err) return; @@ -428,8 +438,8 @@ static void serial_test_tc_netkit_multi_opts_target(int mode, int target) int err, ifindex; err = create_netkit(mode, NETKIT_PASS, NETKIT_PASS, - &ifindex, false, NETKIT_SCRUB_DEFAULT, - NETKIT_SCRUB_DEFAULT); + &ifindex, NETKIT_SCRUB_DEFAULT, + NETKIT_SCRUB_DEFAULT, 0); if (err) return; @@ -543,8 +553,8 @@ void serial_test_tc_netkit_device(void) int err, ifindex, ifindex2; err = create_netkit(NETKIT_L3, NETKIT_PASS, NETKIT_PASS, - &ifindex, true, NETKIT_SCRUB_DEFAULT, - NETKIT_SCRUB_DEFAULT); + &ifindex, NETKIT_SCRUB_DEFAULT, + NETKIT_SCRUB_DEFAULT, FLAG_SAME_NETNS); if (err) return; @@ -655,8 +665,8 @@ static void serial_test_tc_netkit_neigh_links_target(int mode, int target) int err, ifindex; err = create_netkit(mode, NETKIT_PASS, NETKIT_PASS, - &ifindex, false, NETKIT_SCRUB_DEFAULT, - NETKIT_SCRUB_DEFAULT); + &ifindex, NETKIT_SCRUB_DEFAULT, + NETKIT_SCRUB_DEFAULT, 0); if (err) return; @@ -733,8 +743,8 @@ static void serial_test_tc_netkit_pkt_type_mode(int mode) struct bpf_link *link; err = create_netkit(mode, NETKIT_PASS, NETKIT_PASS, - &ifindex, true, NETKIT_SCRUB_DEFAULT, - NETKIT_SCRUB_DEFAULT); + &ifindex, NETKIT_SCRUB_DEFAULT, + NETKIT_SCRUB_DEFAULT, FLAG_SAME_NETNS); if (err) return; @@ -799,7 +809,7 @@ void serial_test_tc_netkit_pkt_type(void) serial_test_tc_netkit_pkt_type_mode(NETKIT_L3); } -static void serial_test_tc_netkit_scrub_type(int scrub) +static void serial_test_tc_netkit_scrub_type(int scrub, bool room) { LIBBPF_OPTS(bpf_netkit_opts, optl); struct test_tc_link *skel; @@ -807,7 +817,8 @@ static void serial_test_tc_netkit_scrub_type(int scrub) int err, ifindex; err = create_netkit(NETKIT_L2, NETKIT_PASS, NETKIT_PASS, - &ifindex, false, scrub, scrub); + &ifindex, scrub, scrub, + room ? FLAG_ADJUST_ROOM : 0); if (err) return; @@ -842,6 +853,8 @@ static void serial_test_tc_netkit_scrub_type(int scrub) ASSERT_EQ(skel->bss->seen_tc8, true, "seen_tc8"); ASSERT_EQ(skel->bss->mark, scrub == NETKIT_SCRUB_NONE ? MARK : 0, "mark"); ASSERT_EQ(skel->bss->prio, scrub == NETKIT_SCRUB_NONE ? PRIO : 0, "prio"); + ASSERT_EQ(skel->bss->headroom, room ? NETKIT_HEADROOM : 0, "headroom"); + ASSERT_EQ(skel->bss->tailroom, room ? 
NETKIT_TAILROOM : 0, "tailroom"); cleanup: test_tc_link__destroy(skel); @@ -852,6 +865,6 @@ cleanup: void serial_test_tc_netkit_scrub(void) { - serial_test_tc_netkit_scrub_type(NETKIT_SCRUB_DEFAULT); - serial_test_tc_netkit_scrub_type(NETKIT_SCRUB_NONE); + serial_test_tc_netkit_scrub_type(NETKIT_SCRUB_DEFAULT, false); + serial_test_tc_netkit_scrub_type(NETKIT_SCRUB_NONE, true); } diff --git a/tools/testing/selftests/bpf/progs/test_tc_link.c b/tools/testing/selftests/bpf/progs/test_tc_link.c index 10d825928499..630f12e51b07 100644 --- a/tools/testing/selftests/bpf/progs/test_tc_link.c +++ b/tools/testing/selftests/bpf/progs/test_tc_link.c @@ -8,6 +8,7 @@ #include #include #include +#include char LICENSE[] SEC("license") = "GPL"; @@ -27,6 +28,7 @@ bool seen_host; bool seen_mcast; int mark, prio; +unsigned short headroom, tailroom; SEC("tc/ingress") int tc1(struct __sk_buff *skb) @@ -104,11 +106,24 @@ out: return TCX_PASS; } +struct sk_buff { + struct net_device *dev; +}; + +struct net_device { + unsigned short needed_headroom; + unsigned short needed_tailroom; +}; + SEC("tc/egress") int tc8(struct __sk_buff *skb) { + struct net_device *dev = BPF_CORE_READ((struct sk_buff *)skb, dev); + seen_tc8 = true; mark = skb->mark; prio = skb->priority; + headroom = BPF_CORE_READ(dev, needed_headroom); + tailroom = BPF_CORE_READ(dev, needed_tailroom); return TCX_PASS; } -- cgit v1.2.3 From 73b9075f334f5debf28646884a320b796b27768d Mon Sep 17 00:00:00 2001 From: Jiayuan Chen Date: Tue, 24 Dec 2024 15:59:57 +0800 Subject: selftests/bpf: Avoid generating untracked files when running bpf selftests Currently, when we run the BPF selftests with the following command: make -C tools/testing/selftests TARGETS=bpf SKIP_TARGETS="" The command generates untracked files and directories with make version less than 4.4: ''' Untracked files: (use "git add ..." to include in what will be committed) tools/testing/selftests/bpfFEATURE-DUMP.selftests tools/testing/selftests/bpffeature/ ''' We lost slash after word "bpf". The reason is slash appending code is as follow: ''' OUTPUT := $(OUTPUT)/ $(eval include ../../../build/Makefile.feature) OUTPUT := $(patsubst %/,%,$(OUTPUT)) ''' This way of assigning values to OUTPUT will never be effective for the variable OUTPUT provided via the command argument [1] and BPF makefile is called from parent Makfile(tools/testing/selftests/Makefile) like: ''' all: ... $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET ''' According to GNU make, we can use override Directive to fix this issue [2]. 
[1] https://www.gnu.org/software/make/manual/make.html#Overriding [2] https://www.gnu.org/software/make/manual/make.html#Override-Directive Fixes: dc3a8804d790 ("selftests/bpf: Adapt OUTPUT appending logic to lower versions of Make") Signed-off-by: Jiayuan Chen Signed-off-by: Daniel Borkmann Acked-by: Jiri Olsa Link: https://lore.kernel.org/bpf/20241224075957.288018-1-mrpre@163.com --- tools/testing/selftests/bpf/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 9e870e519c30..eb4d21651aa7 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -202,9 +202,9 @@ ifeq ($(shell expr $(MAKE_VERSION) \>= 4.4), 1) $(let OUTPUT,$(OUTPUT)/,\ $(eval include ../../../build/Makefile.feature)) else -OUTPUT := $(OUTPUT)/ +override OUTPUT := $(OUTPUT)/ $(eval include ../../../build/Makefile.feature) -OUTPUT := $(patsubst %/,%,$(OUTPUT)) +override OUTPUT := $(patsubst %/,%,$(OUTPUT)) endif endif -- cgit v1.2.3 From 87091dd986db51406e64dd5e8c9d22617c66c6af Mon Sep 17 00:00:00 2001 From: Emil Tsalapatis Date: Sat, 4 Jan 2025 15:25:28 -0500 Subject: selftests/bpf: test bpf_for within spin lock section Add a selftest to ensure BPF for loops within critical sections are accepted by the verifier. Signed-off-by: Emil Tsalapatis (Meta) Acked-by: Eduard Zingerman Link: https://lore.kernel.org/r/20250104202528.882482-3-emil@etsalapatis.com Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/progs/verifier_spin_lock.c | 26 ++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/verifier_spin_lock.c b/tools/testing/selftests/bpf/progs/verifier_spin_lock.c index 25599eac9a70..d9d7b05cf6d2 100644 --- a/tools/testing/selftests/bpf/progs/verifier_spin_lock.c +++ b/tools/testing/selftests/bpf/progs/verifier_spin_lock.c @@ -530,4 +530,30 @@ l1_%=: exit; \ : __clobber_all); } +SEC("tc") +__description("spin_lock: loop within a locked region") +__success __failure_unpriv __msg_unpriv("") +__retval(0) +int bpf_loop_inside_locked_region(void) +{ + const int zero = 0; + struct val *val; + int i, j = 0; + + val = bpf_map_lookup_elem(&map_spin_lock, &zero); + if (!val) + return -1; + + bpf_spin_lock(&val->l); + bpf_for(i, 0, 10) { + j++; + /* Silence "unused variable" warnings. */ + if (j == 10) + break; + } + bpf_spin_unlock(&val->l); + + return 0; +} + char _license[] SEC("license") = "GPL"; -- cgit v1.2.3 From f44275e7155dc310d36516fc25be503da099781c Mon Sep 17 00:00:00 2001 From: Ihor Solodrai Date: Mon, 6 Jan 2025 20:17:31 +0000 Subject: selftests/bpf: add -fno-strict-aliasing to BPF_CFLAGS Following the discussion at [1], set -fno-strict-aliasing flag for all BPF object build rules. Remove now unnecessary -CFLAGS variables. [1] https://lore.kernel.org/bpf/20250106185447.951609-1-ihor.solodrai@pm.me/ CC: Jose E. 
Marchesi Signed-off-by: Ihor Solodrai Acked-by: Eduard Zingerman Link: https://lore.kernel.org/r/20250106201728.1219791-1-ihor.solodrai@pm.me Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/Makefile | 28 +--------------------------- 1 file changed, 1 insertion(+), 27 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index eb4d21651aa7..d5be2f94deef 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -54,21 +54,6 @@ PCAP_LIBS := $(shell $(PKG_CONFIG) --libs libpcap 2>/dev/null) LDLIBS += $(PCAP_LIBS) CFLAGS += $(PCAP_CFLAGS) -# The following tests perform type punning and they may break strict -# aliasing rules, which are exploited by both GCC and clang by default -# while optimizing. This can lead to broken programs. -progs/bind4_prog.c-CFLAGS := -fno-strict-aliasing -progs/bind6_prog.c-CFLAGS := -fno-strict-aliasing -progs/dynptr_fail.c-CFLAGS := -fno-strict-aliasing -progs/linked_list_fail.c-CFLAGS := -fno-strict-aliasing -progs/map_kptr_fail.c-CFLAGS := -fno-strict-aliasing -progs/syscall.c-CFLAGS := -fno-strict-aliasing -progs/test_pkt_md_access.c-CFLAGS := -fno-strict-aliasing -progs/test_sk_lookup.c-CFLAGS := -fno-strict-aliasing -progs/timer_crash.c-CFLAGS := -fno-strict-aliasing -progs/test_global_func9.c-CFLAGS := -fno-strict-aliasing -progs/verifier_nocsr.c-CFLAGS := -fno-strict-aliasing - # Some utility functions use LLVM libraries jit_disasm_helpers.c-CFLAGS = $(LLVM_CFLAGS) @@ -103,18 +88,6 @@ progs/btf_dump_test_case_packing.c-bpf_gcc-CFLAGS := -Wno-error progs/btf_dump_test_case_padding.c-bpf_gcc-CFLAGS := -Wno-error progs/btf_dump_test_case_syntax.c-bpf_gcc-CFLAGS := -Wno-error -# The following tests do type-punning, via the __imm_insn macro, from -# `struct bpf_insn' to long and then uses the value. This triggers an -# "is used uninitialized" warning in GCC due to strict-aliasing -# rules. -progs/verifier_ref_tracking.c-bpf_gcc-CFLAGS := -fno-strict-aliasing -progs/verifier_unpriv.c-bpf_gcc-CFLAGS := -fno-strict-aliasing -progs/verifier_cgroup_storage.c-bpf_gcc-CFLAGS := -fno-strict-aliasing -progs/verifier_ld_ind.c-bpf_gcc-CFLAGS := -fno-strict-aliasing -progs/verifier_map_ret_val.c-bpf_gcc-CFLAGS := -fno-strict-aliasing -progs/verifier_spill_fill.c-bpf_gcc-CFLAGS := -fno-strict-aliasing -progs/verifier_subprog_precision.c-bpf_gcc-CFLAGS := -fno-strict-aliasing -progs/verifier_uninit.c-bpf_gcc-CFLAGS := -fno-strict-aliasing endif ifneq ($(CLANG_CPUV4),) @@ -474,6 +447,7 @@ CLANG_SYS_INCLUDES = $(call get_sys_includes,$(CLANG),$(CLANG_TARGET_ARCH)) BPF_CFLAGS = -g -Wall -Werror -D__TARGET_ARCH_$(SRCARCH) $(MENDIAN) \ -I$(INCLUDE_DIR) -I$(CURDIR) -I$(APIDIR) \ -I$(abspath $(OUTPUT)/../usr/include) \ + -fno-strict-aliasing \ -Wno-compare-distinct-pointer-types # TODO: enable me -Wsign-compare -- cgit v1.2.3 From 46c61cbeb82f8a4e6354a692d2be1a35cb0bde29 Mon Sep 17 00:00:00 2001 From: Mykyta Yatsenko Date: Mon, 6 Jan 2025 14:43:21 +0000 Subject: selftests/bpf: Handle prog/attach type comparison in veristat Implemented handling of prog type and attach type stats comparison in veristat. 
To test this change: ``` ./veristat pyperf600.bpf.o -o csv > base1.csv ./veristat pyperf600.bpf.o -o csv > base2.csv ./veristat -C base2.csv base1.csv -o csv ...,raw_tracepoint,raw_tracepoint,MATCH, ...,cgroup_inet_ingress,cgroup_inet_ingress,MATCH ``` Signed-off-by: Mykyta Yatsenko Signed-off-by: Andrii Nakryiko Tested-by: Eduard Zingerman Link: https://lore.kernel.org/bpf/20250106144321.32337-1-mykyta.yatsenko5@gmail.com --- tools/testing/selftests/bpf/veristat.c | 37 ++++++++++++++++++++++++++++++++-- 1 file changed, 35 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/veristat.c b/tools/testing/selftests/bpf/veristat.c index 476bf95cf684..974c808f9321 100644 --- a/tools/testing/selftests/bpf/veristat.c +++ b/tools/testing/selftests/bpf/veristat.c @@ -1688,9 +1688,42 @@ static int parse_stat_value(const char *str, enum stat_id id, struct verif_stats st->stats[id] = val; break; } - case PROG_TYPE: - case ATTACH_TYPE: + case PROG_TYPE: { + enum bpf_prog_type prog_type = 0; + const char *type; + + while ((type = libbpf_bpf_prog_type_str(prog_type))) { + if (strcmp(type, str) == 0) { + st->stats[id] = prog_type; + break; + } + prog_type++; + } + + if (!type) { + fprintf(stderr, "Unrecognized prog type %s\n", str); + return -EINVAL; + } break; + } + case ATTACH_TYPE: { + enum bpf_attach_type attach_type = 0; + const char *type; + + while ((type = libbpf_bpf_attach_type_str(attach_type))) { + if (strcmp(type, str) == 0) { + st->stats[id] = attach_type; + break; + } + attach_type++; + } + + if (!type) { + fprintf(stderr, "Unrecognized attach type %s\n", str); + return -EINVAL; + } + break; + } default: fprintf(stderr, "Unrecognized stat #%d\n", id); return -EINVAL; -- cgit v1.2.3 From 912d6f6697251b0024e56ed24b7873b4800822e7 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Fri, 3 Jan 2025 06:31:14 -0500 Subject: selftests/net: packetdrill: report benign debug flakes as xfail A few recently added packetdrill tests that are known time sensitive (e.g., because testing timestamping) occasionally fail in debug mode: https://netdev.bots.linux.dev/contest.html?executor=vmksft-packetdrill-dbg These failures are well understood. Correctness of the tests is verified in non-debug mode. Continue running in debug mode also, to keep coverage with debug instrumentation. But, only in debug mode, mark these tests with well understood timing issues as XFAIL (known failing) rather than FAIL when failing. Introduce an allow list xfail_list with known cases. Expand the ktap infrastructure with XFAIL support. 
Fixes: eab35989cc37 ("selftests/net: packetdrill: import tcp/fast_recovery, tcp/nagle, tcp/timestamping") Reported-by: Jakub Kicinski Closes: https://lore.kernel.org/netdev/20241218100013.0c698629@kernel.org/ Signed-off-by: Willem de Bruijn Link: https://patch.msgid.link/20250103113142.129251-1-willemdebruijn.kernel@gmail.com Signed-off-by: Paolo Abeni --- tools/testing/selftests/kselftest/ktap_helpers.sh | 15 ++++++++++++-- .../selftests/net/packetdrill/ksft_runner.sh | 23 +++++++++++++++++----- 2 files changed, 31 insertions(+), 7 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/kselftest/ktap_helpers.sh b/tools/testing/selftests/kselftest/ktap_helpers.sh index 79a125eb24c2..05a461890671 100644 --- a/tools/testing/selftests/kselftest/ktap_helpers.sh +++ b/tools/testing/selftests/kselftest/ktap_helpers.sh @@ -7,6 +7,7 @@ KTAP_TESTNO=1 KTAP_CNT_PASS=0 KTAP_CNT_FAIL=0 +KTAP_CNT_XFAIL=0 KTAP_CNT_SKIP=0 KSFT_PASS=0 @@ -69,6 +70,16 @@ ktap_test_skip() { KTAP_CNT_SKIP=$((KTAP_CNT_SKIP+1)) } +ktap_test_xfail() { + description="$1" + + result="ok" + directive="XFAIL" + __ktap_test "$result" "$description" "$directive" + + KTAP_CNT_XFAIL=$((KTAP_CNT_XFAIL+1)) +} + ktap_test_fail() { description="$1" @@ -99,7 +110,7 @@ ktap_exit_fail_msg() { ktap_finished() { ktap_print_totals - if [ $((KTAP_CNT_PASS + KTAP_CNT_SKIP)) -eq "$KSFT_NUM_TESTS" ]; then + if [ $((KTAP_CNT_PASS + KTAP_CNT_SKIP + KTAP_CNT_XFAIL)) -eq "$KSFT_NUM_TESTS" ]; then exit "$KSFT_PASS" else exit "$KSFT_FAIL" @@ -107,5 +118,5 @@ ktap_finished() { } ktap_print_totals() { - echo "# Totals: pass:$KTAP_CNT_PASS fail:$KTAP_CNT_FAIL xfail:0 xpass:0 skip:$KTAP_CNT_SKIP error:0" + echo "# Totals: pass:$KTAP_CNT_PASS fail:$KTAP_CNT_FAIL xfail:$KTAP_CNT_XFAIL xpass:0 skip:$KTAP_CNT_SKIP error:0" } diff --git a/tools/testing/selftests/net/packetdrill/ksft_runner.sh b/tools/testing/selftests/net/packetdrill/ksft_runner.sh index 4071c133f29e..ff989c325eef 100755 --- a/tools/testing/selftests/net/packetdrill/ksft_runner.sh +++ b/tools/testing/selftests/net/packetdrill/ksft_runner.sh @@ -23,7 +23,7 @@ if [ $# -ne 1 ]; then ktap_exit_fail_msg "usage: $0