From a63ca4236e6799cf4343f9aec9d92afdfa582446 Mon Sep 17 00:00:00 2001 From: Ackerley Tng Date: Thu, 16 Oct 2025 10:28:44 -0700 Subject: KVM: guest_memfd: Use guest mem inodes instead of anonymous inodes guest_memfd's inode represents memory the guest_memfd is providing. guest_memfd's file represents a struct kvm's view of that memory. Using a custom inode allows customization of the inode teardown process via callbacks. For example, ->evict_inode() allows customization of the truncation process on file close, and ->destroy_inode() and ->free_inode() allow customization of the inode freeing process. Customizing the truncation process allows flexibility in management of guest_memfd memory and customization of the inode freeing process allows proper cleanup of memory metadata stored on the inode. Memory metadata is more appropriately stored on the inode (as opposed to the file), since the metadata is for the memory and is not unique to a specific binding and struct kvm. Acked-by: David Hildenbrand Co-developed-by: Fuad Tabba Signed-off-by: Fuad Tabba Signed-off-by: Ackerley Tng Signed-off-by: Shivank Garg Tested-by: Ashish Kalra [sean: drop helpers, open code logic in __kvm_gmem_create()] Link: https://lore.kernel.org/r/20251016172853.52451-4-seanjc@google.com Signed-off-by: Sean Christopherson --- include/uapi/linux/magic.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/magic.h b/include/uapi/linux/magic.h index bb575f3ab45e..638ca21b7a90 100644 --- a/include/uapi/linux/magic.h +++ b/include/uapi/linux/magic.h @@ -103,5 +103,6 @@ #define DEVMEM_MAGIC 0x454d444d /* "DMEM" */ #define SECRETMEM_MAGIC 0x5345434d /* "SECM" */ #define PID_FS_MAGIC 0x50494446 /* "PIDF" */ +#define GUEST_MEMFD_MAGIC 0x474d454d /* "GMEM" */ #endif /* __LINUX_MAGIC_H__ */ -- cgit v1.2.3 From ad9c62bd8946621ed02ac94131a921222508a8bc Mon Sep 17 00:00:00 2001 From: Jiaqi Yan Date: Mon, 13 Oct 2025 18:59:01 +0000 Subject: KVM: arm64: VM exit to userspace to handle SEA When APEI fails to handle a stage-2 synchronous external abort (SEA), today KVM injects an asynchronous SError to the VCPU then resumes it, which usually results in unpleasant guest kernel panic. One major situation of guest SEA is when vCPU consumes recoverable uncorrected memory error (UER). Although SError and guest kernel panic effectively stops the propagation of corrupted memory, guest may re-use the corrupted memory if auto-rebooted; in worse case, guest boot may run into poisoned memory. So there is room to recover from an UER in a more graceful manner. Alternatively KVM can redirect the synchronous SEA event to VMM to - Reduce blast radius if possible. VMM can inject a SEA to VCPU via KVM's existing KVM_SET_VCPU_EVENTS API. If the memory poison consumption or fault is not from guest kernel, blast radius can be limited to the triggering thread in guest userspace, so VM can keep running. - Allow VMM to protect from future memory poison consumption by unmapping the page from stage-2, or to interrupt guest of the poisoned page so guest kernel can unmap it from stage-1 page table. - Allow VMM to track SEA events that VM customers care about, to restart VM when certain number of distinct poison events have happened, to provide observability to customers in log management UI. Introduce an userspace-visible feature to enable VMM handle SEA: - KVM_CAP_ARM_SEA_TO_USER. As the alternative fallback behavior when host APEI fails to claim a SEA, userspace can opt in this new capability to let KVM exit to userspace during SEA if it is not owned by host. - KVM_EXIT_ARM_SEA. A new exit reason is introduced for this. KVM fills kvm_run.arm_sea with as much as possible information about the SEA, enabling VMM to emulate SEA to guest by itself. - Sanitized ESR_EL2. The general rule is to keep only the bits useful for userspace and relevant to guest memory. - Flags indicating if faulting guest physical address is valid. - Faulting guest physical and virtual addresses if valid. Signed-off-by: Jiaqi Yan Co-developed-by: Oliver Upton Signed-off-by: Oliver Upton Link: https://msgid.link/20251013185903.1372553-2-jiaqiyan@google.com Signed-off-by: Oliver Upton --- include/uapi/linux/kvm.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 52f6000ab020..1e541193e98d 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -179,6 +179,7 @@ struct kvm_xen_exit { #define KVM_EXIT_LOONGARCH_IOCSR 38 #define KVM_EXIT_MEMORY_FAULT 39 #define KVM_EXIT_TDX 40 +#define KVM_EXIT_ARM_SEA 41 /* For KVM_EXIT_INTERNAL_ERROR */ /* Emulate instruction failed. */ @@ -473,6 +474,14 @@ struct kvm_run { } setup_event_notify; }; } tdx; + /* KVM_EXIT_ARM_SEA */ + struct { +#define KVM_EXIT_ARM_SEA_FLAG_GPA_VALID (1ULL << 0) + __u64 flags; + __u64 esr; + __u64 gva; + __u64 gpa; + } arm_sea; /* Fix the size of the union. */ char padding[256]; }; @@ -963,6 +972,7 @@ struct kvm_enable_cap { #define KVM_CAP_RISCV_MP_STATE_RESET 242 #define KVM_CAP_ARM_CACHEABLE_PFNMAP_SUPPORTED 243 #define KVM_CAP_GUEST_MEMFD_FLAGS 244 +#define KVM_CAP_ARM_SEA_TO_USER 245 struct kvm_irq_routing_irqchip { __u32 irqchip; -- cgit v1.2.3 From 8e8678e740ecde2ae4a0404fd9b4ed2b726e236d Mon Sep 17 00:00:00 2001 From: Janosch Frank Date: Tue, 8 Jul 2025 12:57:57 +0000 Subject: KVM: s390: Add capability that forwards operation exceptions Setting KVM_CAP_S390_USER_OPEREXEC will forward all operation exceptions to user space. This also includes the 0x0000 instructions managed by KVM_CAP_S390_USER_INSTR0. It's helpful if user space wants to emulate instructions which do not (yet) have an opcode. While we're at it refine the documentation for KVM_CAP_S390_USER_INSTR0. Signed-off-by: Janosch Frank Reviewed-by: Claudio Imbrenda Acked-by: Christian Borntraeger Signed-off-by: Janosch Frank --- include/uapi/linux/kvm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 52f6000ab020..8ab07396ce3b 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -963,6 +963,7 @@ struct kvm_enable_cap { #define KVM_CAP_RISCV_MP_STATE_RESET 242 #define KVM_CAP_ARM_CACHEABLE_PFNMAP_SUPPORTED 243 #define KVM_CAP_GUEST_MEMFD_FLAGS 244 +#define KVM_CAP_S390_USER_OPEREXEC 245 struct kvm_irq_routing_irqchip { __u32 irqchip; -- cgit v1.2.3