From 1c0180cd42839fe7a60a61412b7c28cd43553c6b Mon Sep 17 00:00:00 2001 From: Qiang Ma Date: Mon, 29 Dec 2025 15:25:30 +0800 Subject: RISC-V: KVM: Remove unnecessary 'ret' assignment If execution reaches "ret = 0" assignment in kvm_riscv_vcpu_pmu_event_info() then it means kvm_vcpu_write_guest() returned 0 hence ret is already zero and does not need to be assigned 0. Fixes: e309fd113b9f ("RISC-V: KVM: Implement get event info function") Signed-off-by: Qiang Ma Reviewed-by: Anup Patel Link: https://lore.kernel.org/r/20251229072530.3075496-1-maqianga@uniontech.com Signed-off-by: Anup Patel --- arch/riscv/kvm/vcpu_pmu.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/riscv/kvm/vcpu_pmu.c b/arch/riscv/kvm/vcpu_pmu.c index a2fae70ee174..4d8d5e9aa53d 100644 --- a/arch/riscv/kvm/vcpu_pmu.c +++ b/arch/riscv/kvm/vcpu_pmu.c @@ -494,12 +494,9 @@ int kvm_riscv_vcpu_pmu_event_info(struct kvm_vcpu *vcpu, unsigned long saddr_low } ret = kvm_vcpu_write_guest(vcpu, shmem, einfo, shmem_size); - if (ret) { + if (ret) ret = SBI_ERR_INVALID_ADDRESS; - goto free_mem; - } - ret = 0; free_mem: kfree(einfo); out: -- cgit v1.2.3 From 11366ead4f1412befea660777576c89a1bac0c1e Mon Sep 17 00:00:00 2001 From: Jiakai Xu Date: Sun, 25 Jan 2026 14:33:44 +0000 Subject: RISC-V: KVM: Fix null pointer dereference in kvm_riscv_aia_imsic_has_attr() Add a null pointer check for imsic_state before dereferencing it in kvm_riscv_aia_imsic_has_attr(). While the function checks that the vcpu exists, it doesn't verify that the vcpu's imsic_state has been initialized, leading to a null pointer dereference when accessed. This issue was discovered during fuzzing of RISC-V KVM code. The crash occurs when userspace calls KVM_HAS_DEVICE_ATTR ioctl on an AIA IMSIC device before the IMSIC state has been fully initialized for a vcpu. The crash manifests as: Unable to handle kernel paging request at virtual address dfffffff00000001 ... epc : kvm_riscv_aia_imsic_has_attr+0x464/0x50e arch/riscv/kvm/aia_imsic.c:998 ... kvm_riscv_aia_imsic_has_attr+0x464/0x50e arch/riscv/kvm/aia_imsic.c:998 aia_has_attr+0x128/0x2bc arch/riscv/kvm/aia_device.c:471 kvm_device_ioctl_attr virt/kvm/kvm_main.c:4722 [inline] kvm_device_ioctl+0x296/0x374 virt/kvm/kvm_main.c:4739 ... The fix adds a check to return -ENODEV if imsic_state is NULL, which is consistent with other error handling in the function and prevents the null pointer dereference. Fixes: 5463091a51cf ("RISC-V: KVM: Expose IMSIC registers as attributes of AIA irqchip") Signed-off-by: Jiakai Xu Signed-off-by: Jiakai Xu Reviewed-by: Nutty Liu Reviewed-by: Anup Patel Link: https://lore.kernel.org/r/20260125143344.2515451-1-xujiakai2025@iscas.ac.cn Signed-off-by: Anup Patel --- arch/riscv/kvm/aia_imsic.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/riscv/kvm/aia_imsic.c b/arch/riscv/kvm/aia_imsic.c index e597e86491c3..cd070d83663a 100644 --- a/arch/riscv/kvm/aia_imsic.c +++ b/arch/riscv/kvm/aia_imsic.c @@ -993,8 +993,11 @@ int kvm_riscv_aia_imsic_has_attr(struct kvm *kvm, unsigned long type) if (!vcpu) return -ENODEV; - isel = KVM_DEV_RISCV_AIA_IMSIC_GET_ISEL(type); imsic = vcpu->arch.aia_context.imsic_state; + if (!imsic) + return -ENODEV; + + isel = KVM_DEV_RISCV_AIA_IMSIC_GET_ISEL(type); return imsic_mrif_isel_check(imsic->nr_eix, isel); } -- cgit v1.2.3 From aeb1d17d1af5924f7357d7204a293bd8fc06ea13 Mon Sep 17 00:00:00 2001 From: Jiakai Xu Date: Tue, 27 Jan 2026 07:22:19 +0000 Subject: RISC-V: KVM: Fix null pointer dereference in kvm_riscv_aia_imsic_rw_attr() Add a null pointer check for imsic_state before dereferencing it in kvm_riscv_aia_imsic_rw_attr(). While the function checks that the vcpu exists, it doesn't verify that the vcpu's imsic_state has been initialized, leading to a null pointer dereference when accessed. The crash manifests as: Unable to handle kernel paging request at virtual address dfffffff00000006 ... kvm_riscv_aia_imsic_rw_attr+0x2d8/0x854 arch/riscv/kvm/aia_imsic.c:958 aia_set_attr+0x2ee/0x1726 arch/riscv/kvm/aia_device.c:354 kvm_device_ioctl_attr virt/kvm/kvm_main.c:4744 [inline] kvm_device_ioctl+0x296/0x374 virt/kvm/kvm_main.c:4761 vfs_ioctl fs/ioctl.c:51 [inline] ... The fix adds a check to return -ENODEV if imsic_state is NULL and moves isel assignment after imsic_state NULL check. Fixes: 5463091a51cfaa ("RISC-V: KVM: Expose IMSIC registers as attributes of AIA irqchip") Signed-off-by: Jiakai Xu Signed-off-by: Jiakai Xu Reviewed-by: Anup Patel Link: https://lore.kernel.org/r/20260127072219.3366607-1-xujiakai2025@iscas.ac.cn Signed-off-by: Anup Patel --- arch/riscv/kvm/aia_imsic.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/riscv/kvm/aia_imsic.c b/arch/riscv/kvm/aia_imsic.c index cd070d83663a..7edfd129242f 100644 --- a/arch/riscv/kvm/aia_imsic.c +++ b/arch/riscv/kvm/aia_imsic.c @@ -952,8 +952,10 @@ int kvm_riscv_aia_imsic_rw_attr(struct kvm *kvm, unsigned long type, if (!vcpu) return -ENODEV; - isel = KVM_DEV_RISCV_AIA_IMSIC_GET_ISEL(type); imsic = vcpu->arch.aia_context.imsic_state; + if (!imsic) + return -ENODEV; + isel = KVM_DEV_RISCV_AIA_IMSIC_GET_ISEL(type); read_lock_irqsave(&imsic->vsfile_lock, flags); -- cgit v1.2.3 From 003b9dae53ae795930c54945e0b0a41bbd527b44 Mon Sep 17 00:00:00 2001 From: Jiakai Xu Date: Tue, 27 Jan 2026 08:43:13 +0000 Subject: RISC-V: KVM: Skip IMSIC update if vCPU IMSIC state is not initialized kvm_riscv_vcpu_aia_imsic_update() assumes that the vCPU IMSIC state has already been initialized and unconditionally accesses imsic->vsfile_lock. However, in fuzzed ioctl sequences, the AIA device may be initialized at the VM level while the per-vCPU IMSIC state is still NULL. This leads to invalid access when entering the vCPU run loop before IMSIC initialization has completed. The crash manifests as: Unable to handle kernel paging request at virtual address dfffffff00000006 ... kvm_riscv_vcpu_aia_imsic_update arch/riscv/kvm/aia_imsic.c:801 kvm_riscv_vcpu_aia_update arch/riscv/kvm/aia_device.c:493 kvm_arch_vcpu_ioctl_run arch/riscv/kvm/vcpu.c:927 ... Add a guard to skip the IMSIC update path when imsic_state is NULL. This allows the vCPU run loop to continue safely. This issue was discovered during fuzzing of RISC-V KVM code. Fixes: db8b7e97d6137a ("RISC-V: KVM: Add in-kernel virtualization of AIA IMSIC") Signed-off-by: Jiakai Xu Signed-off-by: Jiakai Xu Reviewed-by: Anup Patel Link: https://lore.kernel.org/r/20260127084313.3496485-1-xujiakai2025@iscas.ac.cn Signed-off-by: Anup Patel --- arch/riscv/kvm/aia_imsic.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch') diff --git a/arch/riscv/kvm/aia_imsic.c b/arch/riscv/kvm/aia_imsic.c index 7edfd129242f..60da5fa155a6 100644 --- a/arch/riscv/kvm/aia_imsic.c +++ b/arch/riscv/kvm/aia_imsic.c @@ -797,6 +797,10 @@ int kvm_riscv_vcpu_aia_imsic_update(struct kvm_vcpu *vcpu) if (kvm->arch.aia.mode == KVM_DEV_RISCV_AIA_MODE_EMUL) return 1; + /* IMSIC vCPU state may not be initialized yet */ + if (!imsic) + return 1; + /* Read old IMSIC VS-file details */ read_lock_irqsave(&imsic->vsfile_lock, flags); old_vsfile_hgei = imsic->vsfile_hgei; -- cgit v1.2.3 From f326e846ff89c82083c09fa495457442c252983e Mon Sep 17 00:00:00 2001 From: Pincheng Wang Date: Wed, 27 Aug 2025 00:29:38 +0800 Subject: riscv: KVM: allow Zilsd and Zclsd extensions for Guest/VM Extend the KVM ISA extension ONE_REG interface to allow KVM user space to detect and enable Zilsd and Zclsd extensions for Guest/VM. Signed-off-by: Pincheng Wang Reviewed-by: Nutty Liu Reviewed-by: Anup Patel Link: https://lore.kernel.org/r/20250826162939.1494021-5-pincheng.plct@isrc.iscas.ac.cn Signed-off-by: Anup Patel --- arch/riscv/include/uapi/asm/kvm.h | 2 ++ arch/riscv/kvm/vcpu_onereg.c | 2 ++ 2 files changed, 4 insertions(+) (limited to 'arch') diff --git a/arch/riscv/include/uapi/asm/kvm.h b/arch/riscv/include/uapi/asm/kvm.h index 54f3ad7ed2e4..6c536b2823c3 100644 --- a/arch/riscv/include/uapi/asm/kvm.h +++ b/arch/riscv/include/uapi/asm/kvm.h @@ -192,6 +192,8 @@ enum KVM_RISCV_ISA_EXT_ID { KVM_RISCV_ISA_EXT_ZFBFMIN, KVM_RISCV_ISA_EXT_ZVFBFMIN, KVM_RISCV_ISA_EXT_ZVFBFWMA, + KVM_RISCV_ISA_EXT_ZCLSD, + KVM_RISCV_ISA_EXT_ZILSD, KVM_RISCV_ISA_EXT_MAX, }; diff --git a/arch/riscv/kvm/vcpu_onereg.c b/arch/riscv/kvm/vcpu_onereg.c index 865dae903aa0..347c0a902d28 100644 --- a/arch/riscv/kvm/vcpu_onereg.c +++ b/arch/riscv/kvm/vcpu_onereg.c @@ -63,6 +63,7 @@ static const unsigned long kvm_isa_ext_arr[] = { KVM_ISA_EXT_ARR(ZCB), KVM_ISA_EXT_ARR(ZCD), KVM_ISA_EXT_ARR(ZCF), + KVM_ISA_EXT_ARR(ZCLSD), KVM_ISA_EXT_ARR(ZCMOP), KVM_ISA_EXT_ARR(ZFA), KVM_ISA_EXT_ARR(ZFBFMIN), @@ -79,6 +80,7 @@ static const unsigned long kvm_isa_ext_arr[] = { KVM_ISA_EXT_ARR(ZIHINTNTL), KVM_ISA_EXT_ARR(ZIHINTPAUSE), KVM_ISA_EXT_ARR(ZIHPM), + KVM_ISA_EXT_ARR(ZILSD), KVM_ISA_EXT_ARR(ZIMOP), KVM_ISA_EXT_ARR(ZKND), KVM_ISA_EXT_ARR(ZKNE), -- cgit v1.2.3 From 655d330c058f4e16de46d5c9b203008c630b59c8 Mon Sep 17 00:00:00 2001 From: Xu Lu Date: Mon, 20 Oct 2025 12:24:56 +0800 Subject: RISC-V: KVM: Allow Zalasr extensions for Guest/VM Extend the KVM ISA extension ONE_REG interface to allow KVM user space to detect and enable Zalasr extensions for Guest/VM. Signed-off-by: Xu Lu Reviewed-by: Anup Patel Link: https://lore.kernel.org/r/20251020042457.30915-5-luxu.kernel@bytedance.com Signed-off-by: Anup Patel --- arch/riscv/include/uapi/asm/kvm.h | 1 + arch/riscv/kvm/vcpu_onereg.c | 2 ++ 2 files changed, 3 insertions(+) (limited to 'arch') diff --git a/arch/riscv/include/uapi/asm/kvm.h b/arch/riscv/include/uapi/asm/kvm.h index 6c536b2823c3..6a89c1d00a72 100644 --- a/arch/riscv/include/uapi/asm/kvm.h +++ b/arch/riscv/include/uapi/asm/kvm.h @@ -194,6 +194,7 @@ enum KVM_RISCV_ISA_EXT_ID { KVM_RISCV_ISA_EXT_ZVFBFWMA, KVM_RISCV_ISA_EXT_ZCLSD, KVM_RISCV_ISA_EXT_ZILSD, + KVM_RISCV_ISA_EXT_ZALASR, KVM_RISCV_ISA_EXT_MAX, }; diff --git a/arch/riscv/kvm/vcpu_onereg.c b/arch/riscv/kvm/vcpu_onereg.c index 347c0a902d28..e7ab6cb00646 100644 --- a/arch/riscv/kvm/vcpu_onereg.c +++ b/arch/riscv/kvm/vcpu_onereg.c @@ -50,6 +50,7 @@ static const unsigned long kvm_isa_ext_arr[] = { KVM_ISA_EXT_ARR(ZAAMO), KVM_ISA_EXT_ARR(ZABHA), KVM_ISA_EXT_ARR(ZACAS), + KVM_ISA_EXT_ARR(ZALASR), KVM_ISA_EXT_ARR(ZALRSC), KVM_ISA_EXT_ARR(ZAWRS), KVM_ISA_EXT_ARR(ZBA), @@ -189,6 +190,7 @@ static bool kvm_riscv_vcpu_isa_disable_allowed(unsigned long ext) case KVM_RISCV_ISA_EXT_ZAAMO: case KVM_RISCV_ISA_EXT_ZABHA: case KVM_RISCV_ISA_EXT_ZACAS: + case KVM_RISCV_ISA_EXT_ZALASR: case KVM_RISCV_ISA_EXT_ZALRSC: case KVM_RISCV_ISA_EXT_ZAWRS: case KVM_RISCV_ISA_EXT_ZBA: -- cgit v1.2.3 From ed7ae7a34bea06f81270866dfbe619a22518666b Mon Sep 17 00:00:00 2001 From: Jessica Liu Date: Thu, 27 Nov 2025 16:51:37 +0800 Subject: RISC-V: KVM: Transparent huge page support Use block mapping if backed by a THP, as implemented in architectures like ARM and x86_64. Signed-off-by: Jessica Liu Reviewed-by: Anup Patel Link: https://lore.kernel.org/r/20251127165137780QbUOVPKPAfWSGAFl5qtRy@zte.com.cn Signed-off-by: Anup Patel --- arch/riscv/kvm/mmu.c | 140 ++++++++++++++++++++++++++++++++++++++++++++++++ arch/riscv/mm/pgtable.c | 2 + 2 files changed, 142 insertions(+) (limited to 'arch') diff --git a/arch/riscv/kvm/mmu.c b/arch/riscv/kvm/mmu.c index 4ab06697bfc0..0b75eb2a1820 100644 --- a/arch/riscv/kvm/mmu.c +++ b/arch/riscv/kvm/mmu.c @@ -305,6 +305,142 @@ bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range) return pte_young(ptep_get(ptep)); } +static bool fault_supports_gstage_huge_mapping(struct kvm_memory_slot *memslot, + unsigned long hva) +{ + hva_t uaddr_start, uaddr_end; + gpa_t gpa_start; + size_t size; + + size = memslot->npages * PAGE_SIZE; + uaddr_start = memslot->userspace_addr; + uaddr_end = uaddr_start + size; + + gpa_start = memslot->base_gfn << PAGE_SHIFT; + + /* + * Pages belonging to memslots that don't have the same alignment + * within a PMD for userspace and GPA cannot be mapped with g-stage + * PMD entries, because we'll end up mapping the wrong pages. + * + * Consider a layout like the following: + * + * memslot->userspace_addr: + * +-----+--------------------+--------------------+---+ + * |abcde|fgh vs-stage block | vs-stage block tv|xyz| + * +-----+--------------------+--------------------+---+ + * + * memslot->base_gfn << PAGE_SHIFT: + * +---+--------------------+--------------------+-----+ + * |abc|def g-stage block | g-stage block |tvxyz| + * +---+--------------------+--------------------+-----+ + * + * If we create those g-stage blocks, we'll end up with this incorrect + * mapping: + * d -> f + * e -> g + * f -> h + */ + if ((gpa_start & (PMD_SIZE - 1)) != (uaddr_start & (PMD_SIZE - 1))) + return false; + + /* + * Next, let's make sure we're not trying to map anything not covered + * by the memslot. This means we have to prohibit block size mappings + * for the beginning and end of a non-block aligned and non-block sized + * memory slot (illustrated by the head and tail parts of the + * userspace view above containing pages 'abcde' and 'xyz', + * respectively). + * + * Note that it doesn't matter if we do the check using the + * userspace_addr or the base_gfn, as both are equally aligned (per + * the check above) and equally sized. + */ + return (hva >= ALIGN(uaddr_start, PMD_SIZE)) && (hva < ALIGN_DOWN(uaddr_end, PMD_SIZE)); +} + +static int get_hva_mapping_size(struct kvm *kvm, + unsigned long hva) +{ + int size = PAGE_SIZE; + unsigned long flags; + pgd_t pgd; + p4d_t p4d; + pud_t pud; + pmd_t pmd; + + /* + * Disable IRQs to prevent concurrent tear down of host page tables, + * e.g. if the primary MMU promotes a P*D to a huge page and then frees + * the original page table. + */ + local_irq_save(flags); + + /* + * Read each entry once. As above, a non-leaf entry can be promoted to + * a huge page _during_ this walk. Re-reading the entry could send the + * walk into the weeks, e.g. p*d_leaf() returns false (sees the old + * value) and then p*d_offset() walks into the target huge page instead + * of the old page table (sees the new value). + */ + pgd = pgdp_get(pgd_offset(kvm->mm, hva)); + if (pgd_none(pgd)) + goto out; + + p4d = p4dp_get(p4d_offset(&pgd, hva)); + if (p4d_none(p4d) || !p4d_present(p4d)) + goto out; + + pud = pudp_get(pud_offset(&p4d, hva)); + if (pud_none(pud) || !pud_present(pud)) + goto out; + + if (pud_leaf(pud)) { + size = PUD_SIZE; + goto out; + } + + pmd = pmdp_get(pmd_offset(&pud, hva)); + if (pmd_none(pmd) || !pmd_present(pmd)) + goto out; + + if (pmd_leaf(pmd)) + size = PMD_SIZE; + +out: + local_irq_restore(flags); + return size; +} + +static unsigned long transparent_hugepage_adjust(struct kvm *kvm, + struct kvm_memory_slot *memslot, + unsigned long hva, + kvm_pfn_t *hfnp, gpa_t *gpa) +{ + kvm_pfn_t hfn = *hfnp; + + /* + * Make sure the adjustment is done only for THP pages. Also make + * sure that the HVA and GPA are sufficiently aligned and that the + * block map is contained within the memslot. + */ + if (fault_supports_gstage_huge_mapping(memslot, hva)) { + int sz; + + sz = get_hva_mapping_size(kvm, hva); + if (sz < PMD_SIZE) + return sz; + + *gpa &= PMD_MASK; + hfn &= ~(PTRS_PER_PMD - 1); + *hfnp = hfn; + + return PMD_SIZE; + } + + return PAGE_SIZE; +} + int kvm_riscv_mmu_map(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot, gpa_t gpa, unsigned long hva, bool is_write, struct kvm_gstage_mapping *out_map) @@ -398,6 +534,10 @@ int kvm_riscv_mmu_map(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot, if (mmu_invalidate_retry(kvm, mmu_seq)) goto out_unlock; + /* Check if we are backed by a THP and thus use block mapping if possible */ + if (vma_pagesize == PAGE_SIZE) + vma_pagesize = transparent_hugepage_adjust(kvm, memslot, hva, &hfn, &gpa); + if (writable) { mark_page_dirty_in_slot(kvm, memslot, gfn); ret = kvm_riscv_gstage_map_page(&gstage, pcache, gpa, hfn << PAGE_SHIFT, diff --git a/arch/riscv/mm/pgtable.c b/arch/riscv/mm/pgtable.c index 807c0a0de182..b02bf71db33d 100644 --- a/arch/riscv/mm/pgtable.c +++ b/arch/riscv/mm/pgtable.c @@ -47,6 +47,7 @@ pud_t *pud_offset(p4d_t *p4d, unsigned long address) return (pud_t *)p4d; } +EXPORT_SYMBOL_GPL(pud_offset); p4d_t *p4d_offset(pgd_t *pgd, unsigned long address) { @@ -55,6 +56,7 @@ p4d_t *p4d_offset(pgd_t *pgd, unsigned long address) return (p4d_t *)pgd; } +EXPORT_SYMBOL_GPL(p4d_offset); #endif #ifdef CONFIG_HAVE_ARCH_HUGE_VMAP -- cgit v1.2.3 From 376e2f8cca2816c489a9196e65cc904d1a907fd2 Mon Sep 17 00:00:00 2001 From: Xu Lu Date: Sun, 4 Jan 2026 21:34:57 +0800 Subject: irqchip/riscv-imsic: Adjust the number of available guest irq files Currently, KVM assumes the minimum of implemented HGEIE bits and "BIT(gc->guest_index_bits) - 1" as the number of guest files available across all CPUs. This will not work when CPUs have different number of guest files because KVM may incorrectly allocate a guest file on a CPU with fewer guest files. To address above, during initialization, calculate the number of available guest interrupt files according to MMIO resources and constrain the number of guest interrupt files that can be allocated by KVM. Signed-off-by: Xu Lu Reviewed-by: Nutty Liu Reviewed-by: Anup Patel Acked-by: Thomas Gleixner Link: https://lore.kernel.org/r/20260104133457.57742-1-luxu.kernel@bytedance.com Signed-off-by: Anup Patel --- arch/riscv/kvm/aia.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/riscv/kvm/aia.c b/arch/riscv/kvm/aia.c index dad318185660..cac3c2b51d72 100644 --- a/arch/riscv/kvm/aia.c +++ b/arch/riscv/kvm/aia.c @@ -630,7 +630,7 @@ int kvm_riscv_aia_init(void) */ if (gc) kvm_riscv_aia_nr_hgei = min((ulong)kvm_riscv_aia_nr_hgei, - BIT(gc->guest_index_bits) - 1); + gc->nr_guest_files); else kvm_riscv_aia_nr_hgei = 0; -- cgit v1.2.3