author		Paolo Bonzini <pbonzini@redhat.com>	2026-02-09 19:05:42 +0100
committer	Paolo Bonzini <pbonzini@redhat.com>	2026-02-11 12:45:00 -0500
commit		54f15ebfc61ee8499a97f2dbfc18b1b13fdcb524 (patch)
tree		4e64e081daaa35891dbb5cfb049701ad943d0dbb /arch
parent		9e03b7caf4e65f5a5841dfed540fdcc3ff061734 (diff)
parent		376e2f8cca2816c489a9196e65cc904d1a907fd2 (diff)
Merge tag 'kvm-riscv-6.20-1' of https://github.com/kvm-riscv/linux into HEAD
KVM/riscv changes for 6.20
- Fixes for issues discovered by KVM API fuzzing in
kvm_riscv_aia_imsic_has_attr(), kvm_riscv_aia_imsic_rw_attr(),
and kvm_riscv_vcpu_aia_imsic_update()
- Allow Zalasr, Zilsd and Zclsd extensions for Guest/VM
- Add riscv vm satp modes in KVM selftests
- Transparent huge page support for G-stage
- Adjust the number of available guest irq files based on
MMIO register sizes in DeviceTree or ACPI
Diffstat (limited to 'arch')
-rw-r--r--	arch/riscv/include/uapi/asm/kvm.h	  3
-rw-r--r--	arch/riscv/kvm/aia.c			  2
-rw-r--r--	arch/riscv/kvm/aia_imsic.c		 13
-rw-r--r--	arch/riscv/kvm/mmu.c			140
-rw-r--r--	arch/riscv/kvm/vcpu_onereg.c		  4
-rw-r--r--	arch/riscv/kvm/vcpu_pmu.c		  5
-rw-r--r--	arch/riscv/mm/pgtable.c			  2
7 files changed, 162 insertions, 7 deletions
diff --git a/arch/riscv/include/uapi/asm/kvm.h b/arch/riscv/include/uapi/asm/kvm.h
index 54f3ad7ed2e4..6a89c1d00a72 100644
--- a/arch/riscv/include/uapi/asm/kvm.h
+++ b/arch/riscv/include/uapi/asm/kvm.h
@@ -192,6 +192,9 @@ enum KVM_RISCV_ISA_EXT_ID {
 	KVM_RISCV_ISA_EXT_ZFBFMIN,
 	KVM_RISCV_ISA_EXT_ZVFBFMIN,
 	KVM_RISCV_ISA_EXT_ZVFBFWMA,
+	KVM_RISCV_ISA_EXT_ZCLSD,
+	KVM_RISCV_ISA_EXT_ZILSD,
+	KVM_RISCV_ISA_EXT_ZALASR,
 	KVM_RISCV_ISA_EXT_MAX,
 };
diff --git a/arch/riscv/kvm/aia.c b/arch/riscv/kvm/aia.c
index dad318185660..cac3c2b51d72 100644
--- a/arch/riscv/kvm/aia.c
+++ b/arch/riscv/kvm/aia.c
@@ -630,7 +630,7 @@ int kvm_riscv_aia_init(void)
 	 */
 	if (gc)
 		kvm_riscv_aia_nr_hgei = min((ulong)kvm_riscv_aia_nr_hgei,
-					    BIT(gc->guest_index_bits) - 1);
+					    gc->nr_guest_files);
 	else
 		kvm_riscv_aia_nr_hgei = 0;
diff --git a/arch/riscv/kvm/aia_imsic.c b/arch/riscv/kvm/aia_imsic.c
index e597e86491c3..60da5fa155a6 100644
--- a/arch/riscv/kvm/aia_imsic.c
+++ b/arch/riscv/kvm/aia_imsic.c
@@ -797,6 +797,10 @@ int kvm_riscv_vcpu_aia_imsic_update(struct kvm_vcpu *vcpu)
 	if (kvm->arch.aia.mode == KVM_DEV_RISCV_AIA_MODE_EMUL)
 		return 1;
 
+	/* IMSIC vCPU state may not be initialized yet */
+	if (!imsic)
+		return 1;
+
 	/* Read old IMSIC VS-file details */
 	read_lock_irqsave(&imsic->vsfile_lock, flags);
 	old_vsfile_hgei = imsic->vsfile_hgei;
@@ -952,8 +956,10 @@ int kvm_riscv_aia_imsic_rw_attr(struct kvm *kvm, unsigned long type,
 	if (!vcpu)
 		return -ENODEV;
 
-	isel = KVM_DEV_RISCV_AIA_IMSIC_GET_ISEL(type);
 	imsic = vcpu->arch.aia_context.imsic_state;
+	if (!imsic)
+		return -ENODEV;
+	isel = KVM_DEV_RISCV_AIA_IMSIC_GET_ISEL(type);
 
 	read_lock_irqsave(&imsic->vsfile_lock, flags);
 
@@ -993,8 +999,11 @@ int kvm_riscv_aia_imsic_has_attr(struct kvm *kvm, unsigned long type)
 	if (!vcpu)
 		return -ENODEV;
 
-	isel = KVM_DEV_RISCV_AIA_IMSIC_GET_ISEL(type);
 	imsic = vcpu->arch.aia_context.imsic_state;
+	if (!imsic)
+		return -ENODEV;
+
+	isel = KVM_DEV_RISCV_AIA_IMSIC_GET_ISEL(type);
 
 	return imsic_mrif_isel_check(imsic->nr_eix, isel);
 }
diff --git a/arch/riscv/kvm/mmu.c b/arch/riscv/kvm/mmu.c
index 4ab06697bfc0..0b75eb2a1820 100644
--- a/arch/riscv/kvm/mmu.c
+++ b/arch/riscv/kvm/mmu.c
@@ -305,6 +305,142 @@ bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
 	return pte_young(ptep_get(ptep));
 }
 
+static bool fault_supports_gstage_huge_mapping(struct kvm_memory_slot *memslot,
+					       unsigned long hva)
+{
+	hva_t uaddr_start, uaddr_end;
+	gpa_t gpa_start;
+	size_t size;
+
+	size = memslot->npages * PAGE_SIZE;
+	uaddr_start = memslot->userspace_addr;
+	uaddr_end = uaddr_start + size;
+
+	gpa_start = memslot->base_gfn << PAGE_SHIFT;
+
+	/*
+	 * Pages belonging to memslots that don't have the same alignment
+	 * within a PMD for userspace and GPA cannot be mapped with g-stage
+	 * PMD entries, because we'll end up mapping the wrong pages.
+	 *
+	 * Consider a layout like the following:
+	 *
+	 *    memslot->userspace_addr:
+	 *    +-----+--------------------+--------------------+---+
+	 *    |abcde|fgh  vs-stage block |  vs-stage block  tv|xyz|
+	 *    +-----+--------------------+--------------------+---+
+	 *
+	 *    memslot->base_gfn << PAGE_SHIFT:
+	 *      +---+--------------------+--------------------+-----+
+	 *      |abc|def  g-stage block  |   g-stage block    |tvxyz|
+	 *      +---+--------------------+--------------------+-----+
+	 *
+	 * If we create those g-stage blocks, we'll end up with this
+	 * incorrect mapping:
+	 *   d -> f
+	 *   e -> g
+	 *   f -> h
+	 */
+	if ((gpa_start & (PMD_SIZE - 1)) != (uaddr_start & (PMD_SIZE - 1)))
+		return false;
+
+	/*
+	 * Next, let's make sure we're not trying to map anything not covered
+	 * by the memslot. This means we have to prohibit block size mappings
+	 * for the beginning and end of a non-block aligned and non-block
+	 * sized memory slot (illustrated by the head and tail parts of the
+	 * userspace view above containing pages 'abcde' and 'xyz',
+	 * respectively).
+	 *
+	 * Note that it doesn't matter if we do the check using the
+	 * userspace_addr or the base_gfn, as both are equally aligned (per
+	 * the check above) and equally sized.
+	 */
+	return (hva >= ALIGN(uaddr_start, PMD_SIZE)) &&
+	       (hva < ALIGN_DOWN(uaddr_end, PMD_SIZE));
+}
+
+static int get_hva_mapping_size(struct kvm *kvm,
+				unsigned long hva)
+{
+	int size = PAGE_SIZE;
+	unsigned long flags;
+	pgd_t pgd;
+	p4d_t p4d;
+	pud_t pud;
+	pmd_t pmd;
+
+	/*
+	 * Disable IRQs to prevent concurrent tear down of host page tables,
+	 * e.g. if the primary MMU promotes a P*D to a huge page and then
+	 * frees the original page table.
+	 */
+	local_irq_save(flags);
+
+	/*
+	 * Read each entry once. As above, a non-leaf entry can be promoted
+	 * to a huge page _during_ this walk. Re-reading the entry could send
+	 * the walk into the weeds, e.g. p*d_leaf() returns false (sees the
+	 * old value) and then p*d_offset() walks into the target huge page
+	 * instead of the old page table (sees the new value).
+	 */
+	pgd = pgdp_get(pgd_offset(kvm->mm, hva));
+	if (pgd_none(pgd))
+		goto out;
+
+	p4d = p4dp_get(p4d_offset(&pgd, hva));
+	if (p4d_none(p4d) || !p4d_present(p4d))
+		goto out;
+
+	pud = pudp_get(pud_offset(&p4d, hva));
+	if (pud_none(pud) || !pud_present(pud))
+		goto out;
+
+	if (pud_leaf(pud)) {
+		size = PUD_SIZE;
+		goto out;
+	}
+
+	pmd = pmdp_get(pmd_offset(&pud, hva));
+	if (pmd_none(pmd) || !pmd_present(pmd))
+		goto out;
+
+	if (pmd_leaf(pmd))
+		size = PMD_SIZE;
+
+out:
+	local_irq_restore(flags);
+	return size;
+}
+
+static unsigned long transparent_hugepage_adjust(struct kvm *kvm,
+						 struct kvm_memory_slot *memslot,
+						 unsigned long hva,
+						 kvm_pfn_t *hfnp, gpa_t *gpa)
+{
+	kvm_pfn_t hfn = *hfnp;
+
+	/*
+	 * Make sure the adjustment is done only for THP pages. Also make
+	 * sure that the HVA and GPA are sufficiently aligned and that the
+	 * block map is contained within the memslot.
+	 */
+	if (fault_supports_gstage_huge_mapping(memslot, hva)) {
+		int sz;
+
+		sz = get_hva_mapping_size(kvm, hva);
+		if (sz < PMD_SIZE)
+			return sz;
+
+		*gpa &= PMD_MASK;
+		hfn &= ~(PTRS_PER_PMD - 1);
+		*hfnp = hfn;
+
+		return PMD_SIZE;
+	}
+
+	return PAGE_SIZE;
+}
+
 int kvm_riscv_mmu_map(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot,
 		      gpa_t gpa, unsigned long hva, bool is_write,
 		      struct kvm_gstage_mapping *out_map)
@@ -398,6 +534,10 @@ int kvm_riscv_mmu_map(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot,
 	if (mmu_invalidate_retry(kvm, mmu_seq))
 		goto out_unlock;
 
+	/* Check if we are backed by a THP and thus use block mapping if possible */
+	if (vma_pagesize == PAGE_SIZE)
+		vma_pagesize = transparent_hugepage_adjust(kvm, memslot, hva, &hfn, &gpa);
+
 	if (writable) {
 		mark_page_dirty_in_slot(kvm, memslot, gfn);
 		ret = kvm_riscv_gstage_map_page(&gstage, pcache, gpa,
 						hfn << PAGE_SHIFT,
diff --git a/arch/riscv/kvm/vcpu_onereg.c b/arch/riscv/kvm/vcpu_onereg.c
index 865dae903aa0..e7ab6cb00646 100644
--- a/arch/riscv/kvm/vcpu_onereg.c
+++ b/arch/riscv/kvm/vcpu_onereg.c
@@ -50,6 +50,7 @@ static const unsigned long kvm_isa_ext_arr[] = {
 	KVM_ISA_EXT_ARR(ZAAMO),
 	KVM_ISA_EXT_ARR(ZABHA),
 	KVM_ISA_EXT_ARR(ZACAS),
+	KVM_ISA_EXT_ARR(ZALASR),
 	KVM_ISA_EXT_ARR(ZALRSC),
 	KVM_ISA_EXT_ARR(ZAWRS),
 	KVM_ISA_EXT_ARR(ZBA),
@@ -63,6 +64,7 @@ static const unsigned long kvm_isa_ext_arr[] = {
 	KVM_ISA_EXT_ARR(ZCB),
 	KVM_ISA_EXT_ARR(ZCD),
 	KVM_ISA_EXT_ARR(ZCF),
+	KVM_ISA_EXT_ARR(ZCLSD),
 	KVM_ISA_EXT_ARR(ZCMOP),
 	KVM_ISA_EXT_ARR(ZFA),
 	KVM_ISA_EXT_ARR(ZFBFMIN),
@@ -79,6 +81,7 @@ static const unsigned long kvm_isa_ext_arr[] = {
 	KVM_ISA_EXT_ARR(ZIHINTNTL),
 	KVM_ISA_EXT_ARR(ZIHINTPAUSE),
 	KVM_ISA_EXT_ARR(ZIHPM),
+	KVM_ISA_EXT_ARR(ZILSD),
 	KVM_ISA_EXT_ARR(ZIMOP),
 	KVM_ISA_EXT_ARR(ZKND),
 	KVM_ISA_EXT_ARR(ZKNE),
@@ -187,6 +190,7 @@ static bool kvm_riscv_vcpu_isa_disable_allowed(unsigned long ext)
 	case KVM_RISCV_ISA_EXT_ZAAMO:
 	case KVM_RISCV_ISA_EXT_ZABHA:
 	case KVM_RISCV_ISA_EXT_ZACAS:
+	case KVM_RISCV_ISA_EXT_ZALASR:
 	case KVM_RISCV_ISA_EXT_ZALRSC:
 	case KVM_RISCV_ISA_EXT_ZAWRS:
 	case KVM_RISCV_ISA_EXT_ZBA:
diff --git a/arch/riscv/kvm/vcpu_pmu.c b/arch/riscv/kvm/vcpu_pmu.c
index a2fae70ee174..4d8d5e9aa53d 100644
--- a/arch/riscv/kvm/vcpu_pmu.c
+++ b/arch/riscv/kvm/vcpu_pmu.c
@@ -494,12 +494,9 @@ int kvm_riscv_vcpu_pmu_event_info(struct kvm_vcpu *vcpu, unsigned long saddr_low
 	}
 
 	ret = kvm_vcpu_write_guest(vcpu, shmem, einfo, shmem_size);
-	if (ret) {
+	if (ret)
 		ret = SBI_ERR_INVALID_ADDRESS;
-		goto free_mem;
-	}
 
-	ret = 0;
 free_mem:
 	kfree(einfo);
 out:
diff --git a/arch/riscv/mm/pgtable.c b/arch/riscv/mm/pgtable.c
index 807c0a0de182..b02bf71db33d 100644
--- a/arch/riscv/mm/pgtable.c
+++ b/arch/riscv/mm/pgtable.c
@@ -47,6 +47,7 @@ pud_t *pud_offset(p4d_t *p4d, unsigned long address)
 
 	return (pud_t *)p4d;
 }
+EXPORT_SYMBOL_GPL(pud_offset);
 
 p4d_t *p4d_offset(pgd_t *pgd, unsigned long address)
 {
@@ -55,6 +56,7 @@ p4d_t *p4d_offset(pgd_t *pgd, unsigned long address)
 
 	return (p4d_t *)pgd;
 }
+EXPORT_SYMBOL_GPL(p4d_offset);
 #endif
 
 #ifdef CONFIG_HAVE_ARCH_HUGE_VMAP
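
The two predicates in the fault_supports_gstage_huge_mapping() hunk above are easiest to see with concrete numbers. The following standalone userspace sketch is not part of the patch: supports_huge() and all addresses are invented for illustration, and only the checks themselves mirror the kernel code.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define PMD_SIZE	(2ULL << 20)	/* 2 MiB block */
#define ALIGN_UP(x)	(((x) + PMD_SIZE - 1) & ~(PMD_SIZE - 1))
#define ALIGN_DOWN(x)	((x) & ~(PMD_SIZE - 1))

/* Same two checks as the kernel's fault_supports_gstage_huge_mapping(). */
static bool supports_huge(uint64_t uaddr_start, uint64_t gpa_start,
			  uint64_t size, uint64_t hva)
{
	uint64_t uaddr_end = uaddr_start + size;

	/* HVA and GPA must share the same offset within a PMD... */
	if ((gpa_start & (PMD_SIZE - 1)) != (uaddr_start & (PMD_SIZE - 1)))
		return false;

	/* ...and the faulting HVA must fall in the PMD-aligned interior. */
	return hva >= ALIGN_UP(uaddr_start) && hva < ALIGN_DOWN(uaddr_end);
}

int main(void)
{
	/* Offsets within the PMD differ (0x1000 vs 0x3000): prints 0. */
	printf("%d\n", supports_huge(0x40001000, 0x80003000,
				     8 * PMD_SIZE, 0x40400000));
	/* Same offset, fault in the interior of the slot: prints 1. */
	printf("%d\n", supports_huge(0x40001000, 0x80001000,
				     8 * PMD_SIZE, 0x40400000));
	return 0;
}

The first call fails for exactly the d->f/e->g mismatch shown in the diagram; the second succeeds because the offsets agree and the fault lands past the unaligned 'abcde' head and before the 'xyz' tail.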
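The get_hva_mapping_size() hunk depends on a read-once discipline: each entry is loaded a single time via pgdp_get()/p4dp_get()/pudp_get()/pmdp_get(), and both the leaf test and any descent are derived from that snapshot. A toy sketch of the same idea follows; the toy_pte_t layout and helper names are made up, and only the single-load pattern mirrors the kernel code.

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

/* Toy "page-table entry": bit 0 set means leaf (huge) mapping. */
typedef uint64_t toy_pte_t;
#define TOY_LEAF 0x1ULL

static _Atomic toy_pte_t entry;

/* Racy: two loads of the same entry. If a concurrent promotion to a
 * huge page lands between them, the second load sees a different value
 * than the one the leaf test was made on. */
static toy_pte_t read_twice(void)
{
	if (atomic_load(&entry) & TOY_LEAF)	/* load #1: leaf test */
		return atomic_load(&entry);	/* load #2: may differ! */
	return 0;
}

/* Read-once, as in get_hva_mapping_size(): snapshot the entry a single
 * time and derive every later decision from the snapshot. */
static toy_pte_t read_once(void)
{
	toy_pte_t snap = atomic_load(&entry);	/* the only load */

	return (snap & TOY_LEAF) ? snap : 0;
}

int main(void)
{
	atomic_store(&entry, 0x80200000ULL | TOY_LEAF);
	printf("twice=%#llx once=%#llx\n",
	       (unsigned long long)read_twice(),
	       (unsigned long long)read_once());
	return 0;
}

The IRQ masking in the kernel version adds the other half of the guarantee: the old page table cannot be freed while the walk holds a snapshot that still points into it.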
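Finally, the adjustment in transparent_hugepage_adjust() is plain mask arithmetic: round the GPA down to its 2 MiB boundary and clear the low nine bits of the host PFN so both point at the start of the block. A worked example with made-up values, again outside the patch:

#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT	12
#define PMD_SIZE	(2ULL << 20)		/* 2 MiB */
#define PMD_MASK	(~(PMD_SIZE - 1))
#define PTRS_PER_PMD	512			/* 4 KiB pages per 2 MiB block */

int main(void)
{
	uint64_t gpa = 0x80234000;		/* faulting guest-physical address */
	uint64_t hfn = 0x40234;			/* backing host PFN */

	/* Same two operations as transparent_hugepage_adjust(): snap both
	 * the GPA and the host PFN back to the start of the 2 MiB block. */
	gpa &= PMD_MASK;			/* -> 0x80200000 */
	hfn &= ~(uint64_t)(PTRS_PER_PMD - 1);	/* -> 0x40200 */

	/* The PMD now maps GPA 0x80200000..0x803fffff to host physical
	 * 0x40200000..0x403fffff in one go. */
	printf("gpa=%#llx hpa=%#llx\n",
	       (unsigned long long)gpa,
	       (unsigned long long)(hfn << PAGE_SHIFT));
	return 0;
}

Because both values are rounded down by the same amount (the alignment check in fault_supports_gstage_huge_mapping() guarantees matching offsets), the faulting page keeps its position inside the block, and the fault is satisfied with a single g-stage PMD entry instead of 512 PTEs.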
