summaryrefslogtreecommitdiff
path: root/arch
diff options
context:
space:
mode:
authorPaolo Bonzini <pbonzini@redhat.com>2026-02-09 19:05:42 +0100
committerPaolo Bonzini <pbonzini@redhat.com>2026-02-11 12:45:00 -0500
commit54f15ebfc61ee8499a97f2dbfc18b1b13fdcb524 (patch)
tree4e64e081daaa35891dbb5cfb049701ad943d0dbb /arch
parent9e03b7caf4e65f5a5841dfed540fdcc3ff061734 (diff)
parent376e2f8cca2816c489a9196e65cc904d1a907fd2 (diff)
Merge tag 'kvm-riscv-6.20-1' of https://github.com/kvm-riscv/linux into HEAD
KVM/riscv changes for 6.20 - Fixes for issues discoverd by KVM API fuzzing in kvm_riscv_aia_imsic_has_attr(), kvm_riscv_aia_imsic_rw_attr(), and kvm_riscv_vcpu_aia_imsic_update() - Allow Zalasr, Zilsd and Zclsd extensions for Guest/VM - Add riscv vm satp modes in KVM selftests - Transparent huge page support for G-stage - Adjust the number of available guest irq files based on MMIO register sizes in DeviceTree or ACPI
Diffstat (limited to 'arch')
-rw-r--r--arch/riscv/include/uapi/asm/kvm.h3
-rw-r--r--arch/riscv/kvm/aia.c2
-rw-r--r--arch/riscv/kvm/aia_imsic.c13
-rw-r--r--arch/riscv/kvm/mmu.c140
-rw-r--r--arch/riscv/kvm/vcpu_onereg.c4
-rw-r--r--arch/riscv/kvm/vcpu_pmu.c5
-rw-r--r--arch/riscv/mm/pgtable.c2
7 files changed, 162 insertions, 7 deletions
diff --git a/arch/riscv/include/uapi/asm/kvm.h b/arch/riscv/include/uapi/asm/kvm.h
index 54f3ad7ed2e4..6a89c1d00a72 100644
--- a/arch/riscv/include/uapi/asm/kvm.h
+++ b/arch/riscv/include/uapi/asm/kvm.h
@@ -192,6 +192,9 @@ enum KVM_RISCV_ISA_EXT_ID {
KVM_RISCV_ISA_EXT_ZFBFMIN,
KVM_RISCV_ISA_EXT_ZVFBFMIN,
KVM_RISCV_ISA_EXT_ZVFBFWMA,
+ KVM_RISCV_ISA_EXT_ZCLSD,
+ KVM_RISCV_ISA_EXT_ZILSD,
+ KVM_RISCV_ISA_EXT_ZALASR,
KVM_RISCV_ISA_EXT_MAX,
};
diff --git a/arch/riscv/kvm/aia.c b/arch/riscv/kvm/aia.c
index dad318185660..cac3c2b51d72 100644
--- a/arch/riscv/kvm/aia.c
+++ b/arch/riscv/kvm/aia.c
@@ -630,7 +630,7 @@ int kvm_riscv_aia_init(void)
*/
if (gc)
kvm_riscv_aia_nr_hgei = min((ulong)kvm_riscv_aia_nr_hgei,
- BIT(gc->guest_index_bits) - 1);
+ gc->nr_guest_files);
else
kvm_riscv_aia_nr_hgei = 0;
diff --git a/arch/riscv/kvm/aia_imsic.c b/arch/riscv/kvm/aia_imsic.c
index e597e86491c3..60da5fa155a6 100644
--- a/arch/riscv/kvm/aia_imsic.c
+++ b/arch/riscv/kvm/aia_imsic.c
@@ -797,6 +797,10 @@ int kvm_riscv_vcpu_aia_imsic_update(struct kvm_vcpu *vcpu)
if (kvm->arch.aia.mode == KVM_DEV_RISCV_AIA_MODE_EMUL)
return 1;
+ /* IMSIC vCPU state may not be initialized yet */
+ if (!imsic)
+ return 1;
+
/* Read old IMSIC VS-file details */
read_lock_irqsave(&imsic->vsfile_lock, flags);
old_vsfile_hgei = imsic->vsfile_hgei;
@@ -952,8 +956,10 @@ int kvm_riscv_aia_imsic_rw_attr(struct kvm *kvm, unsigned long type,
if (!vcpu)
return -ENODEV;
- isel = KVM_DEV_RISCV_AIA_IMSIC_GET_ISEL(type);
imsic = vcpu->arch.aia_context.imsic_state;
+ if (!imsic)
+ return -ENODEV;
+ isel = KVM_DEV_RISCV_AIA_IMSIC_GET_ISEL(type);
read_lock_irqsave(&imsic->vsfile_lock, flags);
@@ -993,8 +999,11 @@ int kvm_riscv_aia_imsic_has_attr(struct kvm *kvm, unsigned long type)
if (!vcpu)
return -ENODEV;
- isel = KVM_DEV_RISCV_AIA_IMSIC_GET_ISEL(type);
imsic = vcpu->arch.aia_context.imsic_state;
+ if (!imsic)
+ return -ENODEV;
+
+ isel = KVM_DEV_RISCV_AIA_IMSIC_GET_ISEL(type);
return imsic_mrif_isel_check(imsic->nr_eix, isel);
}
diff --git a/arch/riscv/kvm/mmu.c b/arch/riscv/kvm/mmu.c
index 4ab06697bfc0..0b75eb2a1820 100644
--- a/arch/riscv/kvm/mmu.c
+++ b/arch/riscv/kvm/mmu.c
@@ -305,6 +305,142 @@ bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
return pte_young(ptep_get(ptep));
}
+static bool fault_supports_gstage_huge_mapping(struct kvm_memory_slot *memslot,
+ unsigned long hva)
+{
+ hva_t uaddr_start, uaddr_end;
+ gpa_t gpa_start;
+ size_t size;
+
+ size = memslot->npages * PAGE_SIZE;
+ uaddr_start = memslot->userspace_addr;
+ uaddr_end = uaddr_start + size;
+
+ gpa_start = memslot->base_gfn << PAGE_SHIFT;
+
+ /*
+ * Pages belonging to memslots that don't have the same alignment
+ * within a PMD for userspace and GPA cannot be mapped with g-stage
+ * PMD entries, because we'll end up mapping the wrong pages.
+ *
+ * Consider a layout like the following:
+ *
+ * memslot->userspace_addr:
+ * +-----+--------------------+--------------------+---+
+ * |abcde|fgh vs-stage block | vs-stage block tv|xyz|
+ * +-----+--------------------+--------------------+---+
+ *
+ * memslot->base_gfn << PAGE_SHIFT:
+ * +---+--------------------+--------------------+-----+
+ * |abc|def g-stage block | g-stage block |tvxyz|
+ * +---+--------------------+--------------------+-----+
+ *
+ * If we create those g-stage blocks, we'll end up with this incorrect
+ * mapping:
+ * d -> f
+ * e -> g
+ * f -> h
+ */
+ if ((gpa_start & (PMD_SIZE - 1)) != (uaddr_start & (PMD_SIZE - 1)))
+ return false;
+
+ /*
+ * Next, let's make sure we're not trying to map anything not covered
+ * by the memslot. This means we have to prohibit block size mappings
+ * for the beginning and end of a non-block aligned and non-block sized
+ * memory slot (illustrated by the head and tail parts of the
+ * userspace view above containing pages 'abcde' and 'xyz',
+ * respectively).
+ *
+ * Note that it doesn't matter if we do the check using the
+ * userspace_addr or the base_gfn, as both are equally aligned (per
+ * the check above) and equally sized.
+ */
+ return (hva >= ALIGN(uaddr_start, PMD_SIZE)) && (hva < ALIGN_DOWN(uaddr_end, PMD_SIZE));
+}
+
+static int get_hva_mapping_size(struct kvm *kvm,
+ unsigned long hva)
+{
+ int size = PAGE_SIZE;
+ unsigned long flags;
+ pgd_t pgd;
+ p4d_t p4d;
+ pud_t pud;
+ pmd_t pmd;
+
+ /*
+ * Disable IRQs to prevent concurrent tear down of host page tables,
+ * e.g. if the primary MMU promotes a P*D to a huge page and then frees
+ * the original page table.
+ */
+ local_irq_save(flags);
+
+ /*
+ * Read each entry once. As above, a non-leaf entry can be promoted to
+ * a huge page _during_ this walk. Re-reading the entry could send the
+ * walk into the weeks, e.g. p*d_leaf() returns false (sees the old
+ * value) and then p*d_offset() walks into the target huge page instead
+ * of the old page table (sees the new value).
+ */
+ pgd = pgdp_get(pgd_offset(kvm->mm, hva));
+ if (pgd_none(pgd))
+ goto out;
+
+ p4d = p4dp_get(p4d_offset(&pgd, hva));
+ if (p4d_none(p4d) || !p4d_present(p4d))
+ goto out;
+
+ pud = pudp_get(pud_offset(&p4d, hva));
+ if (pud_none(pud) || !pud_present(pud))
+ goto out;
+
+ if (pud_leaf(pud)) {
+ size = PUD_SIZE;
+ goto out;
+ }
+
+ pmd = pmdp_get(pmd_offset(&pud, hva));
+ if (pmd_none(pmd) || !pmd_present(pmd))
+ goto out;
+
+ if (pmd_leaf(pmd))
+ size = PMD_SIZE;
+
+out:
+ local_irq_restore(flags);
+ return size;
+}
+
+static unsigned long transparent_hugepage_adjust(struct kvm *kvm,
+ struct kvm_memory_slot *memslot,
+ unsigned long hva,
+ kvm_pfn_t *hfnp, gpa_t *gpa)
+{
+ kvm_pfn_t hfn = *hfnp;
+
+ /*
+ * Make sure the adjustment is done only for THP pages. Also make
+ * sure that the HVA and GPA are sufficiently aligned and that the
+ * block map is contained within the memslot.
+ */
+ if (fault_supports_gstage_huge_mapping(memslot, hva)) {
+ int sz;
+
+ sz = get_hva_mapping_size(kvm, hva);
+ if (sz < PMD_SIZE)
+ return sz;
+
+ *gpa &= PMD_MASK;
+ hfn &= ~(PTRS_PER_PMD - 1);
+ *hfnp = hfn;
+
+ return PMD_SIZE;
+ }
+
+ return PAGE_SIZE;
+}
+
int kvm_riscv_mmu_map(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot,
gpa_t gpa, unsigned long hva, bool is_write,
struct kvm_gstage_mapping *out_map)
@@ -398,6 +534,10 @@ int kvm_riscv_mmu_map(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot,
if (mmu_invalidate_retry(kvm, mmu_seq))
goto out_unlock;
+ /* Check if we are backed by a THP and thus use block mapping if possible */
+ if (vma_pagesize == PAGE_SIZE)
+ vma_pagesize = transparent_hugepage_adjust(kvm, memslot, hva, &hfn, &gpa);
+
if (writable) {
mark_page_dirty_in_slot(kvm, memslot, gfn);
ret = kvm_riscv_gstage_map_page(&gstage, pcache, gpa, hfn << PAGE_SHIFT,
diff --git a/arch/riscv/kvm/vcpu_onereg.c b/arch/riscv/kvm/vcpu_onereg.c
index 865dae903aa0..e7ab6cb00646 100644
--- a/arch/riscv/kvm/vcpu_onereg.c
+++ b/arch/riscv/kvm/vcpu_onereg.c
@@ -50,6 +50,7 @@ static const unsigned long kvm_isa_ext_arr[] = {
KVM_ISA_EXT_ARR(ZAAMO),
KVM_ISA_EXT_ARR(ZABHA),
KVM_ISA_EXT_ARR(ZACAS),
+ KVM_ISA_EXT_ARR(ZALASR),
KVM_ISA_EXT_ARR(ZALRSC),
KVM_ISA_EXT_ARR(ZAWRS),
KVM_ISA_EXT_ARR(ZBA),
@@ -63,6 +64,7 @@ static const unsigned long kvm_isa_ext_arr[] = {
KVM_ISA_EXT_ARR(ZCB),
KVM_ISA_EXT_ARR(ZCD),
KVM_ISA_EXT_ARR(ZCF),
+ KVM_ISA_EXT_ARR(ZCLSD),
KVM_ISA_EXT_ARR(ZCMOP),
KVM_ISA_EXT_ARR(ZFA),
KVM_ISA_EXT_ARR(ZFBFMIN),
@@ -79,6 +81,7 @@ static const unsigned long kvm_isa_ext_arr[] = {
KVM_ISA_EXT_ARR(ZIHINTNTL),
KVM_ISA_EXT_ARR(ZIHINTPAUSE),
KVM_ISA_EXT_ARR(ZIHPM),
+ KVM_ISA_EXT_ARR(ZILSD),
KVM_ISA_EXT_ARR(ZIMOP),
KVM_ISA_EXT_ARR(ZKND),
KVM_ISA_EXT_ARR(ZKNE),
@@ -187,6 +190,7 @@ static bool kvm_riscv_vcpu_isa_disable_allowed(unsigned long ext)
case KVM_RISCV_ISA_EXT_ZAAMO:
case KVM_RISCV_ISA_EXT_ZABHA:
case KVM_RISCV_ISA_EXT_ZACAS:
+ case KVM_RISCV_ISA_EXT_ZALASR:
case KVM_RISCV_ISA_EXT_ZALRSC:
case KVM_RISCV_ISA_EXT_ZAWRS:
case KVM_RISCV_ISA_EXT_ZBA:
diff --git a/arch/riscv/kvm/vcpu_pmu.c b/arch/riscv/kvm/vcpu_pmu.c
index a2fae70ee174..4d8d5e9aa53d 100644
--- a/arch/riscv/kvm/vcpu_pmu.c
+++ b/arch/riscv/kvm/vcpu_pmu.c
@@ -494,12 +494,9 @@ int kvm_riscv_vcpu_pmu_event_info(struct kvm_vcpu *vcpu, unsigned long saddr_low
}
ret = kvm_vcpu_write_guest(vcpu, shmem, einfo, shmem_size);
- if (ret) {
+ if (ret)
ret = SBI_ERR_INVALID_ADDRESS;
- goto free_mem;
- }
- ret = 0;
free_mem:
kfree(einfo);
out:
diff --git a/arch/riscv/mm/pgtable.c b/arch/riscv/mm/pgtable.c
index 807c0a0de182..b02bf71db33d 100644
--- a/arch/riscv/mm/pgtable.c
+++ b/arch/riscv/mm/pgtable.c
@@ -47,6 +47,7 @@ pud_t *pud_offset(p4d_t *p4d, unsigned long address)
return (pud_t *)p4d;
}
+EXPORT_SYMBOL_GPL(pud_offset);
p4d_t *p4d_offset(pgd_t *pgd, unsigned long address)
{
@@ -55,6 +56,7 @@ p4d_t *p4d_offset(pgd_t *pgd, unsigned long address)
return (p4d_t *)pgd;
}
+EXPORT_SYMBOL_GPL(p4d_offset);
#endif
#ifdef CONFIG_HAVE_ARCH_HUGE_VMAP