summaryrefslogtreecommitdiff
path: root/arch
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2025-03-25 14:47:04 -0700
committer Linus Torvalds <torvalds@linux-foundation.org> 2025-03-25 14:47:04 -0700
commit a5b3d8660b049779880c790549ff3fef02f6922c (patch)
tree df07a0fd239a926a8713d22325497ac46bebd745 /arch
parent dce3ab4c57e662ae019c22e7c2f2aa887617beae (diff)
parent 628cc040b3a2980df6032766e8ef0688e981ab95 (diff)
Merge tag 'hyperv-next-signed-20250324' of git://git.kernel.org/pub/scm/linux/kernel/git/hyperv/linux
Pull hyperv updates from Wei Liu:

 - Add support for running as the root partition in Hyper-V (Microsoft
   Hypervisor) by exposing /dev/mshv (Nuno and various people)

 - Add support for CPU offlining in Hyper-V (Hamza Mahfooz)

 - Misc fixes and cleanups (Roman Kisel, Tianyu Lan, Wei Liu, Michael
   Kelley, Thorsten Blum)

* tag 'hyperv-next-signed-20250324' of git://git.kernel.org/pub/scm/linux/kernel/git/hyperv/linux: (24 commits)
  x86/hyperv: fix an indentation issue in mshyperv.h
  x86/hyperv: Add comments about hv_vpset and var size hypercall input args
  Drivers: hv: Introduce mshv_root module to expose /dev/mshv to VMMs
  hyperv: Add definitions for root partition driver to hv headers
  x86: hyperv: Add mshv_handler() irq handler and setup function
  Drivers: hv: Introduce per-cpu event ring tail
  Drivers: hv: Export some functions for use by root partition module
  acpi: numa: Export node_to_pxm()
  hyperv: Introduce hv_recommend_using_aeoi()
  arm64/hyperv: Add some missing functions to arm64
  x86/mshyperv: Add support for extended Hyper-V features
  hyperv: Log hypercall status codes as strings
  x86/hyperv: Fix check of return value from snp_set_vmsa()
  x86/hyperv: Add VTL mode callback for restarting the system
  x86/hyperv: Add VTL mode emergency restart callback
  hyperv: Remove unused union and structs
  hyperv: Add CONFIG_MSHV_ROOT to gate root partition support
  hyperv: Change hv_root_partition into a function
  hyperv: Convert hypercall statuses to linux error codes
  drivers/hv: add CPU offlining support
  ...
Diffstat (limited to 'arch')
-rw-r--r-- arch/arm64/hyperv/hv_core.c 17
-rw-r--r-- arch/arm64/hyperv/mshyperv.c 6
-rw-r--r-- arch/arm64/include/asm/mshyperv.h 13
-rw-r--r-- arch/x86/hyperv/Makefile 2
-rw-r--r-- arch/x86/hyperv/hv_apic.c 5
-rw-r--r-- arch/x86/hyperv/hv_init.c 35
-rw-r--r-- arch/x86/hyperv/hv_proc.c 198
-rw-r--r-- arch/x86/hyperv/hv_vtl.c 34
-rw-r--r-- arch/x86/hyperv/irqdomain.c 6
-rw-r--r-- arch/x86/hyperv/ivm.c 2
-rw-r--r-- arch/x86/hyperv/mmu.c 4
-rw-r--r-- arch/x86/include/asm/mshyperv.h 8
-rw-r--r-- arch/x86/kernel/cpu/mshyperv.c 40
13 files changed, 107 insertions, 263 deletions
diff --git a/arch/arm64/hyperv/hv_core.c b/arch/arm64/hyperv/hv_core.c
index 69004f619c57..e33a9e3c366a 100644
--- a/arch/arm64/hyperv/hv_core.c
+++ b/arch/arm64/hyperv/hv_core.c
@@ -54,6 +54,23 @@ u64 hv_do_fast_hypercall8(u16 code, u64 input)
EXPORT_SYMBOL_GPL(hv_do_fast_hypercall8);
/*
+ * hv_do_fast_hypercall16 -- Invoke the specified hypercall
+ * with arguments in registers instead of physical memory.
+ * Avoids the overhead of virt_to_phys for simple hypercalls.
+ */
+u64 hv_do_fast_hypercall16(u16 code, u64 input1, u64 input2)
+{
+ struct arm_smccc_res res;
+ u64 control;
+
+ control = (u64)code | HV_HYPERCALL_FAST_BIT;
+
+ arm_smccc_1_1_hvc(HV_FUNC_ID, control, input1, input2, &res);
+ return res.a0;
+}
+EXPORT_SYMBOL_GPL(hv_do_fast_hypercall16);
+
+/*
* Set a single VP register to a 64-bit value.
*/
void hv_set_vpreg(u32 msr, u64 value)
diff --git a/arch/arm64/hyperv/mshyperv.c b/arch/arm64/hyperv/mshyperv.c
index fc49949b7df6..4e27cc29c79e 100644
--- a/arch/arm64/hyperv/mshyperv.c
+++ b/arch/arm64/hyperv/mshyperv.c
@@ -26,6 +26,7 @@ int hv_get_hypervisor_version(union hv_hypervisor_version_info *info)
return 0;
}
+EXPORT_SYMBOL_GPL(hv_get_hypervisor_version);
static int __init hyperv_init(void)
{
@@ -61,6 +62,8 @@ static int __init hyperv_init(void)
ms_hyperv.features, ms_hyperv.priv_high, ms_hyperv.hints,
ms_hyperv.misc_features);
+ hv_identify_partition_type();
+
ret = hv_common_init();
if (ret)
return ret;
@@ -72,6 +75,9 @@ static int __init hyperv_init(void)
return ret;
}
+ if (ms_hyperv.priv_high & HV_ACCESS_PARTITION_ID)
+ hv_get_partition_id();
+
ms_hyperv_late_init();
hyperv_initialized = true;
diff --git a/arch/arm64/include/asm/mshyperv.h b/arch/arm64/include/asm/mshyperv.h
index 2e2f83bafcfb..b721d3134ab6 100644
--- a/arch/arm64/include/asm/mshyperv.h
+++ b/arch/arm64/include/asm/mshyperv.h
@@ -40,6 +40,19 @@ static inline u64 hv_get_msr(unsigned int reg)
return hv_get_vpreg(reg);
}
+/*
+ * Nested is not supported on arm64
+ */
+static inline void hv_set_non_nested_msr(unsigned int reg, u64 value)
+{
+ hv_set_msr(reg, value);
+}
+
+static inline u64 hv_get_non_nested_msr(unsigned int reg)
+{
+ return hv_get_msr(reg);
+}
+
/* SMCCC hypercall parameters */
#define HV_SMCCC_FUNC_NUMBER 1
#define HV_FUNC_ID ARM_SMCCC_CALL_VAL( \
diff --git a/arch/x86/hyperv/Makefile b/arch/x86/hyperv/Makefile
index 3a1548054b48..d55f494f471d 100644
--- a/arch/x86/hyperv/Makefile
+++ b/arch/x86/hyperv/Makefile
@@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0-only
obj-y := hv_init.o mmu.o nested.o irqdomain.o ivm.o
-obj-$(CONFIG_X86_64) += hv_apic.o hv_proc.o
+obj-$(CONFIG_X86_64) += hv_apic.o
obj-$(CONFIG_HYPERV_VTL_MODE) += hv_vtl.o
ifdef CONFIG_X86_64
diff --git a/arch/x86/hyperv/hv_apic.c b/arch/x86/hyperv/hv_apic.c
index f022d5f64fb6..6d91ac5f9836 100644
--- a/arch/x86/hyperv/hv_apic.c
+++ b/arch/x86/hyperv/hv_apic.c
@@ -145,6 +145,11 @@ static bool __send_ipi_mask_ex(const struct cpumask *mask, int vector,
ipi_arg->vp_set.format = HV_GENERIC_SET_ALL;
}
+ /*
+ * For this hypercall, Hyper-V treats the valid_bank_mask field
+ * of ipi_arg->vp_set as part of the fixed size input header.
+ * So the variable input header size is equal to nr_bank.
+ */
status = hv_do_rep_hypercall(HVCALL_SEND_IPI_EX, 0, nr_bank,
ipi_arg, NULL);
diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
index 173005e6a95d..ddeb40930bc8 100644
--- a/arch/x86/hyperv/hv_init.c
+++ b/arch/x86/hyperv/hv_init.c
@@ -34,9 +34,6 @@
#include <clocksource/hyperv_timer.h>
#include <linux/highmem.h>
-u64 hv_current_partition_id = ~0ull;
-EXPORT_SYMBOL_GPL(hv_current_partition_id);
-
void *hv_hypercall_pg;
EXPORT_SYMBOL_GPL(hv_hypercall_pg);
@@ -93,7 +90,7 @@ static int hv_cpu_init(unsigned int cpu)
return 0;
hvp = &hv_vp_assist_page[cpu];
- if (hv_root_partition) {
+ if (hv_root_partition()) {
/*
* For root partition we get the hypervisor provided VP assist
* page, instead of allocating a new page.
@@ -245,7 +242,7 @@ static int hv_cpu_die(unsigned int cpu)
if (hv_vp_assist_page && hv_vp_assist_page[cpu]) {
union hv_vp_assist_msr_contents msr = { 0 };
- if (hv_root_partition) {
+ if (hv_root_partition()) {
/*
* For root partition the VP assist page is mapped to
* hypervisor provided page, and thus we unmap the
@@ -320,7 +317,7 @@ static int hv_suspend(void)
union hv_x64_msr_hypercall_contents hypercall_msr;
int ret;
- if (hv_root_partition)
+ if (hv_root_partition())
return -EPERM;
/*
@@ -393,24 +390,6 @@ static void __init hv_stimer_setup_percpu_clockev(void)
old_setup_percpu_clockev();
}
-static void __init hv_get_partition_id(void)
-{
- struct hv_get_partition_id *output_page;
- u64 status;
- unsigned long flags;
-
- local_irq_save(flags);
- output_page = *this_cpu_ptr(hyperv_pcpu_output_arg);
- status = hv_do_hypercall(HVCALL_GET_PARTITION_ID, NULL, output_page);
- if (!hv_result_success(status)) {
- /* No point in proceeding if this failed */
- pr_err("Failed to get partition ID: %lld\n", status);
- BUG();
- }
- hv_current_partition_id = output_page->partition_id;
- local_irq_restore(flags);
-}
-
#if IS_ENABLED(CONFIG_HYPERV_VTL_MODE)
static u8 __init get_vtl(void)
{
@@ -539,7 +518,7 @@ void __init hyperv_init(void)
rdmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
hypercall_msr.enable = 1;
- if (hv_root_partition) {
+ if (hv_root_partition()) {
struct page *pg;
void *src;
@@ -605,17 +584,15 @@ skip_hypercall_pg_init:
register_syscore_ops(&hv_syscore_ops);
- if (cpuid_ebx(HYPERV_CPUID_FEATURES) & HV_ACCESS_PARTITION_ID)
+ if (ms_hyperv.priv_high & HV_ACCESS_PARTITION_ID)
hv_get_partition_id();
- BUG_ON(hv_root_partition && hv_current_partition_id == ~0ull);
-
#ifdef CONFIG_PCI_MSI
/*
* If we're running as root, we want to create our own PCI MSI domain.
* We can't set this in hv_pci_init because that would be too late.
*/
- if (hv_root_partition)
+ if (hv_root_partition())
x86_init.irqs.create_pci_msi_domain = hv_create_pci_msi_domain;
#endif
diff --git a/arch/x86/hyperv/hv_proc.c b/arch/x86/hyperv/hv_proc.c
deleted file mode 100644
index ac4c834d4435..000000000000
--- a/arch/x86/hyperv/hv_proc.c
+++ /dev/null
@@ -1,198 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <linux/types.h>
-#include <linux/vmalloc.h>
-#include <linux/mm.h>
-#include <linux/clockchips.h>
-#include <linux/slab.h>
-#include <linux/cpuhotplug.h>
-#include <linux/minmax.h>
-#include <asm/hypervisor.h>
-#include <asm/mshyperv.h>
-#include <asm/apic.h>
-
-#include <asm/trace/hyperv.h>
-
-/*
- * See struct hv_deposit_memory. The first u64 is partition ID, the rest
- * are GPAs.
- */
-#define HV_DEPOSIT_MAX (HV_HYP_PAGE_SIZE / sizeof(u64) - 1)
-
-/* Deposits exact number of pages. Must be called with interrupts enabled. */
-int hv_call_deposit_pages(int node, u64 partition_id, u32 num_pages)
-{
- struct page **pages, *page;
- int *counts;
- int num_allocations;
- int i, j, page_count;
- int order;
- u64 status;
- int ret;
- u64 base_pfn;
- struct hv_deposit_memory *input_page;
- unsigned long flags;
-
- if (num_pages > HV_DEPOSIT_MAX)
- return -E2BIG;
- if (!num_pages)
- return 0;
-
- /* One buffer for page pointers and counts */
- page = alloc_page(GFP_KERNEL);
- if (!page)
- return -ENOMEM;
- pages = page_address(page);
-
- counts = kcalloc(HV_DEPOSIT_MAX, sizeof(int), GFP_KERNEL);
- if (!counts) {
- free_page((unsigned long)pages);
- return -ENOMEM;
- }
-
- /* Allocate all the pages before disabling interrupts */
- i = 0;
-
- while (num_pages) {
- /* Find highest order we can actually allocate */
- order = 31 - __builtin_clz(num_pages);
-
- while (1) {
- pages[i] = alloc_pages_node(node, GFP_KERNEL, order);
- if (pages[i])
- break;
- if (!order) {
- ret = -ENOMEM;
- num_allocations = i;
- goto err_free_allocations;
- }
- --order;
- }
-
- split_page(pages[i], order);
- counts[i] = 1 << order;
- num_pages -= counts[i];
- i++;
- }
- num_allocations = i;
-
- local_irq_save(flags);
-
- input_page = *this_cpu_ptr(hyperv_pcpu_input_arg);
-
- input_page->partition_id = partition_id;
-
- /* Populate gpa_page_list - these will fit on the input page */
- for (i = 0, page_count = 0; i < num_allocations; ++i) {
- base_pfn = page_to_pfn(pages[i]);
- for (j = 0; j < counts[i]; ++j, ++page_count)
- input_page->gpa_page_list[page_count] = base_pfn + j;
- }
- status = hv_do_rep_hypercall(HVCALL_DEPOSIT_MEMORY,
- page_count, 0, input_page, NULL);
- local_irq_restore(flags);
- if (!hv_result_success(status)) {
- pr_err("Failed to deposit pages: %lld\n", status);
- ret = hv_result(status);
- goto err_free_allocations;
- }
-
- ret = 0;
- goto free_buf;
-
-err_free_allocations:
- for (i = 0; i < num_allocations; ++i) {
- base_pfn = page_to_pfn(pages[i]);
- for (j = 0; j < counts[i]; ++j)
- __free_page(pfn_to_page(base_pfn + j));
- }
-
-free_buf:
- free_page((unsigned long)pages);
- kfree(counts);
- return ret;
-}
-
-int hv_call_add_logical_proc(int node, u32 lp_index, u32 apic_id)
-{
- struct hv_input_add_logical_processor *input;
- struct hv_output_add_logical_processor *output;
- u64 status;
- unsigned long flags;
- int ret = HV_STATUS_SUCCESS;
-
- /*
- * When adding a logical processor, the hypervisor may return
- * HV_STATUS_INSUFFICIENT_MEMORY. When that happens, we deposit more
- * pages and retry.
- */
- do {
- local_irq_save(flags);
-
- input = *this_cpu_ptr(hyperv_pcpu_input_arg);
- /* We don't do anything with the output right now */
- output = *this_cpu_ptr(hyperv_pcpu_output_arg);
-
- input->lp_index = lp_index;
- input->apic_id = apic_id;
- input->proximity_domain_info = hv_numa_node_to_pxm_info(node);
- status = hv_do_hypercall(HVCALL_ADD_LOGICAL_PROCESSOR,
- input, output);
- local_irq_restore(flags);
-
- if (hv_result(status) != HV_STATUS_INSUFFICIENT_MEMORY) {
- if (!hv_result_success(status)) {
- pr_err("%s: cpu %u apic ID %u, %lld\n", __func__,
- lp_index, apic_id, status);
- ret = hv_result(status);
- }
- break;
- }
- ret = hv_call_deposit_pages(node, hv_current_partition_id, 1);
- } while (!ret);
-
- return ret;
-}
-
-int hv_call_create_vp(int node, u64 partition_id, u32 vp_index, u32 flags)
-{
- struct hv_create_vp *input;
- u64 status;
- unsigned long irq_flags;
- int ret = HV_STATUS_SUCCESS;
-
- /* Root VPs don't seem to need pages deposited */
- if (partition_id != hv_current_partition_id) {
- /* The value 90 is empirically determined. It may change. */
- ret = hv_call_deposit_pages(node, partition_id, 90);
- if (ret)
- return ret;
- }
-
- do {
- local_irq_save(irq_flags);
-
- input = *this_cpu_ptr(hyperv_pcpu_input_arg);
-
- input->partition_id = partition_id;
- input->vp_index = vp_index;
- input->flags = flags;
- input->subnode_type = HV_SUBNODE_ANY;
- input->proximity_domain_info = hv_numa_node_to_pxm_info(node);
- status = hv_do_hypercall(HVCALL_CREATE_VP, input, NULL);
- local_irq_restore(irq_flags);
-
- if (hv_result(status) != HV_STATUS_INSUFFICIENT_MEMORY) {
- if (!hv_result_success(status)) {
- pr_err("%s: vcpu %u, lp %u, %lld\n", __func__,
- vp_index, flags, status);
- ret = hv_result(status);
- }
- break;
- }
- ret = hv_call_deposit_pages(node, partition_id, 1);
-
- } while (!ret);
-
- return ret;
-}
-
diff --git a/arch/x86/hyperv/hv_vtl.c b/arch/x86/hyperv/hv_vtl.c
index 3f4e20d7b724..13242ed8ff16 100644
--- a/arch/x86/hyperv/hv_vtl.c
+++ b/arch/x86/hyperv/hv_vtl.c
@@ -12,6 +12,7 @@
#include <asm/i8259.h>
#include <asm/mshyperv.h>
#include <asm/realmode.h>
+#include <asm/reboot.h>
#include <../kernel/smpboot.h>
extern struct boot_params boot_params;
@@ -22,6 +23,36 @@ static bool __init hv_vtl_msi_ext_dest_id(void)
return true;
}
+/*
+ * The `native_machine_emergency_restart` function from `reboot.c` writes
+ * to the physical address 0x472 to indicate the type of reboot for the
+ * firmware. We cannot have that in VSM as the memory composition might
+ * be more generic, and such a write effectively corrupts the memory, thus
+ * making diagnostics harder at the very least.
+ */
+static void __noreturn hv_vtl_emergency_restart(void)
+{
+ /*
+ * Cause a triple fault and an immediate reset. The code here does not run
+ * on top of any firmware, so it cannot reach out to firmware services.
+ * The infinite loop is for the improbable case that the triple fault does
+ * not work and the state has to be preserved intact for debugging.
+ */
+ for (;;) {
+ idt_invalidate();
+ __asm__ __volatile__("int3");
+ }
+}
+
+/*
+ * The only way to restart in the VTL mode is to triple fault as the kernel runs
+ * as firmware.
+ */
+static void __noreturn hv_vtl_restart(char __maybe_unused *cmd)
+{
+ hv_vtl_emergency_restart();
+}
+
void __init hv_vtl_init_platform(void)
{
pr_info("Linux runs in Hyper-V Virtual Trust Level\n");
@@ -236,6 +267,9 @@ static int hv_vtl_wakeup_secondary_cpu(u32 apicid, unsigned long start_eip)
int __init hv_vtl_early_init(void)
{
+ machine_ops.emergency_restart = hv_vtl_emergency_restart;
+ machine_ops.restart = hv_vtl_restart;
+
/*
* `boot_cpu_has` returns the runtime feature support,
* and here is the earliest it can be used.
diff --git a/arch/x86/hyperv/irqdomain.c b/arch/x86/hyperv/irqdomain.c
index 64b921360b0f..31f0d29cbc5e 100644
--- a/arch/x86/hyperv/irqdomain.c
+++ b/arch/x86/hyperv/irqdomain.c
@@ -64,7 +64,7 @@ static int hv_map_interrupt(union hv_device_id device_id, bool level,
local_irq_restore(flags);
if (!hv_result_success(status))
- pr_err("%s: hypercall failed, status %lld\n", __func__, status);
+ hv_status_err(status, "\n");
return hv_result(status);
}
@@ -224,7 +224,7 @@ static void hv_irq_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
kfree(stored_entry);
if (status != HV_STATUS_SUCCESS) {
- pr_debug("%s: failed to unmap, status %lld", __func__, status);
+ hv_status_debug(status, "failed to unmap\n");
return;
}
}
@@ -273,7 +273,7 @@ static void hv_teardown_msi_irq(struct pci_dev *dev, struct irq_data *irqd)
status = hv_unmap_msi_interrupt(dev, &old_entry);
if (status != HV_STATUS_SUCCESS)
- pr_err("%s: hypercall failed, status %lld\n", __func__, status);
+ hv_status_err(status, "\n");
}
static void hv_msi_free_irq(struct irq_domain *domain,
diff --git a/arch/x86/hyperv/ivm.c b/arch/x86/hyperv/ivm.c
index ec7880271cf9..77bf05f06b9e 100644
--- a/arch/x86/hyperv/ivm.c
+++ b/arch/x86/hyperv/ivm.c
@@ -338,7 +338,7 @@ int hv_snp_boot_ap(u32 cpu, unsigned long start_ip)
vmsa->sev_features = sev_status >> 2;
ret = snp_set_vmsa(vmsa, true);
- if (!ret) {
+ if (ret) {
pr_err("RMPADJUST(%llx) failed: %llx\n", (u64)vmsa, ret);
free_page((u64)vmsa);
return ret;
diff --git a/arch/x86/hyperv/mmu.c b/arch/x86/hyperv/mmu.c
index 1f7c3082a36d..cfcb60468b01 100644
--- a/arch/x86/hyperv/mmu.c
+++ b/arch/x86/hyperv/mmu.c
@@ -205,6 +205,10 @@ static u64 hyperv_flush_tlb_others_ex(const struct cpumask *cpus,
/*
* We can flush not more than max_gvas with one hypercall. Flush the
* whole address space if we were asked to do more.
+ *
+ * For these hypercalls, Hyper-V treats the valid_bank_mask field
+ * of flush->hv_vp_set as part of the fixed size input header.
+ * So the variable input header size is equal to nr_bank.
*/
max_gvas =
(PAGE_SIZE - sizeof(*flush) - nr_bank *
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index 5e6193dbc97e..bab5ccfc60a7 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -43,8 +43,6 @@ extern bool hyperv_paravisor_present;
extern void *hv_hypercall_pg;
-extern u64 hv_current_partition_id;
-
extern union hv_ghcb * __percpu *hv_ghcb_pg;
bool hv_isolation_type_snp(void);
@@ -58,10 +56,6 @@ u64 hv_tdx_hypercall(u64 control, u64 param1, u64 param2);
#define HV_AP_INIT_GPAT_DEFAULT 0x0007040600070406ULL
#define HV_AP_SEGMENT_LIMIT 0xffffffff
-int hv_call_deposit_pages(int node, u64 partition_id, u32 num_pages);
-int hv_call_add_logical_proc(int node, u32 lp_index, u32 acpi_id);
-int hv_call_create_vp(int node, u64 partition_id, u32 vp_index, u32 flags);
-
/*
* If the hypercall involves no input or output parameters, the hypervisor
* ignores the corresponding GPA pointer.
@@ -160,7 +154,7 @@ static inline u64 _hv_do_fast_hypercall8(u64 control, u64 input1)
: "cc", "edi", "esi");
}
#endif
- return hv_status;
+ return hv_status;
}
static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1)
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index f285757618fc..3e2533954675 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -33,8 +33,6 @@
#include <asm/numa.h>
#include <asm/svm.h>
-/* Is Linux running as the root partition? */
-bool hv_root_partition;
/* Is Linux running on nested Microsoft Hypervisor */
bool hv_nested;
struct ms_hyperv_info ms_hyperv;
@@ -109,6 +107,7 @@ void hv_set_msr(unsigned int reg, u64 value)
}
EXPORT_SYMBOL_GPL(hv_set_msr);
+static void (*mshv_handler)(void);
static void (*vmbus_handler)(void);
static void (*hv_stimer0_handler)(void);
static void (*hv_kexec_handler)(void);
@@ -119,6 +118,9 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_hyperv_callback)
struct pt_regs *old_regs = set_irq_regs(regs);
inc_irq_stat(irq_hv_callback_count);
+ if (mshv_handler)
+ mshv_handler();
+
if (vmbus_handler)
vmbus_handler();
@@ -128,6 +130,11 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_hyperv_callback)
set_irq_regs(old_regs);
}
+void hv_setup_mshv_handler(void (*handler)(void))
+{
+ mshv_handler = handler;
+}
+
void hv_setup_vmbus_handler(void (*handler)(void))
{
vmbus_handler = handler;
@@ -422,6 +429,7 @@ int hv_get_hypervisor_version(union hv_hypervisor_version_info *info)
return 0;
}
+EXPORT_SYMBOL_GPL(hv_get_hypervisor_version);
static void __init ms_hyperv_init_platform(void)
{
@@ -436,13 +444,15 @@ static void __init ms_hyperv_init_platform(void)
*/
ms_hyperv.features = cpuid_eax(HYPERV_CPUID_FEATURES);
ms_hyperv.priv_high = cpuid_ebx(HYPERV_CPUID_FEATURES);
+ ms_hyperv.ext_features = cpuid_ecx(HYPERV_CPUID_FEATURES);
ms_hyperv.misc_features = cpuid_edx(HYPERV_CPUID_FEATURES);
ms_hyperv.hints = cpuid_eax(HYPERV_CPUID_ENLIGHTMENT_INFO);
hv_max_functions_eax = cpuid_eax(HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS);
- pr_info("Hyper-V: privilege flags low 0x%x, high 0x%x, hints 0x%x, misc 0x%x\n",
- ms_hyperv.features, ms_hyperv.priv_high, ms_hyperv.hints,
+ pr_info("Hyper-V: privilege flags low %#x, high %#x, ext %#x, hints %#x, misc %#x\n",
+ ms_hyperv.features, ms_hyperv.priv_high,
+ ms_hyperv.ext_features, ms_hyperv.hints,
ms_hyperv.misc_features);
ms_hyperv.max_vp_index = cpuid_eax(HYPERV_CPUID_IMPLEMENT_LIMITS);
@@ -451,25 +461,7 @@ static void __init ms_hyperv_init_platform(void)
pr_debug("Hyper-V: max %u virtual processors, %u logical processors\n",
ms_hyperv.max_vp_index, ms_hyperv.max_lp_index);
- /*
- * Check CPU management privilege.
- *
- * To mirror what Windows does we should extract CPU management
- * features and use the ReservedIdentityBit to detect if Linux is the
- * root partition. But that requires negotiating CPU management
- * interface (a process to be finalized). For now, use the privilege
- * flag as the indicator for running as root.
- *
- * Hyper-V should never specify running as root and as a Confidential
- * VM. But to protect against a compromised/malicious Hyper-V trying
- * to exploit root behavior to expose Confidential VM memory, ignore
- * the root partition setting if also a Confidential VM.
- */
- if ((ms_hyperv.priv_high & HV_CPU_MANAGEMENT) &&
- !(ms_hyperv.priv_high & HV_ISOLATION)) {
- hv_root_partition = true;
- pr_info("Hyper-V: running as root partition\n");
- }
+ hv_identify_partition_type();
if (ms_hyperv.hints & HV_X64_HYPERV_NESTED) {
hv_nested = true;
@@ -618,7 +610,7 @@ static void __init ms_hyperv_init_platform(void)
# ifdef CONFIG_SMP
smp_ops.smp_prepare_boot_cpu = hv_smp_prepare_boot_cpu;
- if (hv_root_partition ||
+ if (hv_root_partition() ||
(!ms_hyperv.paravisor_present && hv_isolation_type_snp()))
smp_ops.smp_prepare_cpus = hv_smp_prepare_cpus;
# endif