summaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2025-03-25 14:47:04 -0700
committer Linus Torvalds <torvalds@linux-foundation.org> 2025-03-25 14:47:04 -0700
commit a5b3d8660b049779880c790549ff3fef02f6922c (patch)
tree df07a0fd239a926a8713d22325497ac46bebd745 /include
parent dce3ab4c57e662ae019c22e7c2f2aa887617beae (diff)
parent 628cc040b3a2980df6032766e8ef0688e981ab95 (diff)
Merge tag 'hyperv-next-signed-20250324' of git://git.kernel.org/pub/scm/linux/kernel/git/hyperv/linux
Pull hyperv updates from Wei Liu:

 - Add support for running as the root partition in Hyper-V (Microsoft
   Hypervisor) by exposing /dev/mshv (Nuno and various people)

 - Add support for CPU offlining in Hyper-V (Hamza Mahfooz)

 - Misc fixes and cleanups (Roman Kisel, Tianyu Lan, Wei Liu, Michael
   Kelley, Thorsten Blum)

* tag 'hyperv-next-signed-20250324' of git://git.kernel.org/pub/scm/linux/kernel/git/hyperv/linux: (24 commits)
  x86/hyperv: fix an indentation issue in mshyperv.h
  x86/hyperv: Add comments about hv_vpset and var size hypercall input args
  Drivers: hv: Introduce mshv_root module to expose /dev/mshv to VMMs
  hyperv: Add definitions for root partition driver to hv headers
  x86: hyperv: Add mshv_handler() irq handler and setup function
  Drivers: hv: Introduce per-cpu event ring tail
  Drivers: hv: Export some functions for use by root partition module
  acpi: numa: Export node_to_pxm()
  hyperv: Introduce hv_recommend_using_aeoi()
  arm64/hyperv: Add some missing functions to arm64
  x86/mshyperv: Add support for extended Hyper-V features
  hyperv: Log hypercall status codes as strings
  x86/hyperv: Fix check of return value from snp_set_vmsa()
  x86/hyperv: Add VTL mode callback for restarting the system
  x86/hyperv: Add VTL mode emergency restart callback
  hyperv: Remove unused union and structs
  hyperv: Add CONFIG_MSHV_ROOT to gate root partition support
  hyperv: Change hv_root_partition into a function
  hyperv: Convert hypercall statuses to linux error codes
  drivers/hv: add CPU offlining support
  ...
Diffstat (limited to 'include')
-rw-r--r-- include/asm-generic/mshyperv.h 72
-rw-r--r-- include/hyperv/hvgdk_mini.h 83
-rw-r--r-- include/hyperv/hvhdk.h 132
-rw-r--r-- include/hyperv/hvhdk_mini.h 91
-rw-r--r-- include/linux/hyperv.h 57
-rw-r--r-- include/uapi/linux/mshv.h 291
6 files changed, 653 insertions, 73 deletions
diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h
index a7bbe504e4f3..ccccb1cbf7df 100644
--- a/include/asm-generic/mshyperv.h
+++ b/include/asm-generic/mshyperv.h
@@ -28,9 +28,15 @@
#define VTPM_BASE_ADDRESS 0xfed40000
+enum hv_partition_type {
+ HV_PARTITION_TYPE_GUEST,
+ HV_PARTITION_TYPE_ROOT,
+};
+
struct ms_hyperv_info {
u32 features;
u32 priv_high;
+ u32 ext_features;
u32 misc_features;
u32 hints;
u32 nested_features;
@@ -58,15 +64,32 @@ struct ms_hyperv_info {
};
extern struct ms_hyperv_info ms_hyperv;
extern bool hv_nested;
+extern u64 hv_current_partition_id;
+extern enum hv_partition_type hv_curr_partition_type;
extern void * __percpu *hyperv_pcpu_input_arg;
extern void * __percpu *hyperv_pcpu_output_arg;
-extern u64 hv_do_hypercall(u64 control, void *inputaddr, void *outputaddr);
-extern u64 hv_do_fast_hypercall8(u16 control, u64 input8);
+u64 hv_do_hypercall(u64 control, void *inputaddr, void *outputaddr);
+u64 hv_do_fast_hypercall8(u16 control, u64 input8);
+u64 hv_do_fast_hypercall16(u16 control, u64 input1, u64 input2);
+
bool hv_isolation_type_snp(void);
bool hv_isolation_type_tdx(void);
+/*
+ * On architectures where Hyper-V doesn't support AEOI (e.g., ARM64),
+ * it doesn't provide a recommendation flag and AEOI must be disabled.
+ */
+static inline bool hv_recommend_using_aeoi(void)
+{
+#ifdef HV_DEPRECATING_AEOI_RECOMMENDED
+ return !(ms_hyperv.hints & HV_DEPRECATING_AEOI_RECOMMENDED);
+#else
+ return false;
+#endif
+}
+
static inline struct hv_proximity_domain_info hv_numa_node_to_pxm_info(int node)
{
struct hv_proximity_domain_info pxm_info = {};
@@ -185,12 +208,11 @@ void hv_setup_kexec_handler(void (*handler)(void));
void hv_remove_kexec_handler(void);
void hv_setup_crash_handler(void (*handler)(struct pt_regs *regs));
void hv_remove_crash_handler(void);
+void hv_setup_mshv_handler(void (*handler)(void));
extern int vmbus_interrupt;
extern int vmbus_irq;
-extern bool hv_root_partition;
-
#if IS_ENABLED(CONFIG_HYPERV)
/*
* Hypervisor's notion of virtual processor ID is different from
@@ -207,10 +229,12 @@ extern u64 (*hv_read_reference_counter)(void);
#define VP_INVAL U32_MAX
int __init hv_common_init(void);
+void __init hv_get_partition_id(void);
void __init hv_common_free(void);
void __init ms_hyperv_late_init(void);
int hv_common_cpu_init(unsigned int cpu);
int hv_common_cpu_die(unsigned int cpu);
+void hv_identify_partition_type(void);
void *hv_alloc_hyperv_page(void);
void *hv_alloc_hyperv_zeroed_page(void);
@@ -291,6 +315,20 @@ static inline int cpumask_to_vpset_skip(struct hv_vpset *vpset,
return __cpumask_to_vpset(vpset, cpus, func);
}
+#define _hv_status_fmt(fmt) "%s: Hyper-V status: %#x = %s: " fmt
+#define hv_status_printk(level, status, fmt, ...) \
+do { \
+ u64 __status = (status); \
+ pr_##level(_hv_status_fmt(fmt), __func__, hv_result(__status), \
+ hv_result_to_string(__status), ##__VA_ARGS__); \
+} while (0)
+#define hv_status_err(status, fmt, ...) \
+ hv_status_printk(err, status, fmt, ##__VA_ARGS__)
+#define hv_status_debug(status, fmt, ...) \
+ hv_status_printk(debug, status, fmt, ##__VA_ARGS__)
+
+const char *hv_result_to_string(u64 hv_status);
+int hv_result_to_errno(u64 status);
void hyperv_report_panic(struct pt_regs *regs, long err, bool in_die);
bool hv_is_hyperv_initialized(void);
bool hv_is_hibernation_supported(void);
@@ -303,6 +341,7 @@ void hyperv_cleanup(void);
bool hv_query_ext_cap(u64 cap_query);
void hv_setup_dma_ops(struct device *dev, bool coherent);
#else /* CONFIG_HYPERV */
+static inline void hv_identify_partition_type(void) {}
static inline bool hv_is_hyperv_initialized(void) { return false; }
static inline bool hv_is_hibernation_supported(void) { return false; }
static inline void hyperv_cleanup(void) {}
@@ -314,4 +353,29 @@ static inline enum hv_isolation_type hv_get_isolation_type(void)
}
#endif /* CONFIG_HYPERV */
+#if IS_ENABLED(CONFIG_MSHV_ROOT)
+static inline bool hv_root_partition(void)
+{
+ return hv_curr_partition_type == HV_PARTITION_TYPE_ROOT;
+}
+int hv_call_deposit_pages(int node, u64 partition_id, u32 num_pages);
+int hv_call_add_logical_proc(int node, u32 lp_index, u32 acpi_id);
+int hv_call_create_vp(int node, u64 partition_id, u32 vp_index, u32 flags);
+
+#else /* CONFIG_MSHV_ROOT */
+static inline bool hv_root_partition(void) { return false; }
+static inline int hv_call_deposit_pages(int node, u64 partition_id, u32 num_pages)
+{
+ return -EOPNOTSUPP;
+}
+static inline int hv_call_add_logical_proc(int node, u32 lp_index, u32 acpi_id)
+{
+ return -EOPNOTSUPP;
+}
+static inline int hv_call_create_vp(int node, u64 partition_id, u32 vp_index, u32 flags)
+{
+ return -EOPNOTSUPP;
+}
+#endif /* CONFIG_MSHV_ROOT */
+
#endif
diff --git a/include/hyperv/hvgdk_mini.h b/include/hyperv/hvgdk_mini.h
index 155615175965..abf0bd76e370 100644
--- a/include/hyperv/hvgdk_mini.h
+++ b/include/hyperv/hvgdk_mini.h
@@ -13,7 +13,7 @@ struct hv_u128 {
u64 high_part;
} __packed;
-/* NOTE: when adding below, update hv_status_to_string() */
+/* NOTE: when adding below, update hv_result_to_string() */
#define HV_STATUS_SUCCESS 0x0
#define HV_STATUS_INVALID_HYPERCALL_CODE 0x2
#define HV_STATUS_INVALID_HYPERCALL_INPUT 0x3
@@ -51,6 +51,7 @@ struct hv_u128 {
#define HV_HYP_PAGE_SHIFT 12
#define HV_HYP_PAGE_SIZE BIT(HV_HYP_PAGE_SHIFT)
#define HV_HYP_PAGE_MASK (~(HV_HYP_PAGE_SIZE - 1))
+#define HV_HYP_LARGE_PAGE_SHIFT 21
#define HV_PARTITION_ID_INVALID ((u64)0)
#define HV_PARTITION_ID_SELF ((u64)-1)
@@ -182,7 +183,7 @@ struct hv_tsc_emulation_control { /* HV_TSC_INVARIANT_CONTROL */
#endif /* CONFIG_X86 */
-struct hv_get_partition_id { /* HV_OUTPUT_GET_PARTITION_ID */
+struct hv_output_get_partition_id {
u64 partition_id;
} __packed;
@@ -204,7 +205,14 @@ union hv_reference_tsc_msr {
/* The number of vCPUs in one sparse bank */
#define HV_VCPUS_PER_SPARSE_BANK (64)
-/* Some of Hyper-V structs do not use hv_vpset where linux uses them */
+/*
+ * Some of Hyper-V structs do not use hv_vpset where linux uses them.
+ *
+ * struct hv_vpset is usually used as part of hypercall input. The portion
+ * that counts as "fixed size input header" vs. "variable size input header"
+ * varies per hypercall. See comments at relevant hypercall call sites as to
+ * how the "valid_bank_mask" field should be accounted.
+ */
struct hv_vpset { /* HV_VP_SET */
u64 format;
u64 valid_bank_mask;
@@ -374,6 +382,10 @@ union hv_hypervisor_version_info {
#define HV_SHARED_GPA_BOUNDARY_ACTIVE BIT(5)
#define HV_SHARED_GPA_BOUNDARY_BITS GENMASK(11, 6)
+/* HYPERV_CPUID_FEATURES.ECX bits. */
+#define HV_VP_DISPATCH_INTERRUPT_INJECTION_AVAILABLE BIT(9)
+#define HV_VP_GHCB_ROOT_MAPPING_AVAILABLE BIT(10)
+
enum hv_isolation_type {
HV_ISOLATION_TYPE_NONE = 0, /* HV_PARTITION_ISOLATION_TYPE_NONE */
HV_ISOLATION_TYPE_VBS = 1,
@@ -436,10 +448,13 @@ union hv_vp_assist_msr_contents { /* HV_REGISTER_VP_ASSIST_PAGE */
#define HVCALL_WITHDRAW_MEMORY 0x0049
#define HVCALL_MAP_GPA_PAGES 0x004b
#define HVCALL_UNMAP_GPA_PAGES 0x004c
+#define HVCALL_INSTALL_INTERCEPT 0x004d
#define HVCALL_CREATE_VP 0x004e
#define HVCALL_DELETE_VP 0x004f
#define HVCALL_GET_VP_REGISTERS 0x0050
#define HVCALL_SET_VP_REGISTERS 0x0051
+#define HVCALL_TRANSLATE_VIRTUAL_ADDRESS 0x0052
+#define HVCALL_CLEAR_VIRTUAL_INTERRUPT 0x0056
#define HVCALL_DELETE_PORT 0x0058
#define HVCALL_DISCONNECT_PORT 0x005b
#define HVCALL_POST_MESSAGE 0x005c
@@ -447,12 +462,15 @@ union hv_vp_assist_msr_contents { /* HV_REGISTER_VP_ASSIST_PAGE */
#define HVCALL_POST_DEBUG_DATA 0x0069
#define HVCALL_RETRIEVE_DEBUG_DATA 0x006a
#define HVCALL_RESET_DEBUG_SESSION 0x006b
+#define HVCALL_MAP_STATS_PAGE 0x006c
+#define HVCALL_UNMAP_STATS_PAGE 0x006d
#define HVCALL_ADD_LOGICAL_PROCESSOR 0x0076
#define HVCALL_GET_SYSTEM_PROPERTY 0x007b
#define HVCALL_MAP_DEVICE_INTERRUPT 0x007c
#define HVCALL_UNMAP_DEVICE_INTERRUPT 0x007d
#define HVCALL_RETARGET_INTERRUPT 0x007e
#define HVCALL_NOTIFY_PORT_RING_EMPTY 0x008b
+#define HVCALL_REGISTER_INTERCEPT_RESULT 0x0091
#define HVCALL_ASSERT_VIRTUAL_INTERRUPT 0x0094
#define HVCALL_CREATE_PORT 0x0095
#define HVCALL_CONNECT_PORT 0x0096
@@ -460,12 +478,18 @@ union hv_vp_assist_msr_contents { /* HV_REGISTER_VP_ASSIST_PAGE */
#define HVCALL_GET_VP_ID_FROM_APIC_ID 0x009a
#define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_SPACE 0x00af
#define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_LIST 0x00b0
+#define HVCALL_SIGNAL_EVENT_DIRECT 0x00c0
+#define HVCALL_POST_MESSAGE_DIRECT 0x00c1
#define HVCALL_DISPATCH_VP 0x00c2
+#define HVCALL_GET_GPA_PAGES_ACCESS_STATES 0x00c9
+#define HVCALL_ACQUIRE_SPARSE_SPA_PAGE_HOST_ACCESS 0x00d7
+#define HVCALL_RELEASE_SPARSE_SPA_PAGE_HOST_ACCESS 0x00d8
#define HVCALL_MODIFY_SPARSE_GPA_PAGE_HOST_VISIBILITY 0x00db
#define HVCALL_MAP_VP_STATE_PAGE 0x00e1
#define HVCALL_UNMAP_VP_STATE_PAGE 0x00e2
#define HVCALL_GET_VP_STATE 0x00e3
#define HVCALL_SET_VP_STATE 0x00e4
+#define HVCALL_GET_VP_CPUID_VALUES 0x00f4
#define HVCALL_MMIO_READ 0x0106
#define HVCALL_MMIO_WRITE 0x0107
@@ -775,10 +799,10 @@ struct hv_message_page {
/* Define timer message payload structure. */
struct hv_timer_message_payload {
- __u32 timer_index;
- __u32 reserved;
- __u64 expiration_time; /* When the timer expired */
- __u64 delivery_time; /* When the message was delivered */
+ u32 timer_index;
+ u32 reserved;
+ u64 expiration_time; /* When the timer expired */
+ u64 delivery_time; /* When the message was delivered */
} __packed;
struct hv_x64_segment_register {
@@ -807,6 +831,8 @@ struct hv_x64_table_register {
u64 base;
} __packed;
+#define HV_NORMAL_VTL 0
+
union hv_input_vtl {
u8 as_uint8;
struct {
@@ -1325,6 +1351,49 @@ struct hv_retarget_device_interrupt { /* HV_INPUT_RETARGET_DEVICE_INTERRUPT */
struct hv_device_interrupt_target int_target;
} __packed __aligned(8);
+enum hv_intercept_type {
+#if defined(CONFIG_X86)
+ HV_INTERCEPT_TYPE_X64_IO_PORT = 0x00000000,
+ HV_INTERCEPT_TYPE_X64_MSR = 0x00000001,
+ HV_INTERCEPT_TYPE_X64_CPUID = 0x00000002,
+#endif
+ HV_INTERCEPT_TYPE_EXCEPTION = 0x00000003,
+ /* Used to be HV_INTERCEPT_TYPE_REGISTER */
+ HV_INTERCEPT_TYPE_RESERVED0 = 0x00000004,
+ HV_INTERCEPT_TYPE_MMIO = 0x00000005,
+#if defined(CONFIG_X86)
+ HV_INTERCEPT_TYPE_X64_GLOBAL_CPUID = 0x00000006,
+ HV_INTERCEPT_TYPE_X64_APIC_SMI = 0x00000007,
+#endif
+ HV_INTERCEPT_TYPE_HYPERCALL = 0x00000008,
+#if defined(CONFIG_X86)
+ HV_INTERCEPT_TYPE_X64_APIC_INIT_SIPI = 0x00000009,
+ HV_INTERCEPT_MC_UPDATE_PATCH_LEVEL_MSR_READ = 0x0000000A,
+ HV_INTERCEPT_TYPE_X64_APIC_WRITE = 0x0000000B,
+ HV_INTERCEPT_TYPE_X64_MSR_INDEX = 0x0000000C,
+#endif
+ HV_INTERCEPT_TYPE_MAX,
+ HV_INTERCEPT_TYPE_INVALID = 0xFFFFFFFF,
+};
+
+union hv_intercept_parameters {
+ /* HV_INTERCEPT_PARAMETERS is defined to be an 8-byte field. */
+ u64 as_uint64;
+#if defined(CONFIG_X86)
+ /* HV_INTERCEPT_TYPE_X64_IO_PORT */
+ u16 io_port;
+ /* HV_INTERCEPT_TYPE_X64_CPUID */
+ u32 cpuid_index;
+ /* HV_INTERCEPT_TYPE_X64_APIC_WRITE */
+ u32 apic_write_mask;
+ /* HV_INTERCEPT_TYPE_EXCEPTION */
+ u16 exception_vector;
+ /* HV_INTERCEPT_TYPE_X64_MSR_INDEX */
+ u32 msr_index;
+#endif
+ /* N.B. Other intercept types do not have any parameters. */
+};
+
/* Data structures for HVCALL_MMIO_READ and HVCALL_MMIO_WRITE */
#define HV_HYPERCALL_MMIO_MAX_DATA_LENGTH 64
diff --git a/include/hyperv/hvhdk.h b/include/hyperv/hvhdk.h
index 64407c2a3809..b4067ada02cf 100644
--- a/include/hyperv/hvhdk.h
+++ b/include/hyperv/hvhdk.h
@@ -19,11 +19,24 @@
#define HV_VP_REGISTER_PAGE_VERSION_1 1u
+#define HV_VP_REGISTER_PAGE_MAX_VECTOR_COUNT 7
+
+union hv_vp_register_page_interrupt_vectors {
+ u64 as_uint64;
+ struct {
+ u8 vector_count;
+ u8 vector[HV_VP_REGISTER_PAGE_MAX_VECTOR_COUNT];
+ } __packed;
+};
+
struct hv_vp_register_page {
u16 version;
u8 isvalid;
u8 rsvdz;
u32 dirty;
+
+#if IS_ENABLED(CONFIG_X86)
+
union {
struct {
/* General purpose registers
@@ -95,6 +108,22 @@ struct hv_vp_register_page {
union hv_x64_pending_interruption_register pending_interruption;
union hv_x64_interrupt_state_register interrupt_state;
u64 instruction_emulation_hints;
+ u64 xfem;
+
+ /*
+ * Fields from this point are not included in the register page save chunk.
+ * The reserved field is intended to maintain alignment for unsaved fields.
+ */
+ u8 reserved1[0x100];
+
+ /*
+ * Interrupts injected as part of HvCallDispatchVp.
+ */
+ union hv_vp_register_page_interrupt_vectors interrupt_vectors;
+
+#elif IS_ENABLED(CONFIG_ARM64)
+ /* Not yet supported in ARM */
+#endif
} __packed;
#define HV_PARTITION_PROCESSOR_FEATURES_BANKS 2
@@ -299,10 +328,11 @@ union hv_partition_isolation_properties {
#define HV_PARTITION_ISOLATION_HOST_TYPE_RESERVED 0x2
/* Note: Exo partition is enabled by default */
-#define HV_PARTITION_CREATION_FLAG_EXO_PARTITION BIT(8)
-#define HV_PARTITION_CREATION_FLAG_LAPIC_ENABLED BIT(13)
-#define HV_PARTITION_CREATION_FLAG_INTERCEPT_MESSAGE_PAGE_ENABLED BIT(19)
-#define HV_PARTITION_CREATION_FLAG_X2APIC_CAPABLE BIT(22)
+#define HV_PARTITION_CREATION_FLAG_GPA_SUPER_PAGES_ENABLED BIT(4)
+#define HV_PARTITION_CREATION_FLAG_EXO_PARTITION BIT(8)
+#define HV_PARTITION_CREATION_FLAG_LAPIC_ENABLED BIT(13)
+#define HV_PARTITION_CREATION_FLAG_INTERCEPT_MESSAGE_PAGE_ENABLED BIT(19)
+#define HV_PARTITION_CREATION_FLAG_X2APIC_CAPABLE BIT(22)
struct hv_input_create_partition {
u64 flags;
@@ -349,13 +379,23 @@ struct hv_input_set_partition_property {
enum hv_vp_state_page_type {
HV_VP_STATE_PAGE_REGISTERS = 0,
HV_VP_STATE_PAGE_INTERCEPT_MESSAGE = 1,
+ HV_VP_STATE_PAGE_GHCB = 2,
HV_VP_STATE_PAGE_COUNT
};
struct hv_input_map_vp_state_page {
u64 partition_id;
u32 vp_index;
- u32 type; /* enum hv_vp_state_page_type */
+ u16 type; /* enum hv_vp_state_page_type */
+ union hv_input_vtl input_vtl;
+ union {
+ u8 as_uint8;
+ struct {
+ u8 map_location_provided : 1;
+ u8 reserved : 7;
+ };
+ } flags;
+ u64 requested_map_location;
} __packed;
struct hv_output_map_vp_state_page {
@@ -365,7 +405,14 @@ struct hv_output_map_vp_state_page {
struct hv_input_unmap_vp_state_page {
u64 partition_id;
u32 vp_index;
- u32 type; /* enum hv_vp_state_page_type */
+ u16 type; /* enum hv_vp_state_page_type */
+ union hv_input_vtl input_vtl;
+ u8 reserved0;
+} __packed;
+
+struct hv_x64_apic_eoi_message {
+ u32 vp_index;
+ u32 interrupt_vector;
} __packed;
struct hv_opaque_intercept_message {
@@ -515,6 +562,13 @@ struct hv_synthetic_timers_state {
u64 reserved[5];
} __packed;
+struct hv_async_completion_message_payload {
+ u64 partition_id;
+ u32 status;
+ u32 completion_count;
+ u64 sub_status;
+} __packed;
+
union hv_input_delete_vp {
u64 as_uint64[2];
struct {
@@ -649,6 +703,57 @@ struct hv_input_set_vp_state {
union hv_input_set_vp_state_data data[];
} __packed;
+union hv_x64_vp_execution_state {
+ u16 as_uint16;
+ struct {
+ u16 cpl:2;
+ u16 cr0_pe:1;
+ u16 cr0_am:1;
+ u16 efer_lma:1;
+ u16 debug_active:1;
+ u16 interruption_pending:1;
+ u16 vtl:4;
+ u16 enclave_mode:1;
+ u16 interrupt_shadow:1;
+ u16 virtualization_fault_active:1;
+ u16 reserved:2;
+ } __packed;
+};
+
+struct hv_x64_intercept_message_header {
+ u32 vp_index;
+ u8 instruction_length:4;
+ u8 cr8:4; /* Only set for exo partitions */
+ u8 intercept_access_type;
+ union hv_x64_vp_execution_state execution_state;
+ struct hv_x64_segment_register cs_segment;
+ u64 rip;
+ u64 rflags;
+} __packed;
+
+union hv_x64_memory_access_info {
+ u8 as_uint8;
+ struct {
+ u8 gva_valid:1;
+ u8 gva_gpa_valid:1;
+ u8 hypercall_output_pending:1;
+ u8 tlb_locked_no_overlay:1;
+ u8 reserved:4;
+ } __packed;
+};
+
+struct hv_x64_memory_intercept_message {
+ struct hv_x64_intercept_message_header header;
+ u32 cache_type; /* enum hv_cache_type */
+ u8 instruction_byte_count;
+ union hv_x64_memory_access_info memory_access_info;
+ u8 tpr_priority;
+ u8 reserved1;
+ u64 guest_virtual_address;
+ u64 guest_physical_address;
+ u8 instruction_bytes[16];
+} __packed;
+
/*
* Dispatch state for the VP communicated by the hypervisor to the
* VP-dispatching thread in the root on return from HVCALL_DISPATCH_VP.
@@ -716,6 +821,7 @@ static_assert(sizeof(struct hv_vp_signal_pair_scheduler_message) ==
#define HV_DISPATCH_VP_FLAG_SKIP_VP_SPEC_FLUSH 0x8
#define HV_DISPATCH_VP_FLAG_SKIP_CALLER_SPEC_FLUSH 0x10
#define HV_DISPATCH_VP_FLAG_SKIP_CALLER_USER_SPEC_FLUSH 0x20
+#define HV_DISPATCH_VP_FLAG_SCAN_INTERRUPT_INJECTION 0x40
struct hv_input_dispatch_vp {
u64 partition_id;
@@ -730,4 +836,18 @@ struct hv_output_dispatch_vp {
u32 dispatch_event; /* enum hv_vp_dispatch_event */
} __packed;
+struct hv_input_modify_sparse_spa_page_host_access {
+ u32 host_access : 2;
+ u32 reserved : 30;
+ u32 flags;
+ u64 partition_id;
+ u64 spa_page_list[];
+} __packed;
+
+/* hv_input_modify_sparse_spa_page_host_access flags */
+#define HV_MODIFY_SPA_PAGE_HOST_ACCESS_MAKE_EXCLUSIVE 0x1
+#define HV_MODIFY_SPA_PAGE_HOST_ACCESS_MAKE_SHARED 0x2
+#define HV_MODIFY_SPA_PAGE_HOST_ACCESS_LARGE_PAGE 0x4
+#define HV_MODIFY_SPA_PAGE_HOST_ACCESS_HUGE_PAGE 0x8
+
#endif /* _HV_HVHDK_H */
diff --git a/include/hyperv/hvhdk_mini.h b/include/hyperv/hvhdk_mini.h
index f8a39d3e9ce6..42e7876455b5 100644
--- a/include/hyperv/hvhdk_mini.h
+++ b/include/hyperv/hvhdk_mini.h
@@ -36,6 +36,52 @@ enum hv_scheduler_type {
HV_SCHEDULER_TYPE_MAX
};
+/* HV_STATS_AREA_TYPE */
+enum hv_stats_area_type {
+ HV_STATS_AREA_SELF = 0,
+ HV_STATS_AREA_PARENT = 1,
+ HV_STATS_AREA_INTERNAL = 2,
+ HV_STATS_AREA_COUNT
+};
+
+enum hv_stats_object_type {
+ HV_STATS_OBJECT_HYPERVISOR = 0x00000001,
+ HV_STATS_OBJECT_LOGICAL_PROCESSOR = 0x00000002,
+ HV_STATS_OBJECT_PARTITION = 0x00010001,
+ HV_STATS_OBJECT_VP = 0x00010002
+};
+
+union hv_stats_object_identity {
+ /* hv_stats_hypervisor */
+ struct {
+ u8 reserved[15];
+ u8 stats_area_type;
+ } __packed hv;
+
+ /* hv_stats_logical_processor */
+ struct {
+ u32 lp_index;
+ u8 reserved[11];
+ u8 stats_area_type;
+ } __packed lp;
+
+ /* hv_stats_partition */
+ struct {
+ u64 partition_id;
+ u8 reserved[7];
+ u8 stats_area_type;
+ } __packed partition;
+
+ /* hv_stats_vp */
+ struct {
+ u64 partition_id;
+ u32 vp_index;
+ u16 flags;
+ u8 reserved;
+ u8 stats_area_type;
+ } __packed vp;
+};
+
enum hv_partition_property_code {
/* Privilege properties */
HV_PARTITION_PROPERTY_PRIVILEGE_FLAGS = 0x00010000,
@@ -47,19 +93,45 @@ enum hv_partition_property_code {
/* Compatibility properties */
HV_PARTITION_PROPERTY_PROCESSOR_XSAVE_FEATURES = 0x00060002,
+ HV_PARTITION_PROPERTY_XSAVE_STATES = 0x00060007,
HV_PARTITION_PROPERTY_MAX_XSAVE_DATA_SIZE = 0x00060008,
HV_PARTITION_PROPERTY_PROCESSOR_CLOCK_FREQUENCY = 0x00060009,
};
+enum hv_snp_status {
+ HV_SNP_STATUS_NONE = 0,
+ HV_SNP_STATUS_AVAILABLE = 1,
+ HV_SNP_STATUS_INCOMPATIBLE = 2,
+ HV_SNP_STATUS_PSP_UNAVAILABLE = 3,
+ HV_SNP_STATUS_PSP_INIT_FAILED = 4,
+ HV_SNP_STATUS_PSP_BAD_FW_VERSION = 5,
+ HV_SNP_STATUS_BAD_CONFIGURATION = 6,
+ HV_SNP_STATUS_PSP_FW_UPDATE_IN_PROGRESS = 7,
+ HV_SNP_STATUS_PSP_RB_INIT_FAILED = 8,
+ HV_SNP_STATUS_PSP_PLATFORM_STATUS_FAILED = 9,
+ HV_SNP_STATUS_PSP_INIT_LATE_FAILED = 10,
+};
+
enum hv_system_property {
/* Add more values when needed */
HV_SYSTEM_PROPERTY_SCHEDULER_TYPE = 15,
+ HV_DYNAMIC_PROCESSOR_FEATURE_PROPERTY = 21,
+};
+
+enum hv_dynamic_processor_feature_property {
+ /* Add more values when needed */
+ HV_X64_DYNAMIC_PROCESSOR_FEATURE_MAX_ENCRYPTED_PARTITIONS = 13,
+ HV_X64_DYNAMIC_PROCESSOR_FEATURE_SNP_STATUS = 16,
};
struct hv_input_get_system_property {
u32 property_id; /* enum hv_system_property */
union {
u32 as_uint32;
+#if IS_ENABLED(CONFIG_X86)
+ /* enum hv_dynamic_processor_feature_property */
+ u32 hv_processor_feature;
+#endif
/* More fields to be filled in when needed */
};
} __packed;
@@ -67,9 +139,28 @@ struct hv_input_get_system_property {
struct hv_output_get_system_property {
union {
u32 scheduler_type; /* enum hv_scheduler_type */
+#if IS_ENABLED(CONFIG_X86)
+ u64 hv_processor_feature_value;
+#endif
};
} __packed;
+struct hv_input_map_stats_page {
+ u32 type; /* enum hv_stats_object_type */
+ u32 padding;
+ union hv_stats_object_identity identity;
+} __packed;
+
+struct hv_output_map_stats_page {
+ u64 map_location;
+} __packed;
+
+struct hv_input_unmap_stats_page {
+ u32 type; /* enum hv_stats_object_type */
+ u32 padding;
+ union hv_stats_object_identity identity;
+} __packed;
+
struct hv_proximity_domain_flags {
u32 proximity_preferred : 1;
u32 reserved : 30;
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index 4179add2864b..675959fb97ba 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -371,19 +371,6 @@ struct vmtransfer_page_packet_header {
struct vmtransfer_page_range ranges[];
} __packed;
-struct vmgpadl_packet_header {
- struct vmpacket_descriptor d;
- u32 gpadl;
- u32 reserved;
-} __packed;
-
-struct vmadd_remove_transfer_page_set {
- struct vmpacket_descriptor d;
- u32 gpadl;
- u16 xfer_pageset_id;
- u16 reserved;
-} __packed;
-
/*
* This structure defines a range in guest physical space that can be made to
* look virtually contiguous.
@@ -395,30 +382,6 @@ struct gpa_range {
};
/*
- * This is the format for an Establish Gpadl packet, which contains a handle by
- * which this GPADL will be known and a set of GPA ranges associated with it.
- * This can be converted to a MDL by the guest OS. If there are multiple GPA
- * ranges, then the resulting MDL will be "chained," representing multiple VA
- * ranges.
- */
-struct vmestablish_gpadl {
- struct vmpacket_descriptor d;
- u32 gpadl;
- u32 range_cnt;
- struct gpa_range range[1];
-} __packed;
-
-/*
- * This is the format for a Teardown Gpadl packet, which indicates that the
- * GPADL handle in the Establish Gpadl packet will never be referenced again.
- */
-struct vmteardown_gpadl {
- struct vmpacket_descriptor d;
- u32 gpadl;
- u32 reserved; /* for alignment to a 8-byte boundary */
-} __packed;
-
-/*
* This is the format for a GPA-Direct packet, which contains a set of GPA
* ranges, in addition to commands and/or data.
*/
@@ -429,25 +392,6 @@ struct vmdata_gpa_direct {
struct gpa_range range[1];
} __packed;
-/* This is the format for a Additional Data Packet. */
-struct vmadditional_data {
- struct vmpacket_descriptor d;
- u64 total_bytes;
- u32 offset;
- u32 byte_cnt;
- unsigned char data[1];
-} __packed;
-
-union vmpacket_largest_possible_header {
- struct vmpacket_descriptor simple_hdr;
- struct vmtransfer_page_packet_header xfer_page_hdr;
- struct vmgpadl_packet_header gpadl_hdr;
- struct vmadd_remove_transfer_page_set add_rm_xfer_page_hdr;
- struct vmestablish_gpadl establish_gpadl_hdr;
- struct vmteardown_gpadl teardown_gpadl_hdr;
- struct vmdata_gpa_direct data_gpa_direct_hdr;
-};
-
#define VMPACKET_DATA_START_ADDRESS(__packet) \
(void *)(((unsigned char *)__packet) + \
((struct vmpacket_descriptor)__packet)->offset8 * 8)
@@ -1661,6 +1605,7 @@ int vmbus_send_tl_connect_request(const guid_t *shv_guest_servie_id,
const guid_t *shv_host_servie_id);
int vmbus_send_modifychannel(struct vmbus_channel *channel, u32 target_vp);
void vmbus_set_event(struct vmbus_channel *channel);
+int vmbus_channel_set_cpu(struct vmbus_channel *channel, u32 target_cpu);
/* Get the start of the ring buffer. */
static inline void *
diff --git a/include/uapi/linux/mshv.h b/include/uapi/linux/mshv.h
new file mode 100644
index 000000000000..876bfe4e4227
--- /dev/null
+++ b/include/uapi/linux/mshv.h
@@ -0,0 +1,291 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * Userspace interfaces for /dev/mshv* devices and derived fds
+ *
+ * This file is divided into sections containing data structures and IOCTLs for
+ * a particular set of related devices or derived file descriptors.
+ *
+ * The IOCTL definitions are at the end of each section. They are grouped by
+ * device/fd, so that new IOCTLs can easily be added with a monotonically
+ * increasing number.
+ */
+#ifndef _UAPI_LINUX_MSHV_H
+#define _UAPI_LINUX_MSHV_H
+
+#include <linux/types.h>
+
+#define MSHV_IOCTL 0xB8
+
+/*
+ *******************************************
+ * Entry point to main VMM APIs: /dev/mshv *
+ *******************************************
+ */
+
+enum {
+ MSHV_PT_BIT_LAPIC,
+ MSHV_PT_BIT_X2APIC,
+ MSHV_PT_BIT_GPA_SUPER_PAGES,
+ MSHV_PT_BIT_COUNT,
+};
+
+#define MSHV_PT_FLAGS_MASK ((1 << MSHV_PT_BIT_COUNT) - 1)
+
+enum {
+ MSHV_PT_ISOLATION_NONE,
+ MSHV_PT_ISOLATION_COUNT,
+};
+
+/**
+ * struct mshv_create_partition - arguments for MSHV_CREATE_PARTITION
+ * @pt_flags: Bitmask of 1 << MSHV_PT_BIT_*
+ * @pt_isolation: MSHV_PT_ISOLATION_*
+ *
+ * Returns a file descriptor to act as a handle to a guest partition.
+ * At this point the partition is not yet initialized in the hypervisor.
+ * Some operations must be done with the partition in this state, e.g. setting
+ * so-called "early" partition properties. The partition can then be
+ * initialized with MSHV_INITIALIZE_PARTITION.
+ */
+struct mshv_create_partition {
+ __u64 pt_flags;
+ __u64 pt_isolation;
+};
+
+/* /dev/mshv */
+#define MSHV_CREATE_PARTITION _IOW(MSHV_IOCTL, 0x00, struct mshv_create_partition)
+
+/*
+ ************************
+ * Child partition APIs *
+ ************************
+ */
+
+struct mshv_create_vp {
+ __u32 vp_index;
+};
+
+enum {
+ MSHV_SET_MEM_BIT_WRITABLE,
+ MSHV_SET_MEM_BIT_EXECUTABLE,
+ MSHV_SET_MEM_BIT_UNMAP,
+ MSHV_SET_MEM_BIT_COUNT
+};
+
+#define MSHV_SET_MEM_FLAGS_MASK ((1 << MSHV_SET_MEM_BIT_COUNT) - 1)
+
+/* The hypervisor's "native" page size */
+#define MSHV_HV_PAGE_SIZE 0x1000
+
+/**
+ * struct mshv_user_mem_region - arguments for MSHV_SET_GUEST_MEMORY
+ * @size: Size of the memory region (bytes). Must be aligned to
+ * MSHV_HV_PAGE_SIZE
+ * @guest_pfn: Base guest page number to map
+ * @userspace_addr: Base address of userspace memory. Must be aligned to
+ * MSHV_HV_PAGE_SIZE
+ * @flags: Bitmask of 1 << MSHV_SET_MEM_BIT_*. If (1 << MSHV_SET_MEM_BIT_UNMAP)
+ * is set, ignore other bits.
+ * @rsvd: MBZ
+ *
+ * Map or unmap a region of userspace memory to Guest Physical Addresses (GPA).
+ * Mappings can't overlap in GPA space or userspace.
+ * To unmap, these fields must match an existing mapping.
+ */
+struct mshv_user_mem_region {
+ __u64 size;
+ __u64 guest_pfn;
+ __u64 userspace_addr;
+ __u8 flags;
+ __u8 rsvd[7];
+};
+
+enum {
+ MSHV_IRQFD_BIT_DEASSIGN,
+ MSHV_IRQFD_BIT_RESAMPLE,
+ MSHV_IRQFD_BIT_COUNT,
+};
+
+#define MSHV_IRQFD_FLAGS_MASK ((1 << MSHV_IRQFD_BIT_COUNT) - 1)
+
+struct mshv_user_irqfd {
+ __s32 fd;
+ __s32 resamplefd;
+ __u32 gsi;
+ __u32 flags;
+};
+
+enum {
+ MSHV_IOEVENTFD_BIT_DATAMATCH,
+ MSHV_IOEVENTFD_BIT_PIO,
+ MSHV_IOEVENTFD_BIT_DEASSIGN,
+ MSHV_IOEVENTFD_BIT_COUNT,
+};
+
+#define MSHV_IOEVENTFD_FLAGS_MASK ((1 << MSHV_IOEVENTFD_BIT_COUNT) - 1)
+
+struct mshv_user_ioeventfd {
+ __u64 datamatch;
+ __u64 addr; /* legal pio/mmio address */
+ __u32 len; /* 1, 2, 4, or 8 bytes */
+ __s32 fd;
+ __u32 flags;
+ __u8 rsvd[4];
+};
+
+struct mshv_user_irq_entry {
+ __u32 gsi;
+ __u32 address_lo;
+ __u32 address_hi;
+ __u32 data;
+};
+
+struct mshv_user_irq_table {
+ __u32 nr;
+ __u32 rsvd; /* MBZ */
+ struct mshv_user_irq_entry entries[];
+};
+
+enum {
+ MSHV_GPAP_ACCESS_TYPE_ACCESSED,
+ MSHV_GPAP_ACCESS_TYPE_DIRTY,
+ MSHV_GPAP_ACCESS_TYPE_COUNT /* Count of enum members */
+};
+
+enum {
+ MSHV_GPAP_ACCESS_OP_NOOP,
+ MSHV_GPAP_ACCESS_OP_CLEAR,
+ MSHV_GPAP_ACCESS_OP_SET,
+ MSHV_GPAP_ACCESS_OP_COUNT /* Count of enum members */
+};
+
+/**
+ * struct mshv_gpap_access_bitmap - arguments for MSHV_GET_GPAP_ACCESS_BITMAP
+ * @access_type: MSHV_GPAP_ACCESS_TYPE_* - The type of access to record in the
+ * bitmap
+ * @access_op: MSHV_GPAP_ACCESS_OP_* - Allows an optional clear or set of all
+ * the access states in the range, after retrieving the current
+ * states.
+ * @rsvd: MBZ
+ * @page_count: Number of pages
+ * @gpap_base: Base gpa page number
+ * @bitmap_ptr: Output buffer for bitmap, at least (page_count + 7) / 8 bytes
+ *
+ * Retrieve a bitmap of either ACCESSED or DIRTY bits for a given range of guest
+ * memory, and optionally clear or set the bits.
+ */
+struct mshv_gpap_access_bitmap {
+ __u8 access_type;
+ __u8 access_op;
+ __u8 rsvd[6];
+ __u64 page_count;
+ __u64 gpap_base;
+ __u64 bitmap_ptr;
+};
+
+/**
+ * struct mshv_root_hvcall - arguments for MSHV_ROOT_HVCALL
+ * @code: Hypercall code (HVCALL_*)
+ * @reps: in: Rep count ('repcount')
+ * out: Reps completed ('repcomp'). MBZ unless rep hvcall
+ * @in_sz: Size of input incl rep data. <= MSHV_HV_PAGE_SIZE
+ * @out_sz: Size of output buffer. <= MSHV_HV_PAGE_SIZE. MBZ if out_ptr is 0
+ * @status: in: MBZ
+ * out: HV_STATUS_* from hypercall
+ * @rsvd: MBZ
+ * @in_ptr: Input data buffer (struct hv_input_*). If used with partition or
+ * vp fd, partition id field is populated by kernel.
+ * @out_ptr: Output data buffer (optional)
+ */
+struct mshv_root_hvcall {
+ __u16 code;
+ __u16 reps;
+ __u16 in_sz;
+ __u16 out_sz;
+ __u16 status;
+ __u8 rsvd[6];
+ __u64 in_ptr;
+ __u64 out_ptr;
+};
+
+/* Partition fds created with MSHV_CREATE_PARTITION */
+#define MSHV_INITIALIZE_PARTITION _IO(MSHV_IOCTL, 0x00)
+#define MSHV_CREATE_VP _IOW(MSHV_IOCTL, 0x01, struct mshv_create_vp)
+#define MSHV_SET_GUEST_MEMORY _IOW(MSHV_IOCTL, 0x02, struct mshv_user_mem_region)
+#define MSHV_IRQFD _IOW(MSHV_IOCTL, 0x03, struct mshv_user_irqfd)
+#define MSHV_IOEVENTFD _IOW(MSHV_IOCTL, 0x04, struct mshv_user_ioeventfd)
+#define MSHV_SET_MSI_ROUTING _IOW(MSHV_IOCTL, 0x05, struct mshv_user_irq_table)
+#define MSHV_GET_GPAP_ACCESS_BITMAP _IOWR(MSHV_IOCTL, 0x06, struct mshv_gpap_access_bitmap)
+/* Generic hypercall */
+#define MSHV_ROOT_HVCALL _IOWR(MSHV_IOCTL, 0x07, struct mshv_root_hvcall)
+
+/*
+ ********************************
+ * VP APIs for child partitions *
+ ********************************
+ */
+
+#define MSHV_RUN_VP_BUF_SZ 256
+
+/*
+ * VP state pages may be mapped to userspace via mmap().
+ * To specify which state page, use MSHV_VP_MMAP_OFFSET_ values multiplied by
+ * the system page size.
+ * e.g.
+ * long page_size = sysconf(_SC_PAGE_SIZE);
+ * void *reg_page = mmap(NULL, MSHV_HV_PAGE_SIZE, PROT_READ|PROT_WRITE,
+ * MAP_SHARED, vp_fd,
+ * MSHV_VP_MMAP_OFFSET_REGISTERS * page_size);
+ */
+enum {
+ MSHV_VP_MMAP_OFFSET_REGISTERS,
+ MSHV_VP_MMAP_OFFSET_INTERCEPT_MESSAGE,
+ MSHV_VP_MMAP_OFFSET_GHCB,
+ MSHV_VP_MMAP_OFFSET_COUNT
+};
+
+/**
+ * struct mshv_run_vp - argument for MSHV_RUN_VP
+ * @msg_buf: On success, the intercept message is copied here. It can be
+ * interpreted using the relevant hypervisor definitions.
+ */
+struct mshv_run_vp {
+ __u8 msg_buf[MSHV_RUN_VP_BUF_SZ];
+};
+
+enum {
+ MSHV_VP_STATE_LAPIC, /* Local interrupt controller state (either arch) */
+ MSHV_VP_STATE_XSAVE, /* XSAVE data in compacted form (x86_64) */
+ MSHV_VP_STATE_SIMP,
+ MSHV_VP_STATE_SIEFP,
+ MSHV_VP_STATE_SYNTHETIC_TIMERS,
+ MSHV_VP_STATE_COUNT,
+};
+
+/**
+ * struct mshv_get_set_vp_state - arguments for MSHV_[GET,SET]_VP_STATE
+ * @type: MSHV_VP_STATE_*
+ * @rsvd: MBZ
+ * @buf_sz: in: 4k page-aligned size of buffer
+ * out: Actual size of data (on EINVAL, check this to see if buffer
+ * was too small)
+ * @buf_ptr: 4k page-aligned data buffer
+ */
+struct mshv_get_set_vp_state {
+ __u8 type;
+ __u8 rsvd[3];
+ __u32 buf_sz;
+ __u64 buf_ptr;
+};
+
+/* VP fds created with MSHV_CREATE_VP */
+#define MSHV_RUN_VP _IOR(MSHV_IOCTL, 0x00, struct mshv_run_vp)
+#define MSHV_GET_VP_STATE _IOWR(MSHV_IOCTL, 0x01, struct mshv_get_set_vp_state)
+#define MSHV_SET_VP_STATE _IOWR(MSHV_IOCTL, 0x02, struct mshv_get_set_vp_state)
+/*
+ * Generic hypercall
+ * Defined above in partition IOCTLs, avoid redefining it here
+ * #define MSHV_ROOT_HVCALL _IOWR(MSHV_IOCTL, 0x07, struct mshv_root_hvcall)
+ */
+
+#endif