diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2025-03-25 14:47:04 -0700 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2025-03-25 14:47:04 -0700 |
| commit | a5b3d8660b049779880c790549ff3fef02f6922c (patch) | |
| tree | df07a0fd239a926a8713d22325497ac46bebd745 /include | |
| parent | dce3ab4c57e662ae019c22e7c2f2aa887617beae (diff) | |
| parent | 628cc040b3a2980df6032766e8ef0688e981ab95 (diff) | |
Merge tag 'hyperv-next-signed-20250324' of git://git.kernel.org/pub/scm/linux/kernel/git/hyperv/linux
Pull hyperv updates from Wei Liu:
- Add support for running as the root partition in Hyper-V (Microsoft
Hypervisor) by exposing /dev/mshv (Nuno and various people)
- Add support for CPU offlining in Hyper-V (Hamza Mahfooz)
- Misc fixes and cleanups (Roman Kisel, Tianyu Lan, Wei Liu, Michael
Kelley, Thorsten Blum)
* tag 'hyperv-next-signed-20250324' of git://git.kernel.org/pub/scm/linux/kernel/git/hyperv/linux: (24 commits)
x86/hyperv: fix an indentation issue in mshyperv.h
x86/hyperv: Add comments about hv_vpset and var size hypercall input args
Drivers: hv: Introduce mshv_root module to expose /dev/mshv to VMMs
hyperv: Add definitions for root partition driver to hv headers
x86: hyperv: Add mshv_handler() irq handler and setup function
Drivers: hv: Introduce per-cpu event ring tail
Drivers: hv: Export some functions for use by root partition module
acpi: numa: Export node_to_pxm()
hyperv: Introduce hv_recommend_using_aeoi()
arm64/hyperv: Add some missing functions to arm64
x86/mshyperv: Add support for extended Hyper-V features
hyperv: Log hypercall status codes as strings
x86/hyperv: Fix check of return value from snp_set_vmsa()
x86/hyperv: Add VTL mode callback for restarting the system
x86/hyperv: Add VTL mode emergency restart callback
hyperv: Remove unused union and structs
hyperv: Add CONFIG_MSHV_ROOT to gate root partition support
hyperv: Change hv_root_partition into a function
hyperv: Convert hypercall statuses to linux error codes
drivers/hv: add CPU offlining support
...
Diffstat (limited to 'include')
| -rw-r--r-- | include/asm-generic/mshyperv.h | 72 | ||||
| -rw-r--r-- | include/hyperv/hvgdk_mini.h | 83 | ||||
| -rw-r--r-- | include/hyperv/hvhdk.h | 132 | ||||
| -rw-r--r-- | include/hyperv/hvhdk_mini.h | 91 | ||||
| -rw-r--r-- | include/linux/hyperv.h | 57 | ||||
| -rw-r--r-- | include/uapi/linux/mshv.h | 291 |
6 files changed, 653 insertions, 73 deletions
diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h index a7bbe504e4f3..ccccb1cbf7df 100644 --- a/include/asm-generic/mshyperv.h +++ b/include/asm-generic/mshyperv.h @@ -28,9 +28,15 @@ #define VTPM_BASE_ADDRESS 0xfed40000 +enum hv_partition_type { + HV_PARTITION_TYPE_GUEST, + HV_PARTITION_TYPE_ROOT, +}; + struct ms_hyperv_info { u32 features; u32 priv_high; + u32 ext_features; u32 misc_features; u32 hints; u32 nested_features; @@ -58,15 +64,32 @@ struct ms_hyperv_info { }; extern struct ms_hyperv_info ms_hyperv; extern bool hv_nested; +extern u64 hv_current_partition_id; +extern enum hv_partition_type hv_curr_partition_type; extern void * __percpu *hyperv_pcpu_input_arg; extern void * __percpu *hyperv_pcpu_output_arg; -extern u64 hv_do_hypercall(u64 control, void *inputaddr, void *outputaddr); -extern u64 hv_do_fast_hypercall8(u16 control, u64 input8); +u64 hv_do_hypercall(u64 control, void *inputaddr, void *outputaddr); +u64 hv_do_fast_hypercall8(u16 control, u64 input8); +u64 hv_do_fast_hypercall16(u16 control, u64 input1, u64 input2); + bool hv_isolation_type_snp(void); bool hv_isolation_type_tdx(void); +/* + * On architectures where Hyper-V doesn't support AEOI (e.g., ARM64), + * it doesn't provide a recommendation flag and AEOI must be disabled. + */ +static inline bool hv_recommend_using_aeoi(void) +{ +#ifdef HV_DEPRECATING_AEOI_RECOMMENDED + return !(ms_hyperv.hints & HV_DEPRECATING_AEOI_RECOMMENDED); +#else + return false; +#endif +} + static inline struct hv_proximity_domain_info hv_numa_node_to_pxm_info(int node) { struct hv_proximity_domain_info pxm_info = {}; @@ -185,12 +208,11 @@ void hv_setup_kexec_handler(void (*handler)(void)); void hv_remove_kexec_handler(void); void hv_setup_crash_handler(void (*handler)(struct pt_regs *regs)); void hv_remove_crash_handler(void); +void hv_setup_mshv_handler(void (*handler)(void)); extern int vmbus_interrupt; extern int vmbus_irq; -extern bool hv_root_partition; - #if IS_ENABLED(CONFIG_HYPERV) /* * Hypervisor's notion of virtual processor ID is different from @@ -207,10 +229,12 @@ extern u64 (*hv_read_reference_counter)(void); #define VP_INVAL U32_MAX int __init hv_common_init(void); +void __init hv_get_partition_id(void); void __init hv_common_free(void); void __init ms_hyperv_late_init(void); int hv_common_cpu_init(unsigned int cpu); int hv_common_cpu_die(unsigned int cpu); +void hv_identify_partition_type(void); void *hv_alloc_hyperv_page(void); void *hv_alloc_hyperv_zeroed_page(void); @@ -291,6 +315,20 @@ static inline int cpumask_to_vpset_skip(struct hv_vpset *vpset, return __cpumask_to_vpset(vpset, cpus, func); } +#define _hv_status_fmt(fmt) "%s: Hyper-V status: %#x = %s: " fmt +#define hv_status_printk(level, status, fmt, ...) \ +do { \ + u64 __status = (status); \ + pr_##level(_hv_status_fmt(fmt), __func__, hv_result(__status), \ + hv_result_to_string(__status), ##__VA_ARGS__); \ +} while (0) +#define hv_status_err(status, fmt, ...) \ + hv_status_printk(err, status, fmt, ##__VA_ARGS__) +#define hv_status_debug(status, fmt, ...) \ + hv_status_printk(debug, status, fmt, ##__VA_ARGS__) + +const char *hv_result_to_string(u64 hv_status); +int hv_result_to_errno(u64 status); void hyperv_report_panic(struct pt_regs *regs, long err, bool in_die); bool hv_is_hyperv_initialized(void); bool hv_is_hibernation_supported(void); @@ -303,6 +341,7 @@ void hyperv_cleanup(void); bool hv_query_ext_cap(u64 cap_query); void hv_setup_dma_ops(struct device *dev, bool coherent); #else /* CONFIG_HYPERV */ +static inline void hv_identify_partition_type(void) {} static inline bool hv_is_hyperv_initialized(void) { return false; } static inline bool hv_is_hibernation_supported(void) { return false; } static inline void hyperv_cleanup(void) {} @@ -314,4 +353,29 @@ static inline enum hv_isolation_type hv_get_isolation_type(void) } #endif /* CONFIG_HYPERV */ +#if IS_ENABLED(CONFIG_MSHV_ROOT) +static inline bool hv_root_partition(void) +{ + return hv_curr_partition_type == HV_PARTITION_TYPE_ROOT; +} +int hv_call_deposit_pages(int node, u64 partition_id, u32 num_pages); +int hv_call_add_logical_proc(int node, u32 lp_index, u32 acpi_id); +int hv_call_create_vp(int node, u64 partition_id, u32 vp_index, u32 flags); + +#else /* CONFIG_MSHV_ROOT */ +static inline bool hv_root_partition(void) { return false; } +static inline int hv_call_deposit_pages(int node, u64 partition_id, u32 num_pages) +{ + return -EOPNOTSUPP; +} +static inline int hv_call_add_logical_proc(int node, u32 lp_index, u32 acpi_id) +{ + return -EOPNOTSUPP; +} +static inline int hv_call_create_vp(int node, u64 partition_id, u32 vp_index, u32 flags) +{ + return -EOPNOTSUPP; +} +#endif /* CONFIG_MSHV_ROOT */ + #endif diff --git a/include/hyperv/hvgdk_mini.h b/include/hyperv/hvgdk_mini.h index 155615175965..abf0bd76e370 100644 --- a/include/hyperv/hvgdk_mini.h +++ b/include/hyperv/hvgdk_mini.h @@ -13,7 +13,7 @@ struct hv_u128 { u64 high_part; } __packed; -/* NOTE: when adding below, update hv_status_to_string() */ +/* NOTE: when adding below, update hv_result_to_string() */ #define HV_STATUS_SUCCESS 0x0 #define HV_STATUS_INVALID_HYPERCALL_CODE 0x2 #define HV_STATUS_INVALID_HYPERCALL_INPUT 0x3 @@ -51,6 +51,7 @@ struct hv_u128 { #define HV_HYP_PAGE_SHIFT 12 #define HV_HYP_PAGE_SIZE BIT(HV_HYP_PAGE_SHIFT) #define HV_HYP_PAGE_MASK (~(HV_HYP_PAGE_SIZE - 1)) +#define HV_HYP_LARGE_PAGE_SHIFT 21 #define HV_PARTITION_ID_INVALID ((u64)0) #define HV_PARTITION_ID_SELF ((u64)-1) @@ -182,7 +183,7 @@ struct hv_tsc_emulation_control { /* HV_TSC_INVARIANT_CONTROL */ #endif /* CONFIG_X86 */ -struct hv_get_partition_id { /* HV_OUTPUT_GET_PARTITION_ID */ +struct hv_output_get_partition_id { u64 partition_id; } __packed; @@ -204,7 +205,14 @@ union hv_reference_tsc_msr { /* The number of vCPUs in one sparse bank */ #define HV_VCPUS_PER_SPARSE_BANK (64) -/* Some of Hyper-V structs do not use hv_vpset where linux uses them */ +/* + * Some of Hyper-V structs do not use hv_vpset where linux uses them. + * + * struct hv_vpset is usually used as part of hypercall input. The portion + * that counts as "fixed size input header" vs. "variable size input header" + * varies per hypercall. See comments at relevant hypercall call sites as to + * how the "valid_bank_mask" field should be accounted. + */ struct hv_vpset { /* HV_VP_SET */ u64 format; u64 valid_bank_mask; @@ -374,6 +382,10 @@ union hv_hypervisor_version_info { #define HV_SHARED_GPA_BOUNDARY_ACTIVE BIT(5) #define HV_SHARED_GPA_BOUNDARY_BITS GENMASK(11, 6) +/* HYPERV_CPUID_FEATURES.ECX bits. */ +#define HV_VP_DISPATCH_INTERRUPT_INJECTION_AVAILABLE BIT(9) +#define HV_VP_GHCB_ROOT_MAPPING_AVAILABLE BIT(10) + enum hv_isolation_type { HV_ISOLATION_TYPE_NONE = 0, /* HV_PARTITION_ISOLATION_TYPE_NONE */ HV_ISOLATION_TYPE_VBS = 1, @@ -436,10 +448,13 @@ union hv_vp_assist_msr_contents { /* HV_REGISTER_VP_ASSIST_PAGE */ #define HVCALL_WITHDRAW_MEMORY 0x0049 #define HVCALL_MAP_GPA_PAGES 0x004b #define HVCALL_UNMAP_GPA_PAGES 0x004c +#define HVCALL_INSTALL_INTERCEPT 0x004d #define HVCALL_CREATE_VP 0x004e #define HVCALL_DELETE_VP 0x004f #define HVCALL_GET_VP_REGISTERS 0x0050 #define HVCALL_SET_VP_REGISTERS 0x0051 +#define HVCALL_TRANSLATE_VIRTUAL_ADDRESS 0x0052 +#define HVCALL_CLEAR_VIRTUAL_INTERRUPT 0x0056 #define HVCALL_DELETE_PORT 0x0058 #define HVCALL_DISCONNECT_PORT 0x005b #define HVCALL_POST_MESSAGE 0x005c @@ -447,12 +462,15 @@ union hv_vp_assist_msr_contents { /* HV_REGISTER_VP_ASSIST_PAGE */ #define HVCALL_POST_DEBUG_DATA 0x0069 #define HVCALL_RETRIEVE_DEBUG_DATA 0x006a #define HVCALL_RESET_DEBUG_SESSION 0x006b +#define HVCALL_MAP_STATS_PAGE 0x006c +#define HVCALL_UNMAP_STATS_PAGE 0x006d #define HVCALL_ADD_LOGICAL_PROCESSOR 0x0076 #define HVCALL_GET_SYSTEM_PROPERTY 0x007b #define HVCALL_MAP_DEVICE_INTERRUPT 0x007c #define HVCALL_UNMAP_DEVICE_INTERRUPT 0x007d #define HVCALL_RETARGET_INTERRUPT 0x007e #define HVCALL_NOTIFY_PORT_RING_EMPTY 0x008b +#define HVCALL_REGISTER_INTERCEPT_RESULT 0x0091 #define HVCALL_ASSERT_VIRTUAL_INTERRUPT 0x0094 #define HVCALL_CREATE_PORT 0x0095 #define HVCALL_CONNECT_PORT 0x0096 @@ -460,12 +478,18 @@ union hv_vp_assist_msr_contents { /* HV_REGISTER_VP_ASSIST_PAGE */ #define HVCALL_GET_VP_ID_FROM_APIC_ID 0x009a #define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_SPACE 0x00af #define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_LIST 0x00b0 +#define HVCALL_SIGNAL_EVENT_DIRECT 0x00c0 +#define HVCALL_POST_MESSAGE_DIRECT 0x00c1 #define HVCALL_DISPATCH_VP 0x00c2 +#define HVCALL_GET_GPA_PAGES_ACCESS_STATES 0x00c9 +#define HVCALL_ACQUIRE_SPARSE_SPA_PAGE_HOST_ACCESS 0x00d7 +#define HVCALL_RELEASE_SPARSE_SPA_PAGE_HOST_ACCESS 0x00d8 #define HVCALL_MODIFY_SPARSE_GPA_PAGE_HOST_VISIBILITY 0x00db #define HVCALL_MAP_VP_STATE_PAGE 0x00e1 #define HVCALL_UNMAP_VP_STATE_PAGE 0x00e2 #define HVCALL_GET_VP_STATE 0x00e3 #define HVCALL_SET_VP_STATE 0x00e4 +#define HVCALL_GET_VP_CPUID_VALUES 0x00f4 #define HVCALL_MMIO_READ 0x0106 #define HVCALL_MMIO_WRITE 0x0107 @@ -775,10 +799,10 @@ struct hv_message_page { /* Define timer message payload structure. */ struct hv_timer_message_payload { - __u32 timer_index; - __u32 reserved; - __u64 expiration_time; /* When the timer expired */ - __u64 delivery_time; /* When the message was delivered */ + u32 timer_index; + u32 reserved; + u64 expiration_time; /* When the timer expired */ + u64 delivery_time; /* When the message was delivered */ } __packed; struct hv_x64_segment_register { @@ -807,6 +831,8 @@ struct hv_x64_table_register { u64 base; } __packed; +#define HV_NORMAL_VTL 0 + union hv_input_vtl { u8 as_uint8; struct { @@ -1325,6 +1351,49 @@ struct hv_retarget_device_interrupt { /* HV_INPUT_RETARGET_DEVICE_INTERRUPT */ struct hv_device_interrupt_target int_target; } __packed __aligned(8); +enum hv_intercept_type { +#if defined(CONFIG_X86) + HV_INTERCEPT_TYPE_X64_IO_PORT = 0x00000000, + HV_INTERCEPT_TYPE_X64_MSR = 0x00000001, + HV_INTERCEPT_TYPE_X64_CPUID = 0x00000002, +#endif + HV_INTERCEPT_TYPE_EXCEPTION = 0x00000003, + /* Used to be HV_INTERCEPT_TYPE_REGISTER */ + HV_INTERCEPT_TYPE_RESERVED0 = 0x00000004, + HV_INTERCEPT_TYPE_MMIO = 0x00000005, +#if defined(CONFIG_X86) + HV_INTERCEPT_TYPE_X64_GLOBAL_CPUID = 0x00000006, + HV_INTERCEPT_TYPE_X64_APIC_SMI = 0x00000007, +#endif + HV_INTERCEPT_TYPE_HYPERCALL = 0x00000008, +#if defined(CONFIG_X86) + HV_INTERCEPT_TYPE_X64_APIC_INIT_SIPI = 0x00000009, + HV_INTERCEPT_MC_UPDATE_PATCH_LEVEL_MSR_READ = 0x0000000A, + HV_INTERCEPT_TYPE_X64_APIC_WRITE = 0x0000000B, + HV_INTERCEPT_TYPE_X64_MSR_INDEX = 0x0000000C, +#endif + HV_INTERCEPT_TYPE_MAX, + HV_INTERCEPT_TYPE_INVALID = 0xFFFFFFFF, +}; + +union hv_intercept_parameters { + /* HV_INTERCEPT_PARAMETERS is defined to be an 8-byte field. */ + u64 as_uint64; +#if defined(CONFIG_X86) + /* HV_INTERCEPT_TYPE_X64_IO_PORT */ + u16 io_port; + /* HV_INTERCEPT_TYPE_X64_CPUID */ + u32 cpuid_index; + /* HV_INTERCEPT_TYPE_X64_APIC_WRITE */ + u32 apic_write_mask; + /* HV_INTERCEPT_TYPE_EXCEPTION */ + u16 exception_vector; + /* HV_INTERCEPT_TYPE_X64_MSR_INDEX */ + u32 msr_index; +#endif + /* N.B. Other intercept types do not have any parameters. */ +}; + /* Data structures for HVCALL_MMIO_READ and HVCALL_MMIO_WRITE */ #define HV_HYPERCALL_MMIO_MAX_DATA_LENGTH 64 diff --git a/include/hyperv/hvhdk.h b/include/hyperv/hvhdk.h index 64407c2a3809..b4067ada02cf 100644 --- a/include/hyperv/hvhdk.h +++ b/include/hyperv/hvhdk.h @@ -19,11 +19,24 @@ #define HV_VP_REGISTER_PAGE_VERSION_1 1u +#define HV_VP_REGISTER_PAGE_MAX_VECTOR_COUNT 7 + +union hv_vp_register_page_interrupt_vectors { + u64 as_uint64; + struct { + u8 vector_count; + u8 vector[HV_VP_REGISTER_PAGE_MAX_VECTOR_COUNT]; + } __packed; +}; + struct hv_vp_register_page { u16 version; u8 isvalid; u8 rsvdz; u32 dirty; + +#if IS_ENABLED(CONFIG_X86) + union { struct { /* General purpose registers @@ -95,6 +108,22 @@ struct hv_vp_register_page { union hv_x64_pending_interruption_register pending_interruption; union hv_x64_interrupt_state_register interrupt_state; u64 instruction_emulation_hints; + u64 xfem; + + /* + * Fields from this point are not included in the register page save chunk. + * The reserved field is intended to maintain alignment for unsaved fields. + */ + u8 reserved1[0x100]; + + /* + * Interrupts injected as part of HvCallDispatchVp. + */ + union hv_vp_register_page_interrupt_vectors interrupt_vectors; + +#elif IS_ENABLED(CONFIG_ARM64) + /* Not yet supported in ARM */ +#endif } __packed; #define HV_PARTITION_PROCESSOR_FEATURES_BANKS 2 @@ -299,10 +328,11 @@ union hv_partition_isolation_properties { #define HV_PARTITION_ISOLATION_HOST_TYPE_RESERVED 0x2 /* Note: Exo partition is enabled by default */ -#define HV_PARTITION_CREATION_FLAG_EXO_PARTITION BIT(8) -#define HV_PARTITION_CREATION_FLAG_LAPIC_ENABLED BIT(13) -#define HV_PARTITION_CREATION_FLAG_INTERCEPT_MESSAGE_PAGE_ENABLED BIT(19) -#define HV_PARTITION_CREATION_FLAG_X2APIC_CAPABLE BIT(22) +#define HV_PARTITION_CREATION_FLAG_GPA_SUPER_PAGES_ENABLED BIT(4) +#define HV_PARTITION_CREATION_FLAG_EXO_PARTITION BIT(8) +#define HV_PARTITION_CREATION_FLAG_LAPIC_ENABLED BIT(13) +#define HV_PARTITION_CREATION_FLAG_INTERCEPT_MESSAGE_PAGE_ENABLED BIT(19) +#define HV_PARTITION_CREATION_FLAG_X2APIC_CAPABLE BIT(22) struct hv_input_create_partition { u64 flags; @@ -349,13 +379,23 @@ struct hv_input_set_partition_property { enum hv_vp_state_page_type { HV_VP_STATE_PAGE_REGISTERS = 0, HV_VP_STATE_PAGE_INTERCEPT_MESSAGE = 1, + HV_VP_STATE_PAGE_GHCB = 2, HV_VP_STATE_PAGE_COUNT }; struct hv_input_map_vp_state_page { u64 partition_id; u32 vp_index; - u32 type; /* enum hv_vp_state_page_type */ + u16 type; /* enum hv_vp_state_page_type */ + union hv_input_vtl input_vtl; + union { + u8 as_uint8; + struct { + u8 map_location_provided : 1; + u8 reserved : 7; + }; + } flags; + u64 requested_map_location; } __packed; struct hv_output_map_vp_state_page { @@ -365,7 +405,14 @@ struct hv_output_map_vp_state_page { struct hv_input_unmap_vp_state_page { u64 partition_id; u32 vp_index; - u32 type; /* enum hv_vp_state_page_type */ + u16 type; /* enum hv_vp_state_page_type */ + union hv_input_vtl input_vtl; + u8 reserved0; +} __packed; + +struct hv_x64_apic_eoi_message { + u32 vp_index; + u32 interrupt_vector; } __packed; struct hv_opaque_intercept_message { @@ -515,6 +562,13 @@ struct hv_synthetic_timers_state { u64 reserved[5]; } __packed; +struct hv_async_completion_message_payload { + u64 partition_id; + u32 status; + u32 completion_count; + u64 sub_status; +} __packed; + union hv_input_delete_vp { u64 as_uint64[2]; struct { @@ -649,6 +703,57 @@ struct hv_input_set_vp_state { union hv_input_set_vp_state_data data[]; } __packed; +union hv_x64_vp_execution_state { + u16 as_uint16; + struct { + u16 cpl:2; + u16 cr0_pe:1; + u16 cr0_am:1; + u16 efer_lma:1; + u16 debug_active:1; + u16 interruption_pending:1; + u16 vtl:4; + u16 enclave_mode:1; + u16 interrupt_shadow:1; + u16 virtualization_fault_active:1; + u16 reserved:2; + } __packed; +}; + +struct hv_x64_intercept_message_header { + u32 vp_index; + u8 instruction_length:4; + u8 cr8:4; /* Only set for exo partitions */ + u8 intercept_access_type; + union hv_x64_vp_execution_state execution_state; + struct hv_x64_segment_register cs_segment; + u64 rip; + u64 rflags; +} __packed; + +union hv_x64_memory_access_info { + u8 as_uint8; + struct { + u8 gva_valid:1; + u8 gva_gpa_valid:1; + u8 hypercall_output_pending:1; + u8 tlb_locked_no_overlay:1; + u8 reserved:4; + } __packed; +}; + +struct hv_x64_memory_intercept_message { + struct hv_x64_intercept_message_header header; + u32 cache_type; /* enum hv_cache_type */ + u8 instruction_byte_count; + union hv_x64_memory_access_info memory_access_info; + u8 tpr_priority; + u8 reserved1; + u64 guest_virtual_address; + u64 guest_physical_address; + u8 instruction_bytes[16]; +} __packed; + /* * Dispatch state for the VP communicated by the hypervisor to the * VP-dispatching thread in the root on return from HVCALL_DISPATCH_VP. @@ -716,6 +821,7 @@ static_assert(sizeof(struct hv_vp_signal_pair_scheduler_message) == #define HV_DISPATCH_VP_FLAG_SKIP_VP_SPEC_FLUSH 0x8 #define HV_DISPATCH_VP_FLAG_SKIP_CALLER_SPEC_FLUSH 0x10 #define HV_DISPATCH_VP_FLAG_SKIP_CALLER_USER_SPEC_FLUSH 0x20 +#define HV_DISPATCH_VP_FLAG_SCAN_INTERRUPT_INJECTION 0x40 struct hv_input_dispatch_vp { u64 partition_id; @@ -730,4 +836,18 @@ struct hv_output_dispatch_vp { u32 dispatch_event; /* enum hv_vp_dispatch_event */ } __packed; +struct hv_input_modify_sparse_spa_page_host_access { + u32 host_access : 2; + u32 reserved : 30; + u32 flags; + u64 partition_id; + u64 spa_page_list[]; +} __packed; + +/* hv_input_modify_sparse_spa_page_host_access flags */ +#define HV_MODIFY_SPA_PAGE_HOST_ACCESS_MAKE_EXCLUSIVE 0x1 +#define HV_MODIFY_SPA_PAGE_HOST_ACCESS_MAKE_SHARED 0x2 +#define HV_MODIFY_SPA_PAGE_HOST_ACCESS_LARGE_PAGE 0x4 +#define HV_MODIFY_SPA_PAGE_HOST_ACCESS_HUGE_PAGE 0x8 + #endif /* _HV_HVHDK_H */ diff --git a/include/hyperv/hvhdk_mini.h b/include/hyperv/hvhdk_mini.h index f8a39d3e9ce6..42e7876455b5 100644 --- a/include/hyperv/hvhdk_mini.h +++ b/include/hyperv/hvhdk_mini.h @@ -36,6 +36,52 @@ enum hv_scheduler_type { HV_SCHEDULER_TYPE_MAX }; +/* HV_STATS_AREA_TYPE */ +enum hv_stats_area_type { + HV_STATS_AREA_SELF = 0, + HV_STATS_AREA_PARENT = 1, + HV_STATS_AREA_INTERNAL = 2, + HV_STATS_AREA_COUNT +}; + +enum hv_stats_object_type { + HV_STATS_OBJECT_HYPERVISOR = 0x00000001, + HV_STATS_OBJECT_LOGICAL_PROCESSOR = 0x00000002, + HV_STATS_OBJECT_PARTITION = 0x00010001, + HV_STATS_OBJECT_VP = 0x00010002 +}; + +union hv_stats_object_identity { + /* hv_stats_hypervisor */ + struct { + u8 reserved[15]; + u8 stats_area_type; + } __packed hv; + + /* hv_stats_logical_processor */ + struct { + u32 lp_index; + u8 reserved[11]; + u8 stats_area_type; + } __packed lp; + + /* hv_stats_partition */ + struct { + u64 partition_id; + u8 reserved[7]; + u8 stats_area_type; + } __packed partition; + + /* hv_stats_vp */ + struct { + u64 partition_id; + u32 vp_index; + u16 flags; + u8 reserved; + u8 stats_area_type; + } __packed vp; +}; + enum hv_partition_property_code { /* Privilege properties */ HV_PARTITION_PROPERTY_PRIVILEGE_FLAGS = 0x00010000, @@ -47,19 +93,45 @@ enum hv_partition_property_code { /* Compatibility properties */ HV_PARTITION_PROPERTY_PROCESSOR_XSAVE_FEATURES = 0x00060002, + HV_PARTITION_PROPERTY_XSAVE_STATES = 0x00060007, HV_PARTITION_PROPERTY_MAX_XSAVE_DATA_SIZE = 0x00060008, HV_PARTITION_PROPERTY_PROCESSOR_CLOCK_FREQUENCY = 0x00060009, }; +enum hv_snp_status { + HV_SNP_STATUS_NONE = 0, + HV_SNP_STATUS_AVAILABLE = 1, + HV_SNP_STATUS_INCOMPATIBLE = 2, + HV_SNP_STATUS_PSP_UNAVAILABLE = 3, + HV_SNP_STATUS_PSP_INIT_FAILED = 4, + HV_SNP_STATUS_PSP_BAD_FW_VERSION = 5, + HV_SNP_STATUS_BAD_CONFIGURATION = 6, + HV_SNP_STATUS_PSP_FW_UPDATE_IN_PROGRESS = 7, + HV_SNP_STATUS_PSP_RB_INIT_FAILED = 8, + HV_SNP_STATUS_PSP_PLATFORM_STATUS_FAILED = 9, + HV_SNP_STATUS_PSP_INIT_LATE_FAILED = 10, +}; + enum hv_system_property { /* Add more values when needed */ HV_SYSTEM_PROPERTY_SCHEDULER_TYPE = 15, + HV_DYNAMIC_PROCESSOR_FEATURE_PROPERTY = 21, +}; + +enum hv_dynamic_processor_feature_property { + /* Add more values when needed */ + HV_X64_DYNAMIC_PROCESSOR_FEATURE_MAX_ENCRYPTED_PARTITIONS = 13, + HV_X64_DYNAMIC_PROCESSOR_FEATURE_SNP_STATUS = 16, }; struct hv_input_get_system_property { u32 property_id; /* enum hv_system_property */ union { u32 as_uint32; +#if IS_ENABLED(CONFIG_X86) + /* enum hv_dynamic_processor_feature_property */ + u32 hv_processor_feature; +#endif /* More fields to be filled in when needed */ }; } __packed; @@ -67,9 +139,28 @@ struct hv_input_get_system_property { struct hv_output_get_system_property { union { u32 scheduler_type; /* enum hv_scheduler_type */ +#if IS_ENABLED(CONFIG_X86) + u64 hv_processor_feature_value; +#endif }; } __packed; +struct hv_input_map_stats_page { + u32 type; /* enum hv_stats_object_type */ + u32 padding; + union hv_stats_object_identity identity; +} __packed; + +struct hv_output_map_stats_page { + u64 map_location; +} __packed; + +struct hv_input_unmap_stats_page { + u32 type; /* enum hv_stats_object_type */ + u32 padding; + union hv_stats_object_identity identity; +} __packed; + struct hv_proximity_domain_flags { u32 proximity_preferred : 1; u32 reserved : 30; diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 4179add2864b..675959fb97ba 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -371,19 +371,6 @@ struct vmtransfer_page_packet_header { struct vmtransfer_page_range ranges[]; } __packed; -struct vmgpadl_packet_header { - struct vmpacket_descriptor d; - u32 gpadl; - u32 reserved; -} __packed; - -struct vmadd_remove_transfer_page_set { - struct vmpacket_descriptor d; - u32 gpadl; - u16 xfer_pageset_id; - u16 reserved; -} __packed; - /* * This structure defines a range in guest physical space that can be made to * look virtually contiguous. @@ -395,30 +382,6 @@ struct gpa_range { }; /* - * This is the format for an Establish Gpadl packet, which contains a handle by - * which this GPADL will be known and a set of GPA ranges associated with it. - * This can be converted to a MDL by the guest OS. If there are multiple GPA - * ranges, then the resulting MDL will be "chained," representing multiple VA - * ranges. - */ -struct vmestablish_gpadl { - struct vmpacket_descriptor d; - u32 gpadl; - u32 range_cnt; - struct gpa_range range[1]; -} __packed; - -/* - * This is the format for a Teardown Gpadl packet, which indicates that the - * GPADL handle in the Establish Gpadl packet will never be referenced again. - */ -struct vmteardown_gpadl { - struct vmpacket_descriptor d; - u32 gpadl; - u32 reserved; /* for alignment to a 8-byte boundary */ -} __packed; - -/* * This is the format for a GPA-Direct packet, which contains a set of GPA * ranges, in addition to commands and/or data. */ @@ -429,25 +392,6 @@ struct vmdata_gpa_direct { struct gpa_range range[1]; } __packed; -/* This is the format for a Additional Data Packet. */ -struct vmadditional_data { - struct vmpacket_descriptor d; - u64 total_bytes; - u32 offset; - u32 byte_cnt; - unsigned char data[1]; -} __packed; - -union vmpacket_largest_possible_header { - struct vmpacket_descriptor simple_hdr; - struct vmtransfer_page_packet_header xfer_page_hdr; - struct vmgpadl_packet_header gpadl_hdr; - struct vmadd_remove_transfer_page_set add_rm_xfer_page_hdr; - struct vmestablish_gpadl establish_gpadl_hdr; - struct vmteardown_gpadl teardown_gpadl_hdr; - struct vmdata_gpa_direct data_gpa_direct_hdr; -}; - #define VMPACKET_DATA_START_ADDRESS(__packet) \ (void *)(((unsigned char *)__packet) + \ ((struct vmpacket_descriptor)__packet)->offset8 * 8) @@ -1661,6 +1605,7 @@ int vmbus_send_tl_connect_request(const guid_t *shv_guest_servie_id, const guid_t *shv_host_servie_id); int vmbus_send_modifychannel(struct vmbus_channel *channel, u32 target_vp); void vmbus_set_event(struct vmbus_channel *channel); +int vmbus_channel_set_cpu(struct vmbus_channel *channel, u32 target_cpu); /* Get the start of the ring buffer. */ static inline void * diff --git a/include/uapi/linux/mshv.h b/include/uapi/linux/mshv.h new file mode 100644 index 000000000000..876bfe4e4227 --- /dev/null +++ b/include/uapi/linux/mshv.h @@ -0,0 +1,291 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * Userspace interfaces for /dev/mshv* devices and derived fds + * + * This file is divided into sections containing data structures and IOCTLs for + * a particular set of related devices or derived file descriptors. + * + * The IOCTL definitions are at the end of each section. They are grouped by + * device/fd, so that new IOCTLs can easily be added with a monotonically + * increasing number. + */ +#ifndef _UAPI_LINUX_MSHV_H +#define _UAPI_LINUX_MSHV_H + +#include <linux/types.h> + +#define MSHV_IOCTL 0xB8 + +/* + ******************************************* + * Entry point to main VMM APIs: /dev/mshv * + ******************************************* + */ + +enum { + MSHV_PT_BIT_LAPIC, + MSHV_PT_BIT_X2APIC, + MSHV_PT_BIT_GPA_SUPER_PAGES, + MSHV_PT_BIT_COUNT, +}; + +#define MSHV_PT_FLAGS_MASK ((1 << MSHV_PT_BIT_COUNT) - 1) + +enum { + MSHV_PT_ISOLATION_NONE, + MSHV_PT_ISOLATION_COUNT, +}; + +/** + * struct mshv_create_partition - arguments for MSHV_CREATE_PARTITION + * @pt_flags: Bitmask of 1 << MSHV_PT_BIT_* + * @pt_isolation: MSHV_PT_ISOLATION_* + * + * Returns a file descriptor to act as a handle to a guest partition. + * At this point the partition is not yet initialized in the hypervisor. + * Some operations must be done with the partition in this state, e.g. setting + * so-called "early" partition properties. The partition can then be + * initialized with MSHV_INITIALIZE_PARTITION. + */ +struct mshv_create_partition { + __u64 pt_flags; + __u64 pt_isolation; +}; + +/* /dev/mshv */ +#define MSHV_CREATE_PARTITION _IOW(MSHV_IOCTL, 0x00, struct mshv_create_partition) + +/* + ************************ + * Child partition APIs * + ************************ + */ + +struct mshv_create_vp { + __u32 vp_index; +}; + +enum { + MSHV_SET_MEM_BIT_WRITABLE, + MSHV_SET_MEM_BIT_EXECUTABLE, + MSHV_SET_MEM_BIT_UNMAP, + MSHV_SET_MEM_BIT_COUNT +}; + +#define MSHV_SET_MEM_FLAGS_MASK ((1 << MSHV_SET_MEM_BIT_COUNT) - 1) + +/* The hypervisor's "native" page size */ +#define MSHV_HV_PAGE_SIZE 0x1000 + +/** + * struct mshv_user_mem_region - arguments for MSHV_SET_GUEST_MEMORY + * @size: Size of the memory region (bytes). Must be aligned to + * MSHV_HV_PAGE_SIZE + * @guest_pfn: Base guest page number to map + * @userspace_addr: Base address of userspace memory. Must be aligned to + * MSHV_HV_PAGE_SIZE + * @flags: Bitmask of 1 << MSHV_SET_MEM_BIT_*. If (1 << MSHV_SET_MEM_BIT_UNMAP) + * is set, ignore other bits. + * @rsvd: MBZ + * + * Map or unmap a region of userspace memory to Guest Physical Addresses (GPA). + * Mappings can't overlap in GPA space or userspace. + * To unmap, these fields must match an existing mapping. + */ +struct mshv_user_mem_region { + __u64 size; + __u64 guest_pfn; + __u64 userspace_addr; + __u8 flags; + __u8 rsvd[7]; +}; + +enum { + MSHV_IRQFD_BIT_DEASSIGN, + MSHV_IRQFD_BIT_RESAMPLE, + MSHV_IRQFD_BIT_COUNT, +}; + +#define MSHV_IRQFD_FLAGS_MASK ((1 << MSHV_IRQFD_BIT_COUNT) - 1) + +struct mshv_user_irqfd { + __s32 fd; + __s32 resamplefd; + __u32 gsi; + __u32 flags; +}; + +enum { + MSHV_IOEVENTFD_BIT_DATAMATCH, + MSHV_IOEVENTFD_BIT_PIO, + MSHV_IOEVENTFD_BIT_DEASSIGN, + MSHV_IOEVENTFD_BIT_COUNT, +}; + +#define MSHV_IOEVENTFD_FLAGS_MASK ((1 << MSHV_IOEVENTFD_BIT_COUNT) - 1) + +struct mshv_user_ioeventfd { + __u64 datamatch; + __u64 addr; /* legal pio/mmio address */ + __u32 len; /* 1, 2, 4, or 8 bytes */ + __s32 fd; + __u32 flags; + __u8 rsvd[4]; +}; + +struct mshv_user_irq_entry { + __u32 gsi; + __u32 address_lo; + __u32 address_hi; + __u32 data; +}; + +struct mshv_user_irq_table { + __u32 nr; + __u32 rsvd; /* MBZ */ + struct mshv_user_irq_entry entries[]; +}; + +enum { + MSHV_GPAP_ACCESS_TYPE_ACCESSED, + MSHV_GPAP_ACCESS_TYPE_DIRTY, + MSHV_GPAP_ACCESS_TYPE_COUNT /* Count of enum members */ +}; + +enum { + MSHV_GPAP_ACCESS_OP_NOOP, + MSHV_GPAP_ACCESS_OP_CLEAR, + MSHV_GPAP_ACCESS_OP_SET, + MSHV_GPAP_ACCESS_OP_COUNT /* Count of enum members */ +}; + +/** + * struct mshv_gpap_access_bitmap - arguments for MSHV_GET_GPAP_ACCESS_BITMAP + * @access_type: MSHV_GPAP_ACCESS_TYPE_* - The type of access to record in the + * bitmap + * @access_op: MSHV_GPAP_ACCESS_OP_* - Allows an optional clear or set of all + * the access states in the range, after retrieving the current + * states. + * @rsvd: MBZ + * @page_count: Number of pages + * @gpap_base: Base gpa page number + * @bitmap_ptr: Output buffer for bitmap, at least (page_count + 7) / 8 bytes + * + * Retrieve a bitmap of either ACCESSED or DIRTY bits for a given range of guest + * memory, and optionally clear or set the bits. + */ +struct mshv_gpap_access_bitmap { + __u8 access_type; + __u8 access_op; + __u8 rsvd[6]; + __u64 page_count; + __u64 gpap_base; + __u64 bitmap_ptr; +}; + +/** + * struct mshv_root_hvcall - arguments for MSHV_ROOT_HVCALL + * @code: Hypercall code (HVCALL_*) + * @reps: in: Rep count ('repcount') + * out: Reps completed ('repcomp'). MBZ unless rep hvcall + * @in_sz: Size of input incl rep data. <= MSHV_HV_PAGE_SIZE + * @out_sz: Size of output buffer. <= MSHV_HV_PAGE_SIZE. MBZ if out_ptr is 0 + * @status: in: MBZ + * out: HV_STATUS_* from hypercall + * @rsvd: MBZ + * @in_ptr: Input data buffer (struct hv_input_*). If used with partition or + * vp fd, partition id field is populated by kernel. + * @out_ptr: Output data buffer (optional) + */ +struct mshv_root_hvcall { + __u16 code; + __u16 reps; + __u16 in_sz; + __u16 out_sz; + __u16 status; + __u8 rsvd[6]; + __u64 in_ptr; + __u64 out_ptr; +}; + +/* Partition fds created with MSHV_CREATE_PARTITION */ +#define MSHV_INITIALIZE_PARTITION _IO(MSHV_IOCTL, 0x00) +#define MSHV_CREATE_VP _IOW(MSHV_IOCTL, 0x01, struct mshv_create_vp) +#define MSHV_SET_GUEST_MEMORY _IOW(MSHV_IOCTL, 0x02, struct mshv_user_mem_region) +#define MSHV_IRQFD _IOW(MSHV_IOCTL, 0x03, struct mshv_user_irqfd) +#define MSHV_IOEVENTFD _IOW(MSHV_IOCTL, 0x04, struct mshv_user_ioeventfd) +#define MSHV_SET_MSI_ROUTING _IOW(MSHV_IOCTL, 0x05, struct mshv_user_irq_table) +#define MSHV_GET_GPAP_ACCESS_BITMAP _IOWR(MSHV_IOCTL, 0x06, struct mshv_gpap_access_bitmap) +/* Generic hypercall */ +#define MSHV_ROOT_HVCALL _IOWR(MSHV_IOCTL, 0x07, struct mshv_root_hvcall) + +/* + ******************************** + * VP APIs for child partitions * + ******************************** + */ + +#define MSHV_RUN_VP_BUF_SZ 256 + +/* + * VP state pages may be mapped to userspace via mmap(). + * To specify which state page, use MSHV_VP_MMAP_OFFSET_ values multiplied by + * the system page size. + * e.g. + * long page_size = sysconf(_SC_PAGE_SIZE); + * void *reg_page = mmap(NULL, MSHV_HV_PAGE_SIZE, PROT_READ|PROT_WRITE, + * MAP_SHARED, vp_fd, + * MSHV_VP_MMAP_OFFSET_REGISTERS * page_size); + */ +enum { + MSHV_VP_MMAP_OFFSET_REGISTERS, + MSHV_VP_MMAP_OFFSET_INTERCEPT_MESSAGE, + MSHV_VP_MMAP_OFFSET_GHCB, + MSHV_VP_MMAP_OFFSET_COUNT +}; + +/** + * struct mshv_run_vp - argument for MSHV_RUN_VP + * @msg_buf: On success, the intercept message is copied here. It can be + * interpreted using the relevant hypervisor definitions. + */ +struct mshv_run_vp { + __u8 msg_buf[MSHV_RUN_VP_BUF_SZ]; +}; + +enum { + MSHV_VP_STATE_LAPIC, /* Local interrupt controller state (either arch) */ + MSHV_VP_STATE_XSAVE, /* XSAVE data in compacted form (x86_64) */ + MSHV_VP_STATE_SIMP, + MSHV_VP_STATE_SIEFP, + MSHV_VP_STATE_SYNTHETIC_TIMERS, + MSHV_VP_STATE_COUNT, +}; + +/** + * struct mshv_get_set_vp_state - arguments for MSHV_[GET,SET]_VP_STATE + * @type: MSHV_VP_STATE_* + * @rsvd: MBZ + * @buf_sz: in: 4k page-aligned size of buffer + * out: Actual size of data (on EINVAL, check this to see if buffer + * was too small) + * @buf_ptr: 4k page-aligned data buffer + */ +struct mshv_get_set_vp_state { + __u8 type; + __u8 rsvd[3]; + __u32 buf_sz; + __u64 buf_ptr; +}; + +/* VP fds created with MSHV_CREATE_VP */ +#define MSHV_RUN_VP _IOR(MSHV_IOCTL, 0x00, struct mshv_run_vp) +#define MSHV_GET_VP_STATE _IOWR(MSHV_IOCTL, 0x01, struct mshv_get_set_vp_state) +#define MSHV_SET_VP_STATE _IOWR(MSHV_IOCTL, 0x02, struct mshv_get_set_vp_state) +/* + * Generic hypercall + * Defined above in partition IOCTLs, avoid redefining it here + * #define MSHV_ROOT_HVCALL _IOWR(MSHV_IOCTL, 0x07, struct mshv_root_hvcall) + */ + +#endif |
