| 1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
 | /* SPDX-License-Identifier: GPL-2.0 */
#include <linux/linkage.h>
#include <asm/asm.h>
#include <asm/asm-offsets.h>
#include <asm/bitsperlong.h>
#include <asm/frame.h>
#include <asm/kvm_vcpu_regs.h>
#include <asm/nospec-branch.h>
#include "kvm-asm-offsets.h"
/* Number of bytes in one native machine word, i.e. one GPR slot. */
#define WORD_SIZE (BITS_PER_LONG / 8)

/*
 * Byte offset, relative to the vcpu_svm pointer, of the slot that holds guest
 * register @reg: the start of the register array plus the register's
 * __VCPU_REGS_* index scaled by the word size.
 */
#define VCPU_GPR_OFFSET(reg)	(SVM_vcpu_arch_regs + __VCPU_REGS_##reg * WORD_SIZE)

/* No VCPU_RAX: hardware context switches RAX, so it is never accessed here. */
#define VCPU_RCX	VCPU_GPR_OFFSET(RCX)
#define VCPU_RDX	VCPU_GPR_OFFSET(RDX)
#define VCPU_RBX	VCPU_GPR_OFFSET(RBX)
/* No VCPU_RSP either: RSP is likewise context switched by hardware. */
#define VCPU_RBP	VCPU_GPR_OFFSET(RBP)
#define VCPU_RSI	VCPU_GPR_OFFSET(RSI)
#define VCPU_RDI	VCPU_GPR_OFFSET(RDI)

#ifdef CONFIG_X86_64
/* R8-R15 exist only in 64-bit builds. */
#define VCPU_R8		VCPU_GPR_OFFSET(R8)
#define VCPU_R9		VCPU_GPR_OFFSET(R9)
#define VCPU_R10	VCPU_GPR_OFFSET(R10)
#define VCPU_R11	VCPU_GPR_OFFSET(R11)
#define VCPU_R12	VCPU_GPR_OFFSET(R12)
#define VCPU_R13	VCPU_GPR_OFFSET(R13)
#define VCPU_R14	VCPU_GPR_OFFSET(R14)
#define VCPU_R15	VCPU_GPR_OFFSET(R15)
#endif

/* Offset of the 'pa' member of the vmcb01 descriptor embedded in vcpu_svm. */
#define SVM_vmcb01_pa	(SVM_vmcb01 + KVM_VMCB_pa)
/*
 * Everything that follows (the macro expansions and both entry points) is
 * emitted into the allocatable, executable section .noinstr.text.
 */
.section .noinstr.text, "ax"
/*
 * RESTORE_GUEST_SPEC_CTRL - inline half of the pre-VMRUN SPEC_CTRL switch.
 *
 * Emits an ALTERNATIVE_2 site followed by local label 801.  The only
 * non-empty replacement is "jmp 800f", i.e. a forward jump to the out-of-line
 * code emitted by RESTORE_GUEST_SPEC_CTRL_BODY, so that macro must be
 * instantiated later in the same function.  The body returns here by
 * branching back to 801.
 *
 * This macro itself touches no registers; the call sites in this file treat
 * the inline/body pair as clobbering RAX, RCX and RDX.
 */
.macro RESTORE_GUEST_SPEC_CTRL
	/* No need to do anything if SPEC_CTRL is unset or V_SPEC_CTRL is set */
	ALTERNATIVE_2 "", \
		"jmp 800f", X86_FEATURE_MSR_SPEC_CTRL, \
		"", X86_FEATURE_V_SPEC_CTRL
801:
.endm
/*
 * RESTORE_GUEST_SPEC_CTRL_BODY - out-of-line half of RESTORE_GUEST_SPEC_CTRL.
 *
 * Defines local label 800 (the target of the patched-in jump) and always
 * leaves by branching back to label 801 of the inline half, so it must be
 * placed after the matching RESTORE_GUEST_SPEC_CTRL in the same function.
 *
 * In:       %_ASM_DI = @svm (used as the base for SVM_spec_ctrl)
 * Clobbers: EAX, ECX, EDX, flags
 */
.macro RESTORE_GUEST_SPEC_CTRL_BODY
800:
	/*
	 * SPEC_CTRL handling: if the guest's SPEC_CTRL value differs from the
	 * host's, write the MSR.  This is kept out-of-line so that the common
	 * case does not have to jump.
	 *
	 * IMPORTANT: To avoid RSB underflow attacks and any other nastiness,
	 * there must not be any returns or indirect branches between this code
	 * and vmentry.
	 */
	/* EAX = guest value; skip the MSR write if it equals the per-CPU current value. */
	movl SVM_spec_ctrl(%_ASM_DI), %eax
	cmp PER_CPU_VAR(x86_spec_ctrl_current), %eax
	je 801b
	/* WRMSR operands: ECX = MSR index, EDX:EAX = value (upper half zeroed). */
	mov $MSR_IA32_SPEC_CTRL, %ecx
	xor %edx, %edx
	wrmsr
	jmp 801b
.endm
/*
 * RESTORE_HOST_SPEC_CTRL - inline half of the post-VMEXIT SPEC_CTRL switch.
 *
 * Mirror image of RESTORE_GUEST_SPEC_CTRL: emits an ALTERNATIVE_2 site whose
 * only non-empty replacement is "jmp 900f", followed by local label 901.
 * Label 900 is provided by RESTORE_HOST_SPEC_CTRL_BODY, which must be
 * instantiated later in the same function and which branches back to 901.
 *
 * This macro itself touches no registers; the call sites in this file treat
 * the inline/body pair as clobbering RAX, RCX and RDX.
 */
.macro RESTORE_HOST_SPEC_CTRL
	/* No need to do anything if SPEC_CTRL is unset or V_SPEC_CTRL is set */
	ALTERNATIVE_2 "", \
		"jmp 900f", X86_FEATURE_MSR_SPEC_CTRL, \
		"", X86_FEATURE_V_SPEC_CTRL
901:
.endm
/*
 * RESTORE_HOST_SPEC_CTRL_BODY - out-of-line half of RESTORE_HOST_SPEC_CTRL.
 *
 * @spec_ctrl_intercepted: byte-sized operand (it is the second operand of a
 *	CMPB); the callers in this file pass either a stack slot or %sil.
 *	Non-zero means the guest's SPEC_CTRL value is not read back from
 *	the MSR.
 *
 * Defines local label 900 and always leaves by branching back to label 901
 * of the inline half.
 *
 * In:       %_ASM_DI = @svm (used as the base for SVM_spec_ctrl)
 * Clobbers: EAX, ECX, EDX, flags
 */
.macro RESTORE_HOST_SPEC_CTRL_BODY spec_ctrl_intercepted:req
900:
	/* Same for after vmexit.  */
	/* ECX is loaded once and serves as the MSR index for both RDMSR and WRMSR. */
	mov $MSR_IA32_SPEC_CTRL, %ecx
	/*
	 * Load the value that the guest had written into MSR_IA32_SPEC_CTRL,
	 * if it was not intercepted during guest execution.
	 */
	cmpb $0, \spec_ctrl_intercepted
	jnz 998f
	/* Only the low 32 bits (EAX) of the value read are stored. */
	rdmsr
	movl %eax, SVM_spec_ctrl(%_ASM_DI)
998:
	/* Now restore the host value of the MSR if different from the guest's.  */
	movl PER_CPU_VAR(x86_spec_ctrl_current), %eax
	cmp SVM_spec_ctrl(%_ASM_DI), %eax
	je 901b
	/* EDX may hold the upper half left by RDMSR; zero it before writing. */
	xor %edx, %edx
	wrmsr
	jmp 901b
.endm
/**
 * __svm_vcpu_run - Run a vCPU via a transition to SVM guest mode
 * @svm:	struct vcpu_svm *
 * @spec_ctrl_intercepted: bool
 *
 * Arguments arrive in %_ASM_ARG1 and %_ASM_ARG2.  The routine saves the
 * callee-saved registers, loads the guest GPRs from @svm, executes VMRUN,
 * stores the guest GPRs back into @svm, restores host state and returns.
 * It works for both 32-bit and 64-bit builds through the _ASM_* register
 * aliases and the CONFIG_X86_64 conditionals.
 *
 * A fault on any of the VMLOAD/VMRUN/VMSAVE instructions is redirected by
 * the exception table entries at the end of the function: execution resumes
 * after the faulting instruction if kvm_rebooting is non-zero and hits UD2
 * otherwise.
 */
SYM_FUNC_START(__svm_vcpu_run)
	/* Set up a frame pointer and preserve the callee-saved registers. */
	push %_ASM_BP
	mov  %_ASM_SP, %_ASM_BP
#ifdef CONFIG_X86_64
	push %r15
	push %r14
	push %r13
	push %r12
#else
	push %edi
	push %esi
#endif
	push %_ASM_BX
	/*
	 * Save variables needed after vmexit on the stack, in inverse
	 * order compared to when they are needed.
	 *
	 * After the three pushes below the stack holds, from the top down:
	 * @svm, the per-CPU save area physical address, and
	 * @spec_ctrl_intercepted.
	 */
	/* Accessed directly from the stack in RESTORE_HOST_SPEC_CTRL.  */
	push %_ASM_ARG2
	/* Needed to restore access to percpu variables.  */
	__ASM_SIZE(push) PER_CPU_VAR(svm_data + SD_save_area_pa)
	/* Finally save @svm. */
	push %_ASM_ARG1
.ifnc _ASM_ARG1, _ASM_DI
	/*
	 * Stash @svm in RDI early. On 32-bit, arguments are in RAX, RCX
	 * and RDX which are clobbered by RESTORE_GUEST_SPEC_CTRL.
	 */
	mov %_ASM_ARG1, %_ASM_DI
.endif
	/* Clobbers RAX, RCX, RDX.  */
	RESTORE_GUEST_SPEC_CTRL
	/*
	 * Use a single vmcb (vmcb01 because it's always valid) for
	 * context switching guest state via VMLOAD/VMSAVE, that way
	 * the state doesn't need to be copied between vmcb01 and
	 * vmcb02 when switching vmcbs for nested virtualization.
	 */
	mov SVM_vmcb01_pa(%_ASM_DI), %_ASM_AX
1:	vmload %_ASM_AX
2:
	/* Get svm->current_vmcb->pa into RAX. */
	mov SVM_current_vmcb(%_ASM_DI), %_ASM_AX
	mov KVM_VMCB_pa(%_ASM_AX), %_ASM_AX
	/* Load guest registers. */
	mov VCPU_RCX(%_ASM_DI), %_ASM_CX
	mov VCPU_RDX(%_ASM_DI), %_ASM_DX
	mov VCPU_RBX(%_ASM_DI), %_ASM_BX
	mov VCPU_RBP(%_ASM_DI), %_ASM_BP
	mov VCPU_RSI(%_ASM_DI), %_ASM_SI
#ifdef CONFIG_X86_64
	mov VCPU_R8 (%_ASM_DI),  %r8
	mov VCPU_R9 (%_ASM_DI),  %r9
	mov VCPU_R10(%_ASM_DI), %r10
	mov VCPU_R11(%_ASM_DI), %r11
	mov VCPU_R12(%_ASM_DI), %r12
	mov VCPU_R13(%_ASM_DI), %r13
	mov VCPU_R14(%_ASM_DI), %r14
	mov VCPU_R15(%_ASM_DI), %r15
#endif
	/* RDI is loaded last because it is the base register for all the loads above. */
	mov VCPU_RDI(%_ASM_DI), %_ASM_DI
	/* Enter guest mode */
	sti
3:	vmrun %_ASM_AX
4:
	cli
	/* Pop @svm to RAX while it's the only available register. */
	pop %_ASM_AX
	/* Save all guest registers.  */
	mov %_ASM_CX,   VCPU_RCX(%_ASM_AX)
	mov %_ASM_DX,   VCPU_RDX(%_ASM_AX)
	mov %_ASM_BX,   VCPU_RBX(%_ASM_AX)
	mov %_ASM_BP,   VCPU_RBP(%_ASM_AX)
	mov %_ASM_SI,   VCPU_RSI(%_ASM_AX)
	mov %_ASM_DI,   VCPU_RDI(%_ASM_AX)
#ifdef CONFIG_X86_64
	mov %r8,  VCPU_R8 (%_ASM_AX)
	mov %r9,  VCPU_R9 (%_ASM_AX)
	mov %r10, VCPU_R10(%_ASM_AX)
	mov %r11, VCPU_R11(%_ASM_AX)
	mov %r12, VCPU_R12(%_ASM_AX)
	mov %r13, VCPU_R13(%_ASM_AX)
	mov %r14, VCPU_R14(%_ASM_AX)
	mov %r15, VCPU_R15(%_ASM_AX)
#endif
	/* @svm can stay in RDI from now on.  */
	mov %_ASM_AX, %_ASM_DI
	/* Save guest state into the same vmcb01 that was used for VMLOAD before entry. */
	mov SVM_vmcb01_pa(%_ASM_DI), %_ASM_AX
5:	vmsave %_ASM_AX
6:
	/* Restores GSBASE among other things, allowing access to percpu data.  */
	/* RAX = the save area physical address pushed before entry. */
	pop %_ASM_AX
7:	vmload %_ASM_AX
8:
	/* IMPORTANT: Stuff the RSB immediately after VM-Exit, before RET! */
	FILL_RETURN_BUFFER %_ASM_AX, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_VMEXIT
	/* Clobbers RAX, RCX, RDX.  */
	RESTORE_HOST_SPEC_CTRL
	/*
	 * Mitigate RETBleed for AMD/Hygon Zen uarch. RET should be
	 * untrained as soon as we exit the VM and are back to the
	 * kernel. This should be done before re-enabling interrupts
	 * because interrupt handlers won't sanitize 'ret' if the return is
	 * from the kernel.
	 */
	UNTRAIN_RET_VM
	/*
	 * Clear all general purpose registers except RSP and RAX to prevent
	 * speculative use of the guest's values, even those that are reloaded
	 * via the stack.  In theory, an L1 cache miss when restoring registers
	 * could lead to speculative execution with the guest's values.
	 * Zeroing XORs are dirt cheap, i.e. the extra paranoia is essentially
	 * free.  RSP and RAX are exempt as they are restored by hardware
	 * during VM-Exit.
	 */
	xor %ecx, %ecx
	xor %edx, %edx
	xor %ebx, %ebx
	xor %ebp, %ebp
	xor %esi, %esi
	xor %edi, %edi
#ifdef CONFIG_X86_64
	xor %r8d,  %r8d
	xor %r9d,  %r9d
	xor %r10d, %r10d
	xor %r11d, %r11d
	xor %r12d, %r12d
	xor %r13d, %r13d
	xor %r14d, %r14d
	xor %r15d, %r15d
#endif
	/* "Pop" @spec_ctrl_intercepted.  */
	/* The value is discarded: the next pop overwrites RBX with its saved value. */
	pop %_ASM_BX
	pop %_ASM_BX
#ifdef CONFIG_X86_64
	pop %r12
	pop %r13
	pop %r14
	pop %r15
#else
	pop %esi
	pop %edi
#endif
	pop %_ASM_BP
	RET
	/*
	 * Out-of-line SPEC_CTRL code.  The host variant reads
	 * @spec_ctrl_intercepted from the top of the stack, which is where the
	 * two pops after VMRUN have left it.
	 */
	RESTORE_GUEST_SPEC_CTRL_BODY
	RESTORE_HOST_SPEC_CTRL_BODY (%_ASM_SP)
	/*
	 * Fault fixups, one per VMLOAD/VMRUN/VMSAVE instruction (labels 1, 3,
	 * 5 and 7).  If kvm_rebooting is non-zero, resume at the label that
	 * follows the faulting instruction; otherwise execute UD2.
	 */
10:	cmpb $0, _ASM_RIP(kvm_rebooting)
	jne 2b
	ud2
30:	cmpb $0, _ASM_RIP(kvm_rebooting)
	jne 4b
	ud2
50:	cmpb $0, _ASM_RIP(kvm_rebooting)
	jne 6b
	ud2
70:	cmpb $0, _ASM_RIP(kvm_rebooting)
	jne 8b
	ud2
	/* Exception table: (faulting instruction, fixup handler) pairs. */
	_ASM_EXTABLE(1b, 10b)
	_ASM_EXTABLE(3b, 30b)
	_ASM_EXTABLE(5b, 50b)
	_ASM_EXTABLE(7b, 70b)
SYM_FUNC_END(__svm_vcpu_run)
#ifdef CONFIG_KVM_AMD_SEV
#ifdef CONFIG_X86_64
/*
 * Byte offsets, relative to the @hostsa pointer passed to
 * __svm_sev_es_vcpu_run, of the slots used to stash host GPRs.  Slots start
 * at SEV_ES_GPRS_BASE and are indexed by __VCPU_REGS_*.
 *
 * NOTE(review): the base is hard-coded; presumably it must match the layout
 * of the save area structure defined elsewhere -- confirm.
 */
#define SEV_ES_GPRS_BASE 0x300
#define SEV_ES_RBX	(SEV_ES_GPRS_BASE + __VCPU_REGS_RBX * WORD_SIZE)
#define SEV_ES_RBP	(SEV_ES_GPRS_BASE + __VCPU_REGS_RBP * WORD_SIZE)
#define SEV_ES_RSI	(SEV_ES_GPRS_BASE + __VCPU_REGS_RSI * WORD_SIZE)
#define SEV_ES_RDI	(SEV_ES_GPRS_BASE + __VCPU_REGS_RDI * WORD_SIZE)
#define SEV_ES_R12	(SEV_ES_GPRS_BASE + __VCPU_REGS_R12 * WORD_SIZE)
#define SEV_ES_R13	(SEV_ES_GPRS_BASE + __VCPU_REGS_R13 * WORD_SIZE)
#define SEV_ES_R14	(SEV_ES_GPRS_BASE + __VCPU_REGS_R14 * WORD_SIZE)
#define SEV_ES_R15	(SEV_ES_GPRS_BASE + __VCPU_REGS_R15 * WORD_SIZE)
#endif
/**
 * __svm_sev_es_vcpu_run - Run a SEV-ES vCPU via a transition to SVM guest mode
 * @svm:	struct vcpu_svm *
 * @spec_ctrl_intercepted: bool
 * @hostsa:	pointer to the host save area, passed in RDX (the C type is
 *		not visible in this file; presumably struct sev_es_save_area *
 *		-- confirm against the prototype)
 *
 * Written with 64-bit register names only.  Instead of pushing registers on
 * the stack, the callee-saved GPRs plus RDI (@svm) and RSI
 * (@spec_ctrl_intercepted) are stored into @hostsa before VMRUN.  No guest
 * GPRs are loaded or saved by this routine.
 */
SYM_FUNC_START(__svm_sev_es_vcpu_run)
	FRAME_BEGIN
	/*
	 * Save non-volatile (callee-saved) registers to the host save area.
	 * Except for RAX and RSP, all GPRs are restored on #VMEXIT, but not
	 * saved on VMRUN.
	 */
	mov %rbp, SEV_ES_RBP (%rdx)
	mov %r15, SEV_ES_R15 (%rdx)
	mov %r14, SEV_ES_R14 (%rdx)
	mov %r13, SEV_ES_R13 (%rdx)
	mov %r12, SEV_ES_R12 (%rdx)
	mov %rbx, SEV_ES_RBX (%rdx)
	/*
	 * Save volatile registers that hold arguments that are needed after
	 * #VMEXIT (RDI=@svm and RSI=@spec_ctrl_intercepted).
	 */
	mov %rdi, SEV_ES_RDI (%rdx)
	mov %rsi, SEV_ES_RSI (%rdx)
	/* Clobbers RAX, RCX, RDX (@hostsa). */
	RESTORE_GUEST_SPEC_CTRL
	/* Get svm->current_vmcb->pa into RAX. */
	mov SVM_current_vmcb(%rdi), %rax
	mov KVM_VMCB_pa(%rax), %rax
	/* Enter guest mode */
	sti
1:	vmrun %rax
2:	cli
	/* IMPORTANT: Stuff the RSB immediately after VM-Exit, before RET! */
	FILL_RETURN_BUFFER %rax, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_VMEXIT
	/* Clobbers RAX, RCX, RDX, consumes RDI (@svm) and RSI (@spec_ctrl_intercepted). */
	RESTORE_HOST_SPEC_CTRL
	/*
	 * Mitigate RETBleed for AMD/Hygon Zen uarch. RET should be
	 * untrained as soon as we exit the VM and are back to the
	 * kernel. This should be done before re-enabling interrupts
	 * because interrupt handlers won't sanitize RET if the return is
	 * from the kernel.
	 */
	UNTRAIN_RET_VM
	FRAME_END
	RET
	/*
	 * Out-of-line SPEC_CTRL code.  The host variant takes
	 * @spec_ctrl_intercepted from the low byte of RSI.
	 */
	RESTORE_GUEST_SPEC_CTRL_BODY
	RESTORE_HOST_SPEC_CTRL_BODY %sil
	/*
	 * Fault fixup for VMRUN (label 1): if kvm_rebooting is non-zero, resume
	 * at label 2; otherwise execute UD2.
	 */
3:	cmpb $0, kvm_rebooting(%rip)
	jne 2b
	ud2
	/* Exception table: (faulting instruction, fixup handler) pair. */
	_ASM_EXTABLE(1b, 3b)
SYM_FUNC_END(__svm_sev_es_vcpu_run)
#endif /* CONFIG_KVM_AMD_SEV */
 |