summaryrefslogtreecommitdiff
path: root/arch/s390
diff options
context:
space:
mode:
Diffstat (limited to 'arch/s390')
-rw-r--r--arch/s390/include/asm/kvm_host.h18
-rw-r--r--arch/s390/include/asm/perf_event.h10
-rw-r--r--arch/s390/include/asm/pgtable.h83
-rw-r--r--arch/s390/kernel/asm-offsets.c3
-rw-r--r--arch/s390/kernel/entry64.S81
-rw-r--r--arch/s390/kernel/perf_event.c52
-rw-r--r--arch/s390/kernel/s390_ksyms.c1
-rw-r--r--arch/s390/kvm/Makefile3
-rw-r--r--arch/s390/kvm/diag.c3
-rw-r--r--arch/s390/kvm/intercept.c124
-rw-r--r--arch/s390/kvm/interrupt.c18
-rw-r--r--arch/s390/kvm/kvm-s390.c105
-rw-r--r--arch/s390/kvm/kvm-s390.h14
-rw-r--r--arch/s390/kvm/priv.c274
-rw-r--r--arch/s390/kvm/sigp.c19
-rw-r--r--arch/s390/mm/pgtable.c2
16 files changed, 527 insertions, 283 deletions
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 16bd5d169cdb..3238d4004e84 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -62,13 +62,20 @@ struct sca_block {
#define CPUSTAT_MCDS 0x00000100
#define CPUSTAT_SM 0x00000080
#define CPUSTAT_G 0x00000008
+#define CPUSTAT_GED 0x00000004
#define CPUSTAT_J 0x00000002
#define CPUSTAT_P 0x00000001
struct kvm_s390_sie_block {
atomic_t cpuflags; /* 0x0000 */
__u32 prefix; /* 0x0004 */
- __u8 reserved8[32]; /* 0x0008 */
+ __u8 reserved08[4]; /* 0x0008 */
+#define PROG_IN_SIE (1<<0)
+ __u32 prog0c; /* 0x000c */
+ __u8 reserved10[16]; /* 0x0010 */
+#define PROG_BLOCK_SIE 0x00000001
+ atomic_t prog20; /* 0x0020 */
+ __u8 reserved24[4]; /* 0x0024 */
__u64 cputm; /* 0x0028 */
__u64 ckc; /* 0x0030 */
__u64 epoch; /* 0x0038 */
@@ -90,7 +97,8 @@ struct kvm_s390_sie_block {
__u32 scaoh; /* 0x005c */
__u8 reserved60; /* 0x0060 */
__u8 ecb; /* 0x0061 */
- __u8 reserved62[2]; /* 0x0062 */
+ __u8 ecb2; /* 0x0062 */
+ __u8 reserved63[1]; /* 0x0063 */
__u32 scaol; /* 0x0064 */
__u8 reserved68[4]; /* 0x0068 */
__u32 todpr; /* 0x006c */
@@ -130,6 +138,7 @@ struct kvm_vcpu_stat {
u32 deliver_program_int;
u32 deliver_io_int;
u32 exit_wait_state;
+ u32 instruction_pfmf;
u32 instruction_stidp;
u32 instruction_spx;
u32 instruction_stpx;
@@ -166,7 +175,7 @@ struct kvm_s390_ext_info {
};
#define PGM_OPERATION 0x01
-#define PGM_PRIVILEGED_OPERATION 0x02
+#define PGM_PRIVILEGED_OP 0x02
#define PGM_EXECUTE 0x03
#define PGM_PROTECTION 0x04
#define PGM_ADDRESSING 0x05
@@ -219,7 +228,7 @@ struct kvm_s390_local_interrupt {
atomic_t active;
struct kvm_s390_float_interrupt *float_int;
int timer_due; /* event indicator for waitqueue below */
- wait_queue_head_t wq;
+ wait_queue_head_t *wq;
atomic_t *cpuflags;
unsigned int action_bits;
};
@@ -266,4 +275,5 @@ struct kvm_arch{
};
extern int sie64a(struct kvm_s390_sie_block *, u64 *);
+extern char sie_exit;
#endif
diff --git a/arch/s390/include/asm/perf_event.h b/arch/s390/include/asm/perf_event.h
index 5f0173a31693..1141fb3e7b21 100644
--- a/arch/s390/include/asm/perf_event.h
+++ b/arch/s390/include/asm/perf_event.h
@@ -14,3 +14,13 @@
/* Per-CPU flags for PMU states */
#define PMU_F_RESERVED 0x1000
#define PMU_F_ENABLED 0x2000
+
+#ifdef CONFIG_64BIT
+
+/* Perf callbacks */
+struct pt_regs;
+extern unsigned long perf_instruction_pointer(struct pt_regs *regs);
+extern unsigned long perf_misc_flags(struct pt_regs *regs);
+#define perf_misc_flags(regs) perf_misc_flags(regs)
+
+#endif /* CONFIG_64BIT */
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index 9aefa3c64eb2..0ea4e591fa78 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -296,18 +296,16 @@ extern unsigned long MODULES_END;
#define _SEGMENT_ENTRY_EMPTY (_SEGMENT_ENTRY_INV)
/* Page status table bits for virtualization */
-#define RCP_ACC_BITS 0xf0000000UL
-#define RCP_FP_BIT 0x08000000UL
-#define RCP_PCL_BIT 0x00800000UL
-#define RCP_HR_BIT 0x00400000UL
-#define RCP_HC_BIT 0x00200000UL
-#define RCP_GR_BIT 0x00040000UL
-#define RCP_GC_BIT 0x00020000UL
-#define RCP_IN_BIT 0x00002000UL /* IPTE notify bit */
-
-/* User dirty / referenced bit for KVM's migration feature */
-#define KVM_UR_BIT 0x00008000UL
-#define KVM_UC_BIT 0x00004000UL
+#define PGSTE_ACC_BITS 0xf0000000UL
+#define PGSTE_FP_BIT 0x08000000UL
+#define PGSTE_PCL_BIT 0x00800000UL
+#define PGSTE_HR_BIT 0x00400000UL
+#define PGSTE_HC_BIT 0x00200000UL
+#define PGSTE_GR_BIT 0x00040000UL
+#define PGSTE_GC_BIT 0x00020000UL
+#define PGSTE_UR_BIT 0x00008000UL
+#define PGSTE_UC_BIT 0x00004000UL /* user dirty (migration) */
+#define PGSTE_IN_BIT 0x00002000UL /* IPTE notify bit */
#else /* CONFIG_64BIT */
@@ -364,18 +362,16 @@ extern unsigned long MODULES_END;
| _SEGMENT_ENTRY_SPLIT | _SEGMENT_ENTRY_CO)
/* Page status table bits for virtualization */
-#define RCP_ACC_BITS 0xf000000000000000UL
-#define RCP_FP_BIT 0x0800000000000000UL
-#define RCP_PCL_BIT 0x0080000000000000UL
-#define RCP_HR_BIT 0x0040000000000000UL
-#define RCP_HC_BIT 0x0020000000000000UL
-#define RCP_GR_BIT 0x0004000000000000UL
-#define RCP_GC_BIT 0x0002000000000000UL
-#define RCP_IN_BIT 0x0000200000000000UL /* IPTE notify bit */
-
-/* User dirty / referenced bit for KVM's migration feature */
-#define KVM_UR_BIT 0x0000800000000000UL
-#define KVM_UC_BIT 0x0000400000000000UL
+#define PGSTE_ACC_BITS 0xf000000000000000UL
+#define PGSTE_FP_BIT 0x0800000000000000UL
+#define PGSTE_PCL_BIT 0x0080000000000000UL
+#define PGSTE_HR_BIT 0x0040000000000000UL
+#define PGSTE_HC_BIT 0x0020000000000000UL
+#define PGSTE_GR_BIT 0x0004000000000000UL
+#define PGSTE_GC_BIT 0x0002000000000000UL
+#define PGSTE_UR_BIT 0x0000800000000000UL
+#define PGSTE_UC_BIT 0x0000400000000000UL /* user dirty (migration) */
+#define PGSTE_IN_BIT 0x0000200000000000UL /* IPTE notify bit */
#endif /* CONFIG_64BIT */
@@ -615,8 +611,8 @@ static inline pgste_t pgste_get_lock(pte_t *ptep)
asm(
" lg %0,%2\n"
"0: lgr %1,%0\n"
- " nihh %0,0xff7f\n" /* clear RCP_PCL_BIT in old */
- " oihh %1,0x0080\n" /* set RCP_PCL_BIT in new */
+ " nihh %0,0xff7f\n" /* clear PCL bit in old */
+ " oihh %1,0x0080\n" /* set PCL bit in new */
" csg %0,%1,%2\n"
" jl 0b\n"
: "=&d" (old), "=&d" (new), "=Q" (ptep[PTRS_PER_PTE])
@@ -629,7 +625,7 @@ static inline void pgste_set_unlock(pte_t *ptep, pgste_t pgste)
{
#ifdef CONFIG_PGSTE
asm(
- " nihh %1,0xff7f\n" /* clear RCP_PCL_BIT */
+ " nihh %1,0xff7f\n" /* clear PCL bit */
" stg %1,%0\n"
: "=Q" (ptep[PTRS_PER_PTE])
: "d" (pgste_val(pgste)), "Q" (ptep[PTRS_PER_PTE])
@@ -662,14 +658,14 @@ static inline pgste_t pgste_update_all(pte_t *ptep, pgste_t pgste)
else if (bits)
page_reset_referenced(address);
/* Transfer page changed & referenced bit to guest bits in pgste */
- pgste_val(pgste) |= bits << 48; /* RCP_GR_BIT & RCP_GC_BIT */
+ pgste_val(pgste) |= bits << 48; /* GR bit & GC bit */
/* Get host changed & referenced bits from pgste */
- bits |= (pgste_val(pgste) & (RCP_HR_BIT | RCP_HC_BIT)) >> 52;
+ bits |= (pgste_val(pgste) & (PGSTE_HR_BIT | PGSTE_HC_BIT)) >> 52;
/* Transfer page changed & referenced bit to kvm user bits */
- pgste_val(pgste) |= bits << 45; /* KVM_UR_BIT & KVM_UC_BIT */
+ pgste_val(pgste) |= bits << 45; /* PGSTE_UR_BIT & PGSTE_UC_BIT */
/* Clear relevant host bits in pgste. */
- pgste_val(pgste) &= ~(RCP_HR_BIT | RCP_HC_BIT);
- pgste_val(pgste) &= ~(RCP_ACC_BITS | RCP_FP_BIT);
+ pgste_val(pgste) &= ~(PGSTE_HR_BIT | PGSTE_HC_BIT);
+ pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT);
/* Copy page access key and fetch protection bit to pgste */
pgste_val(pgste) |=
(unsigned long) (skey & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56;
@@ -690,15 +686,15 @@ static inline pgste_t pgste_update_young(pte_t *ptep, pgste_t pgste)
/* Get referenced bit from storage key */
young = page_reset_referenced(pte_val(*ptep) & PAGE_MASK);
if (young)
- pgste_val(pgste) |= RCP_GR_BIT;
+ pgste_val(pgste) |= PGSTE_GR_BIT;
/* Get host referenced bit from pgste */
- if (pgste_val(pgste) & RCP_HR_BIT) {
- pgste_val(pgste) &= ~RCP_HR_BIT;
+ if (pgste_val(pgste) & PGSTE_HR_BIT) {
+ pgste_val(pgste) &= ~PGSTE_HR_BIT;
young = 1;
}
/* Transfer referenced bit to kvm user bits and pte */
if (young) {
- pgste_val(pgste) |= KVM_UR_BIT;
+ pgste_val(pgste) |= PGSTE_UR_BIT;
pte_val(*ptep) |= _PAGE_SWR;
}
#endif
@@ -720,7 +716,7 @@ static inline void pgste_set_key(pte_t *ptep, pgste_t pgste, pte_t entry)
* The guest C/R information is still in the PGSTE, set real
* key C/R to 0.
*/
- nkey = (pgste_val(pgste) & (RCP_ACC_BITS | RCP_FP_BIT)) >> 56;
+ nkey = (pgste_val(pgste) & (PGSTE_ACC_BITS | PGSTE_FP_BIT)) >> 56;
page_set_storage_key(address, nkey, 0);
#endif
}
@@ -750,6 +746,7 @@ struct gmap {
struct mm_struct *mm;
unsigned long *table;
unsigned long asce;
+ void *private;
struct list_head crst_list;
};
@@ -808,8 +805,8 @@ static inline pgste_t pgste_ipte_notify(struct mm_struct *mm,
pte_t *ptep, pgste_t pgste)
{
#ifdef CONFIG_PGSTE
- if (pgste_val(pgste) & RCP_IN_BIT) {
- pgste_val(pgste) &= ~RCP_IN_BIT;
+ if (pgste_val(pgste) & PGSTE_IN_BIT) {
+ pgste_val(pgste) &= ~PGSTE_IN_BIT;
gmap_do_ipte_notify(mm, addr, ptep);
}
#endif
@@ -977,8 +974,8 @@ static inline int ptep_test_and_clear_user_dirty(struct mm_struct *mm,
if (mm_has_pgste(mm)) {
pgste = pgste_get_lock(ptep);
pgste = pgste_update_all(ptep, pgste);
- dirty = !!(pgste_val(pgste) & KVM_UC_BIT);
- pgste_val(pgste) &= ~KVM_UC_BIT;
+ dirty = !!(pgste_val(pgste) & PGSTE_UC_BIT);
+ pgste_val(pgste) &= ~PGSTE_UC_BIT;
pgste_set_unlock(ptep, pgste);
return dirty;
}
@@ -997,8 +994,8 @@ static inline int ptep_test_and_clear_user_young(struct mm_struct *mm,
if (mm_has_pgste(mm)) {
pgste = pgste_get_lock(ptep);
pgste = pgste_update_young(ptep, pgste);
- young = !!(pgste_val(pgste) & KVM_UR_BIT);
- pgste_val(pgste) &= ~KVM_UR_BIT;
+ young = !!(pgste_val(pgste) & PGSTE_UR_BIT);
+ pgste_val(pgste) &= ~PGSTE_UR_BIT;
pgste_set_unlock(ptep, pgste);
}
return young;
diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c
index d6de844bc30a..2416138ebd3e 100644
--- a/arch/s390/kernel/asm-offsets.c
+++ b/arch/s390/kernel/asm-offsets.c
@@ -7,6 +7,7 @@
#define ASM_OFFSETS_C
#include <linux/kbuild.h>
+#include <linux/kvm_host.h>
#include <linux/sched.h>
#include <asm/cputime.h>
#include <asm/vdso.h>
@@ -162,6 +163,8 @@ int main(void)
DEFINE(__LC_PGM_TDB, offsetof(struct _lowcore, pgm_tdb));
DEFINE(__THREAD_trap_tdb, offsetof(struct task_struct, thread.trap_tdb));
DEFINE(__GMAP_ASCE, offsetof(struct gmap, asce));
+ DEFINE(__SIE_PROG0C, offsetof(struct kvm_s390_sie_block, prog0c));
+ DEFINE(__SIE_PROG20, offsetof(struct kvm_s390_sie_block, prog20));
#endif /* CONFIG_32BIT */
return 0;
}
diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S
index bc5864c5148b..1c039d0c24c7 100644
--- a/arch/s390/kernel/entry64.S
+++ b/arch/s390/kernel/entry64.S
@@ -47,7 +47,6 @@ _TIF_WORK_INT = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \
_TIF_MCCK_PENDING)
_TIF_TRACE = (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | \
_TIF_SYSCALL_TRACEPOINT)
-_TIF_EXIT_SIE = (_TIF_SIGPENDING | _TIF_NEED_RESCHED | _TIF_MCCK_PENDING)
#define BASED(name) name-system_call(%r13)
@@ -81,23 +80,27 @@ _TIF_EXIT_SIE = (_TIF_SIGPENDING | _TIF_NEED_RESCHED | _TIF_MCCK_PENDING)
#endif
.endm
- .macro HANDLE_SIE_INTERCEPT scratch,pgmcheck
+ .macro HANDLE_SIE_INTERCEPT scratch,reason
#if defined(CONFIG_KVM) || defined(CONFIG_KVM_MODULE)
tmhh %r8,0x0001 # interrupting from user ?
- jnz .+42
+ jnz .+62
lgr \scratch,%r9
- slg \scratch,BASED(.Lsie_loop)
- clg \scratch,BASED(.Lsie_length)
- .if \pgmcheck
+ slg \scratch,BASED(.Lsie_critical)
+ clg \scratch,BASED(.Lsie_critical_length)
+ .if \reason==1
# Some program interrupts are suppressing (e.g. protection).
# We must also check the instruction after SIE in that case.
# do_protection_exception will rewind to rewind_pad
- jh .+22
+ jh .+42
.else
- jhe .+22
+ jhe .+42
.endif
- lg %r9,BASED(.Lsie_loop)
- LPP BASED(.Lhost_id) # set host id
+ lg %r14,__SF_EMPTY(%r15) # get control block pointer
+ LPP __SF_EMPTY+16(%r15) # set host id
+ ni __SIE_PROG0C+3(%r14),0xfe # no longer in SIE
+ lctlg %c1,%c1,__LC_USER_ASCE # load primary asce
+ larl %r9,sie_exit # skip forward to sie_exit
+ mvi __SF_EMPTY+31(%r15),\reason # set exit reason
#endif
.endm
@@ -450,7 +453,7 @@ ENTRY(io_int_handler)
lg %r12,__LC_THREAD_INFO
larl %r13,system_call
lmg %r8,%r9,__LC_IO_OLD_PSW
- HANDLE_SIE_INTERCEPT %r14,0
+ HANDLE_SIE_INTERCEPT %r14,2
SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_STACK,STACK_SHIFT
tmhh %r8,0x0001 # interrupting from user?
jz io_skip
@@ -603,7 +606,7 @@ ENTRY(ext_int_handler)
lg %r12,__LC_THREAD_INFO
larl %r13,system_call
lmg %r8,%r9,__LC_EXT_OLD_PSW
- HANDLE_SIE_INTERCEPT %r14,0
+ HANDLE_SIE_INTERCEPT %r14,3
SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_STACK,STACK_SHIFT
tmhh %r8,0x0001 # interrupting from user ?
jz ext_skip
@@ -651,7 +654,7 @@ ENTRY(mcck_int_handler)
lg %r12,__LC_THREAD_INFO
larl %r13,system_call
lmg %r8,%r9,__LC_MCK_OLD_PSW
- HANDLE_SIE_INTERCEPT %r14,0
+ HANDLE_SIE_INTERCEPT %r14,4
tm __LC_MCCK_CODE,0x80 # system damage?
jo mcck_panic # yes -> rest of mcck code invalid
lghi %r14,__LC_CPU_TIMER_SAVE_AREA
@@ -945,56 +948,50 @@ ENTRY(sie64a)
stmg %r6,%r14,__SF_GPRS(%r15) # save kernel registers
stg %r2,__SF_EMPTY(%r15) # save control block pointer
stg %r3,__SF_EMPTY+8(%r15) # save guest register save area
- xc __SF_EMPTY+16(8,%r15),__SF_EMPTY+16(%r15) # host id == 0
+ xc __SF_EMPTY+16(16,%r15),__SF_EMPTY+16(%r15) # host id & reason
lmg %r0,%r13,0(%r3) # load guest gprs 0-13
-# some program checks are suppressing. C code (e.g. do_protection_exception)
-# will rewind the PSW by the ILC, which is 4 bytes in case of SIE. Other
-# instructions in the sie_loop should not cause program interrupts. So
-# lets use a nop (47 00 00 00) as a landing pad.
-# See also HANDLE_SIE_INTERCEPT
-rewind_pad:
- nop 0
-sie_loop:
- lg %r14,__LC_THREAD_INFO # pointer thread_info struct
- tm __TI_flags+7(%r14),_TIF_EXIT_SIE
- jnz sie_exit
lg %r14,__LC_GMAP # get gmap pointer
ltgr %r14,%r14
jz sie_gmap
lctlg %c1,%c1,__GMAP_ASCE(%r14) # load primary asce
sie_gmap:
lg %r14,__SF_EMPTY(%r15) # get control block pointer
+ oi __SIE_PROG0C+3(%r14),1 # we are going into SIE now
+ tm __SIE_PROG20+3(%r14),1 # last exit...
+ jnz sie_done
LPP __SF_EMPTY(%r15) # set guest id
sie 0(%r14)
sie_done:
LPP __SF_EMPTY+16(%r15) # set host id
- lg %r14,__LC_THREAD_INFO # pointer thread_info struct
-sie_exit:
+ ni __SIE_PROG0C+3(%r14),0xfe # no longer in SIE
lctlg %c1,%c1,__LC_USER_ASCE # load primary asce
+# some program checks are suppressing. C code (e.g. do_protection_exception)
+# will rewind the PSW by the ILC, which is 4 bytes in case of SIE. Other
+# instructions between sie64a and sie_done should not cause program
+# interrupts. So let's use a nop (47 00 00 00) as a landing pad.
+# See also HANDLE_SIE_INTERCEPT
+rewind_pad:
+ nop 0
+ .globl sie_exit
+sie_exit:
lg %r14,__SF_EMPTY+8(%r15) # load guest register save area
stmg %r0,%r13,0(%r14) # save guest gprs 0-13
lmg %r6,%r14,__SF_GPRS(%r15) # restore kernel registers
- lghi %r2,0
+ lg %r2,__SF_EMPTY+24(%r15) # return exit reason code
br %r14
sie_fault:
- lctlg %c1,%c1,__LC_USER_ASCE # load primary asce
- lg %r14,__LC_THREAD_INFO # pointer thread_info struct
- lg %r14,__SF_EMPTY+8(%r15) # load guest register save area
- stmg %r0,%r13,0(%r14) # save guest gprs 0-13
- lmg %r6,%r14,__SF_GPRS(%r15) # restore kernel registers
- lghi %r2,-EFAULT
- br %r14
+ lghi %r14,-EFAULT
+ stg %r14,__SF_EMPTY+24(%r15) # set exit reason code
+ j sie_exit
.align 8
-.Lsie_loop:
- .quad sie_loop
-.Lsie_length:
- .quad sie_done - sie_loop
-.Lhost_id:
- .quad 0
+.Lsie_critical:
+ .quad sie_gmap
+.Lsie_critical_length:
+ .quad sie_done - sie_gmap
EX_TABLE(rewind_pad,sie_fault)
- EX_TABLE(sie_loop,sie_fault)
+ EX_TABLE(sie_exit,sie_fault)
#endif
.section .rodata, "a"
diff --git a/arch/s390/kernel/perf_event.c b/arch/s390/kernel/perf_event.c
index f58f37f66824..a6fc037671b1 100644
--- a/arch/s390/kernel/perf_event.c
+++ b/arch/s390/kernel/perf_event.c
@@ -13,6 +13,7 @@
#include <linux/kernel.h>
#include <linux/perf_event.h>
+#include <linux/kvm_host.h>
#include <linux/percpu.h>
#include <linux/export.h>
#include <asm/irq.h>
@@ -39,6 +40,57 @@ int perf_num_counters(void)
}
EXPORT_SYMBOL(perf_num_counters);
+static struct kvm_s390_sie_block *sie_block(struct pt_regs *regs)
+{
+ struct stack_frame *stack = (struct stack_frame *) regs->gprs[15];
+
+ if (!stack)
+ return NULL;
+
+ return (struct kvm_s390_sie_block *) stack->empty1[0];
+}
+
+static bool is_in_guest(struct pt_regs *regs)
+{
+ unsigned long ip = instruction_pointer(regs);
+
+ if (user_mode(regs))
+ return false;
+
+ return ip == (unsigned long) &sie_exit;
+}
+
+static unsigned long guest_is_user_mode(struct pt_regs *regs)
+{
+ return sie_block(regs)->gpsw.mask & PSW_MASK_PSTATE;
+}
+
+static unsigned long instruction_pointer_guest(struct pt_regs *regs)
+{
+ return sie_block(regs)->gpsw.addr & PSW_ADDR_INSN;
+}
+
+unsigned long perf_instruction_pointer(struct pt_regs *regs)
+{
+ return is_in_guest(regs) ? instruction_pointer_guest(regs)
+ : instruction_pointer(regs);
+}
+
+static unsigned long perf_misc_guest_flags(struct pt_regs *regs)
+{
+ return guest_is_user_mode(regs) ? PERF_RECORD_MISC_GUEST_USER
+ : PERF_RECORD_MISC_GUEST_KERNEL;
+}
+
+unsigned long perf_misc_flags(struct pt_regs *regs)
+{
+ if (is_in_guest(regs))
+ return perf_misc_guest_flags(regs);
+
+ return user_mode(regs) ? PERF_RECORD_MISC_USER
+ : PERF_RECORD_MISC_KERNEL;
+}
+
void perf_event_print_debug(void)
{
struct cpumf_ctr_info cf_info;
diff --git a/arch/s390/kernel/s390_ksyms.c b/arch/s390/kernel/s390_ksyms.c
index 9bdbcef1da9e..3bac589844a7 100644
--- a/arch/s390/kernel/s390_ksyms.c
+++ b/arch/s390/kernel/s390_ksyms.c
@@ -7,6 +7,7 @@ EXPORT_SYMBOL(_mcount);
#endif
#if defined(CONFIG_KVM) || defined(CONFIG_KVM_MODULE)
EXPORT_SYMBOL(sie64a);
+EXPORT_SYMBOL(sie_exit);
#endif
EXPORT_SYMBOL(memcpy);
EXPORT_SYMBOL(memset);
diff --git a/arch/s390/kvm/Makefile b/arch/s390/kvm/Makefile
index 8fe9d65a4585..40b4c6470f88 100644
--- a/arch/s390/kvm/Makefile
+++ b/arch/s390/kvm/Makefile
@@ -6,7 +6,8 @@
# it under the terms of the GNU General Public License (version 2 only)
# as published by the Free Software Foundation.
-common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o eventfd.o)
+KVM := ../../../virt/kvm
+common-objs = $(KVM)/kvm_main.o $(KVM)/eventfd.o
ccflags-y := -Ivirt/kvm -Iarch/s390/kvm
diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c
index 1c01a9912989..3074475c8ae0 100644
--- a/arch/s390/kvm/diag.c
+++ b/arch/s390/kvm/diag.c
@@ -132,6 +132,9 @@ int kvm_s390_handle_diag(struct kvm_vcpu *vcpu)
{
int code = (vcpu->arch.sie_block->ipb & 0xfff0000) >> 16;
+ if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+ return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
trace_kvm_s390_handle_diag(vcpu, code);
switch (code) {
case 0x10:
diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c
index b7d1b2edeeb3..5ee56e5acc23 100644
--- a/arch/s390/kvm/intercept.c
+++ b/arch/s390/kvm/intercept.c
@@ -22,87 +22,6 @@
#include "trace.h"
#include "trace-s390.h"
-static int handle_lctlg(struct kvm_vcpu *vcpu)
-{
- int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4;
- int reg3 = vcpu->arch.sie_block->ipa & 0x000f;
- u64 useraddr;
- int reg, rc;
-
- vcpu->stat.instruction_lctlg++;
-
- useraddr = kvm_s390_get_base_disp_rsy(vcpu);
-
- if (useraddr & 7)
- return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
-
- reg = reg1;
-
- VCPU_EVENT(vcpu, 5, "lctlg r1:%x, r3:%x, addr:%llx", reg1, reg3,
- useraddr);
- trace_kvm_s390_handle_lctl(vcpu, 1, reg1, reg3, useraddr);
-
- do {
- rc = get_guest(vcpu, vcpu->arch.sie_block->gcr[reg],
- (u64 __user *) useraddr);
- if (rc)
- return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
- useraddr += 8;
- if (reg == reg3)
- break;
- reg = (reg + 1) % 16;
- } while (1);
- return 0;
-}
-
-static int handle_lctl(struct kvm_vcpu *vcpu)
-{
- int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4;
- int reg3 = vcpu->arch.sie_block->ipa & 0x000f;
- u64 useraddr;
- u32 val = 0;
- int reg, rc;
-
- vcpu->stat.instruction_lctl++;
-
- useraddr = kvm_s390_get_base_disp_rs(vcpu);
-
- if (useraddr & 3)
- return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
-
- VCPU_EVENT(vcpu, 5, "lctl r1:%x, r3:%x, addr:%llx", reg1, reg3,
- useraddr);
- trace_kvm_s390_handle_lctl(vcpu, 0, reg1, reg3, useraddr);
-
- reg = reg1;
- do {
- rc = get_guest(vcpu, val, (u32 __user *) useraddr);
- if (rc)
- return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
- vcpu->arch.sie_block->gcr[reg] &= 0xffffffff00000000ul;
- vcpu->arch.sie_block->gcr[reg] |= val;
- useraddr += 4;
- if (reg == reg3)
- break;
- reg = (reg + 1) % 16;
- } while (1);
- return 0;
-}
-
-static const intercept_handler_t eb_handlers[256] = {
- [0x2f] = handle_lctlg,
- [0x8a] = kvm_s390_handle_priv_eb,
-};
-
-static int handle_eb(struct kvm_vcpu *vcpu)
-{
- intercept_handler_t handler;
-
- handler = eb_handlers[vcpu->arch.sie_block->ipb & 0xff];
- if (handler)
- return handler(vcpu);
- return -EOPNOTSUPP;
-}
static const intercept_handler_t instruction_handlers[256] = {
[0x01] = kvm_s390_handle_01,
@@ -110,10 +29,10 @@ static const intercept_handler_t instruction_handlers[256] = {
[0x83] = kvm_s390_handle_diag,
[0xae] = kvm_s390_handle_sigp,
[0xb2] = kvm_s390_handle_b2,
- [0xb7] = handle_lctl,
+ [0xb7] = kvm_s390_handle_lctl,
[0xb9] = kvm_s390_handle_b9,
[0xe5] = kvm_s390_handle_e5,
- [0xeb] = handle_eb,
+ [0xeb] = kvm_s390_handle_eb,
};
static int handle_noop(struct kvm_vcpu *vcpu)
@@ -174,47 +93,12 @@ static int handle_stop(struct kvm_vcpu *vcpu)
static int handle_validity(struct kvm_vcpu *vcpu)
{
- unsigned long vmaddr;
int viwhy = vcpu->arch.sie_block->ipb >> 16;
- int rc;
vcpu->stat.exit_validity++;
trace_kvm_s390_intercept_validity(vcpu, viwhy);
- if (viwhy == 0x37) {
- vmaddr = gmap_fault(vcpu->arch.sie_block->prefix,
- vcpu->arch.gmap);
- if (IS_ERR_VALUE(vmaddr)) {
- rc = -EOPNOTSUPP;
- goto out;
- }
- rc = fault_in_pages_writeable((char __user *) vmaddr,
- PAGE_SIZE);
- if (rc) {
- /* user will receive sigsegv, exit to user */
- rc = -EOPNOTSUPP;
- goto out;
- }
- vmaddr = gmap_fault(vcpu->arch.sie_block->prefix + PAGE_SIZE,
- vcpu->arch.gmap);
- if (IS_ERR_VALUE(vmaddr)) {
- rc = -EOPNOTSUPP;
- goto out;
- }
- rc = fault_in_pages_writeable((char __user *) vmaddr,
- PAGE_SIZE);
- if (rc) {
- /* user will receive sigsegv, exit to user */
- rc = -EOPNOTSUPP;
- goto out;
- }
- } else
- rc = -EOPNOTSUPP;
-
-out:
- if (rc)
- VCPU_EVENT(vcpu, 2, "unhandled validity intercept code %d",
- viwhy);
- return rc;
+ WARN_ONCE(true, "kvm: unhandled validity intercept 0x%x\n", viwhy);
+ return -EOPNOTSUPP;
}
static int handle_instruction(struct kvm_vcpu *vcpu)
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index 5c948177529e..7f35cb33e510 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -438,7 +438,7 @@ int kvm_s390_handle_wait(struct kvm_vcpu *vcpu)
no_timer:
spin_lock(&vcpu->arch.local_int.float_int->lock);
spin_lock_bh(&vcpu->arch.local_int.lock);
- add_wait_queue(&vcpu->arch.local_int.wq, &wait);
+ add_wait_queue(&vcpu->wq, &wait);
while (list_empty(&vcpu->arch.local_int.list) &&
list_empty(&vcpu->arch.local_int.float_int->list) &&
(!vcpu->arch.local_int.timer_due) &&
@@ -452,7 +452,7 @@ no_timer:
}
__unset_cpu_idle(vcpu);
__set_current_state(TASK_RUNNING);
- remove_wait_queue(&vcpu->arch.local_int.wq, &wait);
+ remove_wait_queue(&vcpu->wq, &wait);
spin_unlock_bh(&vcpu->arch.local_int.lock);
spin_unlock(&vcpu->arch.local_int.float_int->lock);
hrtimer_try_to_cancel(&vcpu->arch.ckc_timer);
@@ -465,8 +465,8 @@ void kvm_s390_tasklet(unsigned long parm)
spin_lock(&vcpu->arch.local_int.lock);
vcpu->arch.local_int.timer_due = 1;
- if (waitqueue_active(&vcpu->arch.local_int.wq))
- wake_up_interruptible(&vcpu->arch.local_int.wq);
+ if (waitqueue_active(&vcpu->wq))
+ wake_up_interruptible(&vcpu->wq);
spin_unlock(&vcpu->arch.local_int.lock);
}
@@ -613,7 +613,7 @@ int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code)
spin_lock_bh(&li->lock);
list_add(&inti->list, &li->list);
atomic_set(&li->active, 1);
- BUG_ON(waitqueue_active(&li->wq));
+ BUG_ON(waitqueue_active(li->wq));
spin_unlock_bh(&li->lock);
return 0;
}
@@ -746,8 +746,8 @@ int kvm_s390_inject_vm(struct kvm *kvm,
li = fi->local_int[sigcpu];
spin_lock_bh(&li->lock);
atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
- if (waitqueue_active(&li->wq))
- wake_up_interruptible(&li->wq);
+ if (waitqueue_active(li->wq))
+ wake_up_interruptible(li->wq);
spin_unlock_bh(&li->lock);
spin_unlock(&fi->lock);
mutex_unlock(&kvm->lock);
@@ -832,8 +832,8 @@ int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
if (inti->type == KVM_S390_SIGP_STOP)
li->action_bits |= ACTION_STOP_ON_STOP;
atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
- if (waitqueue_active(&li->wq))
- wake_up_interruptible(&vcpu->arch.local_int.wq);
+ if (waitqueue_active(&vcpu->wq))
+ wake_up_interruptible(&vcpu->wq);
spin_unlock_bh(&li->lock);
mutex_unlock(&vcpu->kvm->lock);
return 0;
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index c1c7c683fa26..ba694d2ba51e 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -59,6 +59,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
{ "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
{ "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
{ "exit_wait_state", VCPU_STAT(exit_wait_state) },
+ { "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
{ "instruction_stidp", VCPU_STAT(instruction_stidp) },
{ "instruction_spx", VCPU_STAT(instruction_spx) },
{ "instruction_stpx", VCPU_STAT(instruction_stpx) },
@@ -84,6 +85,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
};
static unsigned long long *facilities;
+static struct gmap_notifier gmap_notifier;
/* Section: not file related */
int kvm_arch_hardware_enable(void *garbage)
@@ -96,13 +98,18 @@ void kvm_arch_hardware_disable(void *garbage)
{
}
+static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address);
+
int kvm_arch_hardware_setup(void)
{
+ gmap_notifier.notifier_call = kvm_gmap_notifier;
+ gmap_register_ipte_notifier(&gmap_notifier);
return 0;
}
void kvm_arch_hardware_unsetup(void)
{
+ gmap_unregister_ipte_notifier(&gmap_notifier);
}
void kvm_arch_check_processor_compat(void *rtn)
@@ -239,6 +246,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
kvm->arch.gmap = gmap_alloc(current->mm);
if (!kvm->arch.gmap)
goto out_nogmap;
+ kvm->arch.gmap->private = kvm;
}
kvm->arch.css_support = 0;
@@ -270,7 +278,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
free_page((unsigned long)(vcpu->arch.sie_block));
kvm_vcpu_uninit(vcpu);
- kfree(vcpu);
+ kmem_cache_free(kvm_vcpu_cache, vcpu);
}
static void kvm_free_vcpus(struct kvm *kvm)
@@ -309,6 +317,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
vcpu->arch.gmap = gmap_alloc(current->mm);
if (!vcpu->arch.gmap)
return -ENOMEM;
+ vcpu->arch.gmap->private = vcpu->kvm;
return 0;
}
@@ -373,8 +382,10 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
{
atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
CPUSTAT_SM |
- CPUSTAT_STOPPED);
+ CPUSTAT_STOPPED |
+ CPUSTAT_GED);
vcpu->arch.sie_block->ecb = 6;
+ vcpu->arch.sie_block->ecb2 = 8;
vcpu->arch.sie_block->eca = 0xC1002001U;
vcpu->arch.sie_block->fac = (int) (long) facilities;
hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS);
@@ -397,7 +408,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
rc = -ENOMEM;
- vcpu = kzalloc(sizeof(struct kvm_vcpu), GFP_KERNEL);
+ vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
if (!vcpu)
goto out;
@@ -427,7 +438,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
vcpu->arch.local_int.float_int = &kvm->arch.float_int;
spin_lock(&kvm->arch.float_int.lock);
kvm->arch.float_int.local_int[id] = &vcpu->arch.local_int;
- init_waitqueue_head(&vcpu->arch.local_int.wq);
+ vcpu->arch.local_int.wq = &vcpu->wq;
vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
spin_unlock(&kvm->arch.float_int.lock);
@@ -442,7 +453,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
out_free_sie_block:
free_page((unsigned long)(vcpu->arch.sie_block));
out_free_cpu:
- kfree(vcpu);
+ kmem_cache_free(kvm_vcpu_cache, vcpu);
out:
return ERR_PTR(rc);
}
@@ -454,6 +465,50 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
return 0;
}
+void s390_vcpu_block(struct kvm_vcpu *vcpu)
+{
+ atomic_set_mask(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
+}
+
+void s390_vcpu_unblock(struct kvm_vcpu *vcpu)
+{
+ atomic_clear_mask(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
+}
+
+/*
+ * Kick a guest cpu out of SIE and wait until SIE is not running.
+ * If the CPU is not running (e.g. waiting as idle) the function will
+ * return immediately. */
+void exit_sie(struct kvm_vcpu *vcpu)
+{
+ atomic_set_mask(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
+ while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
+ cpu_relax();
+}
+
+/* Kick a guest cpu out of SIE and prevent SIE-reentry */
+void exit_sie_sync(struct kvm_vcpu *vcpu)
+{
+ s390_vcpu_block(vcpu);
+ exit_sie(vcpu);
+}
+
+static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address)
+{
+ int i;
+ struct kvm *kvm = gmap->private;
+ struct kvm_vcpu *vcpu;
+
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ /* match against both prefix pages */
+ if (vcpu->arch.sie_block->prefix == (address & ~0x1000UL)) {
+ VCPU_EVENT(vcpu, 2, "gmap notifier for %lx", address);
+ kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
+ exit_sie_sync(vcpu);
+ }
+ }
+}
+
int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
{
/* kvm common code refers to this, but never calls it */
@@ -606,6 +661,27 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
return -EINVAL; /* not implemented yet */
}
+static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
+{
+ /*
+ * We use MMU_RELOAD just to re-arm the ipte notifier for the
+ * guest prefix page. gmap_ipte_notify will wait on the ptl lock.
+ * This ensures that the ipte instruction for this request has
+ * already finished. We might race against a second unmapper that
+ * wants to set the blocking bit. Let's just retry the request loop.
+ */
+ while (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
+ int rc;
+ rc = gmap_ipte_notify(vcpu->arch.gmap,
+ vcpu->arch.sie_block->prefix,
+ PAGE_SIZE * 2);
+ if (rc)
+ return rc;
+ s390_vcpu_unblock(vcpu);
+ }
+ return 0;
+}
+
static int __vcpu_run(struct kvm_vcpu *vcpu)
{
int rc;
@@ -621,6 +697,10 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
if (!kvm_is_ucontrol(vcpu->kvm))
kvm_s390_deliver_pending_interrupts(vcpu);
+ rc = kvm_s390_handle_requests(vcpu);
+ if (rc)
+ return rc;
+
vcpu->arch.sie_block->icptcode = 0;
preempt_disable();
kvm_guest_enter();
@@ -630,7 +710,9 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
trace_kvm_s390_sie_enter(vcpu,
atomic_read(&vcpu->arch.sie_block->cpuflags));
rc = sie64a(vcpu->arch.sie_block, vcpu->run->s.regs.gprs);
- if (rc) {
+ if (rc > 0)
+ rc = 0;
+ if (rc < 0) {
if (kvm_is_ucontrol(vcpu->kvm)) {
rc = SIE_INTERCEPT_UCONTROL;
} else {
@@ -1046,7 +1128,7 @@ static int __init kvm_s390_init(void)
return -ENOMEM;
}
memcpy(facilities, S390_lowcore.stfle_fac_list, 16);
- facilities[0] &= 0xff00fff3f47c0000ULL;
+ facilities[0] &= 0xff82fff3f47c0000ULL;
facilities[1] &= 0x001c000000000000ULL;
return 0;
}
@@ -1059,3 +1141,12 @@ static void __exit kvm_s390_exit(void)
module_init(kvm_s390_init);
module_exit(kvm_s390_exit);
+
+/*
+ * Enable autoloading of the kvm module.
+ * Note that we add the module alias here instead of virt/kvm/kvm_main.c
+ * since x86 takes a different approach.
+ */
+#include <linux/miscdevice.h>
+MODULE_ALIAS_MISCDEV(KVM_MINOR);
+MODULE_ALIAS("devname:kvm");
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index efc14f687265..028ca9fd2158 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -63,6 +63,7 @@ static inline void kvm_s390_set_prefix(struct kvm_vcpu *vcpu, u32 prefix)
{
vcpu->arch.sie_block->prefix = prefix & 0x7fffe000u;
vcpu->arch.sie_block->ihcpu = 0xffff;
+ kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
}
static inline u64 kvm_s390_get_base_disp_s(struct kvm_vcpu *vcpu)
@@ -85,6 +86,12 @@ static inline void kvm_s390_get_base_disp_sse(struct kvm_vcpu *vcpu,
*address2 = (base2 ? vcpu->run->s.regs.gprs[base2] : 0) + disp2;
}
+static inline void kvm_s390_get_regs_rre(struct kvm_vcpu *vcpu, int *r1, int *r2)
+{ /* Decode two 4-bit register numbers from the SIE block's ipb field and store them in *r1 and *r2. */
+ *r1 = (vcpu->arch.sie_block->ipb & 0x00f00000) >> 20; /* ipb bits 20-23 (bit 0 = least significant) */
+ *r2 = (vcpu->arch.sie_block->ipb & 0x000f0000) >> 16; /* ipb bits 16-19 (bit 0 = least significant) */
+}
+
static inline u64 kvm_s390_get_base_disp_rsy(struct kvm_vcpu *vcpu)
{
u32 base2 = vcpu->arch.sie_block->ipb >> 28;
@@ -125,7 +132,8 @@ int kvm_s390_handle_e5(struct kvm_vcpu *vcpu);
int kvm_s390_handle_01(struct kvm_vcpu *vcpu);
int kvm_s390_handle_b9(struct kvm_vcpu *vcpu);
int kvm_s390_handle_lpsw(struct kvm_vcpu *vcpu);
-int kvm_s390_handle_priv_eb(struct kvm_vcpu *vcpu);
+int kvm_s390_handle_lctl(struct kvm_vcpu *vcpu);
+int kvm_s390_handle_eb(struct kvm_vcpu *vcpu);
/* implemented in sigp.c */
int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu);
@@ -133,6 +141,10 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu);
/* implemented in kvm-s390.c */
int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu,
unsigned long addr);
+void s390_vcpu_block(struct kvm_vcpu *vcpu);
+void s390_vcpu_unblock(struct kvm_vcpu *vcpu);
+void exit_sie(struct kvm_vcpu *vcpu);
+void exit_sie_sync(struct kvm_vcpu *vcpu);
/* implemented in diag.c */
int kvm_s390_handle_diag(struct kvm_vcpu *vcpu);
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index 6bbd7b5a0bbe..0da3e6eb6be6 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -1,7 +1,7 @@
/*
* handling privileged instructions
*
- * Copyright IBM Corp. 2008
+ * Copyright IBM Corp. 2008, 2013
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License (version 2 only)
@@ -20,6 +20,9 @@
#include <asm/debug.h>
#include <asm/ebcdic.h>
#include <asm/sysinfo.h>
+#include <asm/pgtable.h>
+#include <asm/pgalloc.h>
+#include <asm/io.h>
#include <asm/ptrace.h>
#include <asm/compat.h>
#include "gaccess.h"
@@ -34,6 +37,9 @@ static int handle_set_prefix(struct kvm_vcpu *vcpu)
vcpu->stat.instruction_spx++;
+ if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+ return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
operand2 = kvm_s390_get_base_disp_s(vcpu);
/* must be word boundary */
@@ -65,6 +71,9 @@ static int handle_store_prefix(struct kvm_vcpu *vcpu)
vcpu->stat.instruction_stpx++;
+ if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+ return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
operand2 = kvm_s390_get_base_disp_s(vcpu);
/* must be word boundary */
@@ -89,6 +98,9 @@ static int handle_store_cpu_address(struct kvm_vcpu *vcpu)
vcpu->stat.instruction_stap++;
+ if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+ return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
useraddr = kvm_s390_get_base_disp_s(vcpu);
if (useraddr & 1)
@@ -105,7 +117,12 @@ static int handle_store_cpu_address(struct kvm_vcpu *vcpu)
static int handle_skey(struct kvm_vcpu *vcpu)
{
vcpu->stat.instruction_storage_key++;
- vcpu->arch.sie_block->gpsw.addr -= 4;
+
+ if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+ return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+ vcpu->arch.sie_block->gpsw.addr =
+ __rewind_psw(vcpu->arch.sie_block->gpsw, 4);
VCPU_EVENT(vcpu, 4, "%s", "retrying storage key operation");
return 0;
}
@@ -129,9 +146,10 @@ static int handle_tpi(struct kvm_vcpu *vcpu)
* Store the two-word I/O interruption code into the
* provided area.
*/
- put_guest(vcpu, inti->io.subchannel_id, (u16 __user *) addr);
- put_guest(vcpu, inti->io.subchannel_nr, (u16 __user *) (addr + 2));
- put_guest(vcpu, inti->io.io_int_parm, (u32 __user *) (addr + 4));
+ if (put_guest(vcpu, inti->io.subchannel_id, (u16 __user *)addr)
+ || put_guest(vcpu, inti->io.subchannel_nr, (u16 __user *)(addr + 2))
+ || put_guest(vcpu, inti->io.io_int_parm, (u32 __user *)(addr + 4)))
+ return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
} else {
/*
* Store the three-word I/O interruption code into
@@ -182,6 +200,9 @@ static int handle_io_inst(struct kvm_vcpu *vcpu)
{
VCPU_EVENT(vcpu, 4, "%s", "I/O instruction");
+ if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+ return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
if (vcpu->kvm->arch.css_support) {
/*
* Most I/O instructions will be handled by userspace.
@@ -210,8 +231,12 @@ static int handle_stfl(struct kvm_vcpu *vcpu)
int rc;
vcpu->stat.instruction_stfl++;
+
+ if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+ return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
/* only pass the facility bits, which we can handle */
- facility_list = S390_lowcore.stfl_fac_list & 0xff00fff3;
+ facility_list = S390_lowcore.stfl_fac_list & 0xff82fff3;
rc = copy_to_guest(vcpu, offsetof(struct _lowcore, stfl_fac_list),
&facility_list, sizeof(facility_list));
@@ -255,8 +280,8 @@ int kvm_s390_handle_lpsw(struct kvm_vcpu *vcpu)
u64 addr;
if (gpsw->mask & PSW_MASK_PSTATE)
- return kvm_s390_inject_program_int(vcpu,
- PGM_PRIVILEGED_OPERATION);
+ return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
addr = kvm_s390_get_base_disp_s(vcpu);
if (addr & 7)
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
@@ -278,6 +303,9 @@ static int handle_lpswe(struct kvm_vcpu *vcpu)
psw_t new_psw;
u64 addr;
+ if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+ return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
addr = kvm_s390_get_base_disp_s(vcpu);
if (addr & 7)
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
@@ -296,6 +324,9 @@ static int handle_stidp(struct kvm_vcpu *vcpu)
vcpu->stat.instruction_stidp++;
+ if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+ return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
operand2 = kvm_s390_get_base_disp_s(vcpu);
if (operand2 & 7)
@@ -351,16 +382,30 @@ static int handle_stsi(struct kvm_vcpu *vcpu)
vcpu->stat.instruction_stsi++;
VCPU_EVENT(vcpu, 4, "stsi: fc: %x sel1: %x sel2: %x", fc, sel1, sel2);
- operand2 = kvm_s390_get_base_disp_s(vcpu);
+ if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+ return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+ if (fc > 3) {
+ vcpu->arch.sie_block->gpsw.mask |= 3ul << 44; /* cc 3 */
+ return 0;
+ }
- if (operand2 & 0xfff && fc > 0)
+ if (vcpu->run->s.regs.gprs[0] & 0x0fffff00
+ || vcpu->run->s.regs.gprs[1] & 0xffff0000)
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
- switch (fc) {
- case 0:
+ if (fc == 0) {
vcpu->run->s.regs.gprs[0] = 3 << 28;
- vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44);
+ vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44); /* cc 0 */
return 0;
+ }
+
+ operand2 = kvm_s390_get_base_disp_s(vcpu);
+
+ if (operand2 & 0xfff)
+ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+ switch (fc) {
case 1: /* same handling for 1 and 2 */
case 2:
mem = get_zeroed_page(GFP_KERNEL);
@@ -377,8 +422,6 @@ static int handle_stsi(struct kvm_vcpu *vcpu)
goto out_no_data;
handle_stsi_3_2_2(vcpu, (void *) mem);
break;
- default:
- goto out_no_data;
}
if (copy_to_guest_absolute(vcpu, operand2, (void *) mem, PAGE_SIZE)) {
@@ -432,20 +475,14 @@ int kvm_s390_handle_b2(struct kvm_vcpu *vcpu)
intercept_handler_t handler;
/*
- * a lot of B2 instructions are priviledged. We first check for
- * the privileged ones, that we can handle in the kernel. If the
- * kernel can handle this instruction, we check for the problem
- * state bit and (a) handle the instruction or (b) send a code 2
- * program check.
- * Anything else goes to userspace.*/
+ * A lot of B2 instructions are privileged. Here we check for
+ * the privileged ones, that we can handle in the kernel.
+ * Anything else goes to userspace.
+ */
handler = b2_handlers[vcpu->arch.sie_block->ipa & 0x00ff];
- if (handler) {
- if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
- return kvm_s390_inject_program_int(vcpu,
- PGM_PRIVILEGED_OPERATION);
- else
- return handler(vcpu);
- }
+ if (handler)
+ return handler(vcpu);
+
return -EOPNOTSUPP;
}
@@ -453,8 +490,7 @@ static int handle_epsw(struct kvm_vcpu *vcpu)
{
int reg1, reg2;
- reg1 = (vcpu->arch.sie_block->ipb & 0x00f00000) >> 24;
- reg2 = (vcpu->arch.sie_block->ipb & 0x000f0000) >> 16;
+ kvm_s390_get_regs_rre(vcpu, &reg1, &reg2);
/* This basically extracts the mask half of the psw. */
vcpu->run->s.regs.gprs[reg1] &= 0xffffffff00000000;
@@ -467,9 +503,88 @@ static int handle_epsw(struct kvm_vcpu *vcpu)
return 0;
}
+#define PFMF_RESERVED 0xfffc0101UL /* any of these bits set in gprs[reg1] -> specification exception */
+#define PFMF_SK 0x00020000UL /* set the storage key of each frame */
+#define PFMF_CF 0x00010000UL /* clear (zero) each frame */
+#define PFMF_UI 0x00008000UL
+#define PFMF_FSC 0x00007000UL /* frame-size code: 0 = 4K, 1 = 1M (see switch below) */
+#define PFMF_NQ 0x00000800UL /* non-quiescing control */
+#define PFMF_MR 0x00000400UL /* MR/MC: conditional-SSKE controls, not supported here */
+#define PFMF_MC 0x00000200UL
+#define PFMF_KEY 0x000000feUL /* key value handed to set_guest_storage_key() */
+/* Emulate an intercepted PFMF; returns 0 or the result of injecting a program interrupt into the guest. */
+static int handle_pfmf(struct kvm_vcpu *vcpu)
+{
+ int reg1, reg2;
+ unsigned long start, end;
+
+ vcpu->stat.instruction_pfmf++;
+
+ kvm_s390_get_regs_rre(vcpu, &reg1, &reg2); /* gprs[reg1] = control bits, gprs[reg2] = frame address */
+
+ if (!MACHINE_HAS_PFMF)
+ return kvm_s390_inject_program_int(vcpu, PGM_OPERATION);
+
+ if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+ return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+ if (vcpu->run->s.regs.gprs[reg1] & PFMF_RESERVED)
+ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+ /* Reject the NQ control unless the host facility list advertises it (bit 0x00020000) */
+ if (vcpu->run->s.regs.gprs[reg1] & PFMF_NQ &&
+ !(S390_lowcore.stfl_fac_list & 0x00020000))
+ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+ /* No support for conditional-SSKE */
+ if (vcpu->run->s.regs.gprs[reg1] & (PFMF_MR | PFMF_MC))
+ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+ start = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK;
+ switch (vcpu->run->s.regs.gprs[reg1] & PFMF_FSC) {
+ case 0x00000000: /* 4K frame: end is the next 4K boundary after start */
+ end = (start + (1UL << 12)) & ~((1UL << 12) - 1);
+ break;
+ case 0x00001000: /* 1M frame: process from start up to the next 1M boundary */
+ end = (start + (1UL << 20)) & ~((1UL << 20) - 1);
+ break;
+ /* We don't support EDAT2
+ case 0x00002000:
+ end = (start + (1UL << 31)) & ~((1UL << 31) - 1);
+ break;*/
+ default:
+ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+ }
+ while (start < end) { /* apply the requested functions one page at a time */
+ unsigned long useraddr;
+
+ useraddr = gmap_translate(start, vcpu->arch.gmap);
+ if (IS_ERR((void *)useraddr))
+ return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+
+ if (vcpu->run->s.regs.gprs[reg1] & PFMF_CF) {
+ if (clear_user((void __user *)useraddr, PAGE_SIZE))
+ return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+ }
+
+ if (vcpu->run->s.regs.gprs[reg1] & PFMF_SK) {
+ if (set_guest_storage_key(current->mm, useraddr,
+ vcpu->run->s.regs.gprs[reg1] & PFMF_KEY,
+ vcpu->run->s.regs.gprs[reg1] & PFMF_NQ))
+ return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+ }
+
+ start += PAGE_SIZE;
+ }
+ if (vcpu->run->s.regs.gprs[reg1] & PFMF_FSC) /* non-4K request: hand the end address back in reg2 */
+ vcpu->run->s.regs.gprs[reg2] = end;
+ return 0;
+}
+
static const intercept_handler_t b9_handlers[256] = {
[0x8d] = handle_epsw,
[0x9c] = handle_io_inst,
+ [0xaf] = handle_pfmf,
};
int kvm_s390_handle_b9(struct kvm_vcpu *vcpu)
@@ -478,29 +593,96 @@ int kvm_s390_handle_b9(struct kvm_vcpu *vcpu)
/* This is handled just as for the B2 instructions. */
handler = b9_handlers[vcpu->arch.sie_block->ipa & 0x00ff];
- if (handler) {
- if ((handler != handle_epsw) &&
- (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE))
- return kvm_s390_inject_program_int(vcpu,
- PGM_PRIVILEGED_OPERATION);
- else
- return handler(vcpu);
- }
+ if (handler)
+ return handler(vcpu);
+
return -EOPNOTSUPP;
}
+int kvm_s390_handle_lctl(struct kvm_vcpu *vcpu)
+{ /* Emulate an intercepted lctl: load the low 32 bits of guest control registers reg1..reg3 from guest memory. */
+ int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4; /* first register: ipa bits 4-7 */
+ int reg3 = vcpu->arch.sie_block->ipa & 0x000f; /* last register: ipa bits 0-3 */
+ u64 useraddr;
+ u32 val = 0;
+ int reg, rc;
+
+ vcpu->stat.instruction_lctl++;
+
+ if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) /* guest PSW has the problem-state bit set */
+ return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+ useraddr = kvm_s390_get_base_disp_rs(vcpu);
+
+ if (useraddr & 3) /* operand must be 4-byte aligned */
+ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+ VCPU_EVENT(vcpu, 5, "lctl r1:%x, r3:%x, addr:%llx", reg1, reg3,
+ useraddr);
+ trace_kvm_s390_handle_lctl(vcpu, 0, reg1, reg3, useraddr); /* NOTE(review): 0 here vs 1 in handle_lctlg presumably flags the 32-bit form - confirm */
+
+ reg = reg1;
+ do { /* always processes reg1; stops after reg3 has been loaded */
+ rc = get_guest(vcpu, val, (u32 __user *) useraddr);
+ if (rc) /* registers loaded in earlier iterations stay modified */
+ return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+ vcpu->arch.sie_block->gcr[reg] &= 0xffffffff00000000ul; /* keep the upper 32 bits */
+ vcpu->arch.sie_block->gcr[reg] |= val; /* replace the lower 32 bits */
+ useraddr += 4;
+ if (reg == reg3)
+ break;
+ reg = (reg + 1) % 16; /* register numbers wrap from 15 to 0 */
+ } while (1);
+
+ return 0; /* 0 on success; otherwise the result of kvm_s390_inject_program_int() */
+}
+
+static int handle_lctlg(struct kvm_vcpu *vcpu)
+{ /* Emulate an intercepted lctlg: load full 64-bit guest control registers reg1..reg3 from guest memory. */
+ int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4; /* first register: ipa bits 4-7 */
+ int reg3 = vcpu->arch.sie_block->ipa & 0x000f; /* last register: ipa bits 0-3 */
+ u64 useraddr;
+ int reg, rc;
+
+ vcpu->stat.instruction_lctlg++;
+
+ if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) /* guest PSW has the problem-state bit set */
+ return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+ useraddr = kvm_s390_get_base_disp_rsy(vcpu);
+
+ if (useraddr & 7) /* operand must be 8-byte aligned */
+ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+ reg = reg1;
+
+ VCPU_EVENT(vcpu, 5, "lctlg r1:%x, r3:%x, addr:%llx", reg1, reg3,
+ useraddr);
+ trace_kvm_s390_handle_lctl(vcpu, 1, reg1, reg3, useraddr); /* NOTE(review): 1 here vs 0 in kvm_s390_handle_lctl presumably flags the 64-bit form - confirm */
+
+ do { /* always processes reg1; stops after reg3 has been loaded */
+ rc = get_guest(vcpu, vcpu->arch.sie_block->gcr[reg],
+ (u64 __user *) useraddr); /* reads straight into the control register */
+ if (rc) /* registers loaded in earlier iterations stay modified */
+ return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+ useraddr += 8;
+ if (reg == reg3)
+ break;
+ reg = (reg + 1) % 16; /* register numbers wrap from 15 to 0 */
+ } while (1);
+
+ return 0; /* 0 on success; otherwise the result of kvm_s390_inject_program_int() */
+}
+
static const intercept_handler_t eb_handlers[256] = {
+ [0x2f] = handle_lctlg,
[0x8a] = handle_io_inst,
};
-int kvm_s390_handle_priv_eb(struct kvm_vcpu *vcpu)
+int kvm_s390_handle_eb(struct kvm_vcpu *vcpu)
{
intercept_handler_t handler;
- /* All eb instructions that end up here are privileged. */
- if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
- return kvm_s390_inject_program_int(vcpu,
- PGM_PRIVILEGED_OPERATION);
handler = eb_handlers[vcpu->arch.sie_block->ipb & 0xff];
if (handler)
return handler(vcpu);
@@ -515,6 +697,9 @@ static int handle_tprot(struct kvm_vcpu *vcpu)
vcpu->stat.instruction_tprot++;
+ if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+ return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
kvm_s390_get_base_disp_sse(vcpu, &address1, &address2);
/* we only handle the Linux memory detection case:
@@ -560,8 +745,7 @@ static int handle_sckpf(struct kvm_vcpu *vcpu)
u32 value;
if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
- return kvm_s390_inject_program_int(vcpu,
- PGM_PRIVILEGED_OPERATION);
+ return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
if (vcpu->run->s.regs.gprs[0] & 0x00000000ffff0000)
return kvm_s390_inject_program_int(vcpu,
diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c
index 1c48ab2845e0..bec398c57acf 100644
--- a/arch/s390/kvm/sigp.c
+++ b/arch/s390/kvm/sigp.c
@@ -79,8 +79,8 @@ static int __sigp_emergency(struct kvm_vcpu *vcpu, u16 cpu_addr)
list_add_tail(&inti->list, &li->list);
atomic_set(&li->active, 1);
atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
- if (waitqueue_active(&li->wq))
- wake_up_interruptible(&li->wq);
+ if (waitqueue_active(li->wq))
+ wake_up_interruptible(li->wq);
spin_unlock_bh(&li->lock);
rc = SIGP_CC_ORDER_CODE_ACCEPTED;
VCPU_EVENT(vcpu, 4, "sent sigp emerg to cpu %x", cpu_addr);
@@ -117,8 +117,8 @@ static int __sigp_external_call(struct kvm_vcpu *vcpu, u16 cpu_addr)
list_add_tail(&inti->list, &li->list);
atomic_set(&li->active, 1);
atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
- if (waitqueue_active(&li->wq))
- wake_up_interruptible(&li->wq);
+ if (waitqueue_active(li->wq))
+ wake_up_interruptible(li->wq);
spin_unlock_bh(&li->lock);
rc = SIGP_CC_ORDER_CODE_ACCEPTED;
VCPU_EVENT(vcpu, 4, "sent sigp ext call to cpu %x", cpu_addr);
@@ -145,8 +145,8 @@ static int __inject_sigp_stop(struct kvm_s390_local_interrupt *li, int action)
atomic_set(&li->active, 1);
atomic_set_mask(CPUSTAT_STOP_INT, li->cpuflags);
li->action_bits |= action;
- if (waitqueue_active(&li->wq))
- wake_up_interruptible(&li->wq);
+ if (waitqueue_active(li->wq))
+ wake_up_interruptible(li->wq);
out:
spin_unlock_bh(&li->lock);
@@ -250,8 +250,8 @@ static int __sigp_set_prefix(struct kvm_vcpu *vcpu, u16 cpu_addr, u32 address,
list_add_tail(&inti->list, &li->list);
atomic_set(&li->active, 1);
- if (waitqueue_active(&li->wq))
- wake_up_interruptible(&li->wq);
+ if (waitqueue_active(li->wq))
+ wake_up_interruptible(li->wq);
rc = SIGP_CC_ORDER_CODE_ACCEPTED;
VCPU_EVENT(vcpu, 4, "set prefix of cpu %02x to %x", cpu_addr, address);
@@ -333,8 +333,7 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu)
/* sigp in userspace can exit */
if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
- return kvm_s390_inject_program_int(vcpu,
- PGM_PRIVILEGED_OPERATION);
+ return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
order_code = kvm_s390_get_base_disp_rs(vcpu);
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 74c29d922458..17bf4d3d303a 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -689,7 +689,7 @@ int gmap_ipte_notify(struct gmap *gmap, unsigned long start, unsigned long len)
entry = *ptep;
if ((pte_val(entry) & (_PAGE_INVALID | _PAGE_RO)) == 0) {
pgste = pgste_get_lock(ptep);
- pgste_val(pgste) |= RCP_IN_BIT;
+ pgste_val(pgste) |= PGSTE_IN_BIT;
pgste_set_unlock(ptep, pgste);
start += PAGE_SIZE;
len -= PAGE_SIZE;