diff options
author | Ingo Molnar <mingo@kernel.org> | 2012-04-14 13:18:27 +0200 |
---|---|---|
committer | Ingo Molnar <mingo@kernel.org> | 2012-04-14 13:19:04 +0200 |
commit | 6ac1ef482d7ae0c690f1640bf6eb818ff9a2d91e (patch) | |
tree | 021cc9f6b477146fcebe6f3be4752abfa2ba18a9 /arch/s390 | |
parent | 682968e0c425c60f0dde37977e5beb2b12ddc4cc (diff) | |
parent | a385ec4f11bdcf81af094c03e2444ee9b7fad2e5 (diff) |
Merge branch 'perf/core' into perf/uprobes
Merge in latest upstream (and the latest perf development tree),
to prepare for tooling changes, and also to pick up v3.4 MM
changes that the uprobes code needs to take care of.
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'arch/s390')
83 files changed, 3214 insertions, 1900 deletions
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 6d99a5fcc090..2b7c0fbe578e 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -64,6 +64,7 @@ config ARCH_SUPPORTS_DEBUG_PAGEALLOC config S390 def_bool y select USE_GENERIC_SMP_HELPERS if SMP + select GENERIC_CPU_DEVICES if !SMP select HAVE_SYSCALL_WRAPPERS select HAVE_FUNCTION_TRACER select HAVE_FUNCTION_TRACE_MCOUNT_TEST @@ -218,6 +219,7 @@ config COMPAT prompt "Kernel support for 31 bit emulation" depends on 64BIT select COMPAT_BINFMT_ELF + select ARCH_WANT_OLD_COMPAT_IPC help Select this option if you want to enable your system kernel to handle system-calls from ELF binaries for 31 bit ESA. This option diff --git a/arch/s390/crypto/crypt_s390.h b/arch/s390/crypto/crypt_s390.h index ffd1ac255f19..9178db6db0a5 100644 --- a/arch/s390/crypto/crypt_s390.h +++ b/arch/s390/crypto/crypt_s390.h @@ -17,6 +17,7 @@ #define _CRYPTO_ARCH_S390_CRYPT_S390_H #include <asm/errno.h> +#include <asm/facility.h> #define CRYPT_S390_OP_MASK 0xFF00 #define CRYPT_S390_FUNC_MASK 0x00FF diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c index 8a2a887478cc..6a2cb560e968 100644 --- a/arch/s390/hypfs/inode.c +++ b/arch/s390/hypfs/inode.c @@ -293,11 +293,9 @@ static int hypfs_fill_super(struct super_block *sb, void *data, int silent) return -ENOMEM; root_inode->i_op = &simple_dir_inode_operations; root_inode->i_fop = &simple_dir_operations; - sb->s_root = root_dentry = d_alloc_root(root_inode); - if (!root_dentry) { - iput(root_inode); + sb->s_root = root_dentry = d_make_root(root_inode); + if (!root_dentry) return -ENOMEM; - } if (MACHINE_IS_VM) rc = hypfs_vm_create_files(sb, root_dentry); else diff --git a/arch/s390/include/asm/atomic.h b/arch/s390/include/asm/atomic.h index 8517d2ae3b5c..748347baecb8 100644 --- a/arch/s390/include/asm/atomic.h +++ b/arch/s390/include/asm/atomic.h @@ -15,7 +15,7 @@ #include <linux/compiler.h> #include <linux/types.h> -#include <asm/system.h> +#include <asm/cmpxchg.h> #define ATOMIC_INIT(i) { (i) } diff --git a/arch/s390/include/asm/barrier.h b/arch/s390/include/asm/barrier.h new file mode 100644 index 000000000000..451273ad4d34 --- /dev/null +++ b/arch/s390/include/asm/barrier.h @@ -0,0 +1,35 @@ +/* + * Copyright IBM Corp. 1999, 2009 + * + * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> + */ + +#ifndef __ASM_BARRIER_H +#define __ASM_BARRIER_H + +/* + * Force strict CPU ordering. + * And yes, this is required on UP too when we're talking + * to devices. + * + * This is very similar to the ppc eieio/sync instruction in that is + * does a checkpoint syncronisation & makes sure that + * all memory ops have completed wrt other CPU's ( see 7-15 POP DJB ). + */ + +#define eieio() asm volatile("bcr 15,0" : : : "memory") +#define SYNC_OTHER_CORES(x) eieio() +#define mb() eieio() +#define rmb() eieio() +#define wmb() eieio() +#define read_barrier_depends() do { } while(0) +#define smp_mb() mb() +#define smp_rmb() rmb() +#define smp_wmb() wmb() +#define smp_read_barrier_depends() read_barrier_depends() +#define smp_mb__before_clear_bit() smp_mb() +#define smp_mb__after_clear_bit() smp_mb() + +#define set_mb(var, value) do { var = value; mb(); } while (0) + +#endif /* __ASM_BARRIER_H */ diff --git a/arch/s390/include/asm/cpu_mf.h b/arch/s390/include/asm/cpu_mf.h new file mode 100644 index 000000000000..a3afecdae145 --- /dev/null +++ b/arch/s390/include/asm/cpu_mf.h @@ -0,0 +1,97 @@ +/* + * CPU-measurement facilities + * + * Copyright IBM Corp. 2012 + * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com> + * Jan Glauber <jang@linux.vnet.ibm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. + */ +#ifndef _ASM_S390_CPU_MF_H +#define _ASM_S390_CPU_MF_H + +#include <asm/facility.h> + +#define CPU_MF_INT_SF_IAE (1 << 31) /* invalid entry address */ +#define CPU_MF_INT_SF_ISE (1 << 30) /* incorrect SDBT entry */ +#define CPU_MF_INT_SF_PRA (1 << 29) /* program request alert */ +#define CPU_MF_INT_SF_SACA (1 << 23) /* sampler auth. change alert */ +#define CPU_MF_INT_SF_LSDA (1 << 22) /* loss of sample data alert */ +#define CPU_MF_INT_CF_CACA (1 << 7) /* counter auth. change alert */ +#define CPU_MF_INT_CF_LCDA (1 << 6) /* loss of counter data alert */ + +#define CPU_MF_INT_CF_MASK (CPU_MF_INT_CF_CACA|CPU_MF_INT_CF_LCDA) +#define CPU_MF_INT_SF_MASK (CPU_MF_INT_SF_IAE|CPU_MF_INT_SF_ISE| \ + CPU_MF_INT_SF_PRA|CPU_MF_INT_SF_SACA| \ + CPU_MF_INT_SF_LSDA) + +/* CPU measurement facility support */ +static inline int cpum_cf_avail(void) +{ + return MACHINE_HAS_SPP && test_facility(67); +} + +static inline int cpum_sf_avail(void) +{ + return MACHINE_HAS_SPP && test_facility(68); +} + + +struct cpumf_ctr_info { + u16 cfvn; + u16 auth_ctl; + u16 enable_ctl; + u16 act_ctl; + u16 max_cpu; + u16 csvn; + u16 max_cg; + u16 reserved1; + u32 reserved2[12]; +} __packed; + +/* Query counter information */ +static inline int qctri(struct cpumf_ctr_info *info) +{ + int rc = -EINVAL; + + asm volatile ( + "0: .insn s,0xb28e0000,%1\n" + "1: lhi %0,0\n" + "2:\n" + EX_TABLE(1b, 2b) + : "+d" (rc), "=Q" (*info)); + return rc; +} + +/* Load CPU-counter-set controls */ +static inline int lcctl(u64 ctl) +{ + int cc; + + asm volatile ( + " .insn s,0xb2840000,%1\n" + " ipm %0\n" + " srl %0,28\n" + : "=d" (cc) : "m" (ctl) : "cc"); + return cc; +} + +/* Extract CPU counter */ +static inline int ecctr(u64 ctr, u64 *val) +{ + register u64 content asm("4") = 0; + int cc; + + asm volatile ( + " .insn rre,0xb2e40000,%0,%2\n" + " ipm %1\n" + " srl %1,28\n" + : "=d" (content), "=d" (cc) : "d" (ctr) : "cc"); + if (!cc) + *val = content; + return cc; +} + +#endif /* _ASM_S390_CPU_MF_H */ diff --git a/arch/s390/include/asm/cputime.h b/arch/s390/include/asm/cputime.h index c23c3900c304..24ef186a1c4f 100644 --- a/arch/s390/include/asm/cputime.h +++ b/arch/s390/include/asm/cputime.h @@ -170,24 +170,17 @@ struct s390_idle_data { unsigned int sequence; unsigned long long idle_count; unsigned long long idle_enter; + unsigned long long idle_exit; unsigned long long idle_time; int nohz_delay; }; DECLARE_PER_CPU(struct s390_idle_data, s390_idle); -void vtime_start_cpu(__u64 int_clock, __u64 enter_timer); cputime64_t s390_get_idle_time(int cpu); #define arch_idle_time(cpu) s390_get_idle_time(cpu) -static inline void s390_idle_check(struct pt_regs *regs, __u64 int_clock, - __u64 enter_timer) -{ - if (regs->psw.mask & PSW_MASK_WAIT) - vtime_start_cpu(int_clock, enter_timer); -} - static inline int s390_nohz_delay(int cpu) { return __get_cpu_var(s390_idle).nohz_delay != 0; diff --git a/arch/s390/include/asm/ctl_reg.h b/arch/s390/include/asm/ctl_reg.h new file mode 100644 index 000000000000..ecde9417d669 --- /dev/null +++ b/arch/s390/include/asm/ctl_reg.h @@ -0,0 +1,76 @@ +/* + * Copyright IBM Corp. 1999, 2009 + * + * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> + */ + +#ifndef __ASM_CTL_REG_H +#define __ASM_CTL_REG_H + +#ifdef __s390x__ + +#define __ctl_load(array, low, high) ({ \ + typedef struct { char _[sizeof(array)]; } addrtype; \ + asm volatile( \ + " lctlg %1,%2,%0\n" \ + : : "Q" (*(addrtype *)(&array)), \ + "i" (low), "i" (high)); \ + }) + +#define __ctl_store(array, low, high) ({ \ + typedef struct { char _[sizeof(array)]; } addrtype; \ + asm volatile( \ + " stctg %1,%2,%0\n" \ + : "=Q" (*(addrtype *)(&array)) \ + : "i" (low), "i" (high)); \ + }) + +#else /* __s390x__ */ + +#define __ctl_load(array, low, high) ({ \ + typedef struct { char _[sizeof(array)]; } addrtype; \ + asm volatile( \ + " lctl %1,%2,%0\n" \ + : : "Q" (*(addrtype *)(&array)), \ + "i" (low), "i" (high)); \ +}) + +#define __ctl_store(array, low, high) ({ \ + typedef struct { char _[sizeof(array)]; } addrtype; \ + asm volatile( \ + " stctl %1,%2,%0\n" \ + : "=Q" (*(addrtype *)(&array)) \ + : "i" (low), "i" (high)); \ + }) + +#endif /* __s390x__ */ + +#define __ctl_set_bit(cr, bit) ({ \ + unsigned long __dummy; \ + __ctl_store(__dummy, cr, cr); \ + __dummy |= 1UL << (bit); \ + __ctl_load(__dummy, cr, cr); \ +}) + +#define __ctl_clear_bit(cr, bit) ({ \ + unsigned long __dummy; \ + __ctl_store(__dummy, cr, cr); \ + __dummy &= ~(1UL << (bit)); \ + __ctl_load(__dummy, cr, cr); \ +}) + +#ifdef CONFIG_SMP + +extern void smp_ctl_set_bit(int cr, int bit); +extern void smp_ctl_clear_bit(int cr, int bit); +#define ctl_set_bit(cr, bit) smp_ctl_set_bit(cr, bit) +#define ctl_clear_bit(cr, bit) smp_ctl_clear_bit(cr, bit) + +#else + +#define ctl_set_bit(cr, bit) __ctl_set_bit(cr, bit) +#define ctl_clear_bit(cr, bit) __ctl_clear_bit(cr, bit) + +#endif /* CONFIG_SMP */ + +#endif /* __ASM_CTL_REG_H */ diff --git a/arch/s390/include/asm/debug.h b/arch/s390/include/asm/debug.h index 9d88db1f55d0..8a8245ed14d2 100644 --- a/arch/s390/include/asm/debug.h +++ b/arch/s390/include/asm/debug.h @@ -131,6 +131,7 @@ void debug_unregister(debug_info_t* id); void debug_set_level(debug_info_t* id, int new_level); +void debug_set_critical(void); void debug_stop_all(void); static inline debug_entry_t* diff --git a/arch/s390/include/asm/elf.h b/arch/s390/include/asm/elf.h index 547f1a6a35d4..c4ee39f7a4d6 100644 --- a/arch/s390/include/asm/elf.h +++ b/arch/s390/include/asm/elf.h @@ -129,7 +129,6 @@ typedef s390_fp_regs compat_elf_fpregset_t; typedef s390_compat_regs compat_elf_gregset_t; #include <linux/sched.h> /* for task_struct */ -#include <asm/system.h> /* for save_access_regs */ #include <asm/mmu_context.h> #include <asm/vdso.h> diff --git a/arch/s390/include/asm/exec.h b/arch/s390/include/asm/exec.h new file mode 100644 index 000000000000..c4a93d6327fa --- /dev/null +++ b/arch/s390/include/asm/exec.h @@ -0,0 +1,12 @@ +/* + * Copyright IBM Corp. 1999, 2009 + * + * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> + */ + +#ifndef __ASM_EXEC_H +#define __ASM_EXEC_H + +extern unsigned long arch_align_stack(unsigned long sp); + +#endif /* __ASM_EXEC_H */ diff --git a/arch/s390/include/asm/facility.h b/arch/s390/include/asm/facility.h new file mode 100644 index 000000000000..1e5b27edc0c9 --- /dev/null +++ b/arch/s390/include/asm/facility.h @@ -0,0 +1,63 @@ +/* + * Copyright IBM Corp. 1999, 2009 + * + * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> + */ + +#ifndef __ASM_FACILITY_H +#define __ASM_FACILITY_H + +#include <linux/string.h> +#include <linux/preempt.h> +#include <asm/lowcore.h> + +#define MAX_FACILITY_BIT (256*8) /* stfle_fac_list has 256 bytes */ + +/* + * The test_facility function uses the bit odering where the MSB is bit 0. + * That makes it easier to query facility bits with the bit number as + * documented in the Principles of Operation. + */ +static inline int test_facility(unsigned long nr) +{ + unsigned char *ptr; + + if (nr >= MAX_FACILITY_BIT) + return 0; + ptr = (unsigned char *) &S390_lowcore.stfle_fac_list + (nr >> 3); + return (*ptr & (0x80 >> (nr & 7))) != 0; +} + +/** + * stfle - Store facility list extended + * @stfle_fac_list: array where facility list can be stored + * @size: size of passed in array in double words + */ +static inline void stfle(u64 *stfle_fac_list, int size) +{ + unsigned long nr; + + preempt_disable(); + S390_lowcore.stfl_fac_list = 0; + asm volatile( + " .insn s,0xb2b10000,0(0)\n" /* stfl */ + "0:\n" + EX_TABLE(0b, 0b) + : "=m" (S390_lowcore.stfl_fac_list)); + nr = 4; /* bytes stored by stfl */ + memcpy(stfle_fac_list, &S390_lowcore.stfl_fac_list, 4); + if (S390_lowcore.stfl_fac_list & 0x01000000) { + /* More facility bits available with stfle */ + register unsigned long reg0 asm("0") = size - 1; + + asm volatile(".insn s,0xb2b00000,0(%1)" /* stfle */ + : "+d" (reg0) + : "a" (stfle_fac_list) + : "memory", "cc"); + nr = (reg0 + 1) * 8; /* # bytes stored by stfle */ + } + memset((char *) stfle_fac_list + nr, 0, size * 8 - nr); + preempt_enable(); +} + +#endif /* __ASM_FACILITY_H */ diff --git a/arch/s390/include/asm/hardirq.h b/arch/s390/include/asm/hardirq.h index e4155d3eb2cb..510ba9ef4248 100644 --- a/arch/s390/include/asm/hardirq.h +++ b/arch/s390/include/asm/hardirq.h @@ -18,6 +18,7 @@ #define __ARCH_IRQ_STAT #define __ARCH_HAS_DO_SOFTIRQ +#define __ARCH_IRQ_EXIT_IRQS_DISABLED #define HARDIRQ_BITS 8 diff --git a/arch/s390/include/asm/ipl.h b/arch/s390/include/asm/ipl.h index 6940abfbe1d9..2bd6cb897b90 100644 --- a/arch/s390/include/asm/ipl.h +++ b/arch/s390/include/asm/ipl.h @@ -169,5 +169,6 @@ enum diag308_rc { extern int diag308(unsigned long subcode, void *addr); extern void diag308_reset(void); extern void store_status(void); +extern void lgr_info_log(void); #endif /* _ASM_S390_IPL_H */ diff --git a/arch/s390/include/asm/irq.h b/arch/s390/include/asm/irq.h index ba6d85f88d50..5289cacd4861 100644 --- a/arch/s390/include/asm/irq.h +++ b/arch/s390/include/asm/irq.h @@ -34,11 +34,18 @@ enum interruption_class { NR_IRQS, }; -typedef void (*ext_int_handler_t)(unsigned int, unsigned int, unsigned long); +struct ext_code { + unsigned short subcode; + unsigned short code; +}; + +typedef void (*ext_int_handler_t)(struct ext_code, unsigned int, unsigned long); int register_external_interrupt(u16 code, ext_int_handler_t handler); int unregister_external_interrupt(u16 code, ext_int_handler_t handler); void service_subclass_irq_register(void); void service_subclass_irq_unregister(void); +void measurement_alert_subclass_register(void); +void measurement_alert_subclass_unregister(void); #endif /* _ASM_IRQ_H */ diff --git a/arch/s390/include/asm/jump_label.h b/arch/s390/include/asm/jump_label.h index 95a6cf2b5b67..6c32190dc73e 100644 --- a/arch/s390/include/asm/jump_label.h +++ b/arch/s390/include/asm/jump_label.h @@ -13,7 +13,7 @@ #define ASM_ALIGN ".balign 4" #endif -static __always_inline bool arch_static_branch(struct jump_label_key *key) +static __always_inline bool arch_static_branch(struct static_key *key) { asm goto("0: brcl 0,0\n" ".pushsection __jump_table, \"aw\"\n" diff --git a/arch/s390/include/asm/kvm.h b/arch/s390/include/asm/kvm.h index 82b32a100c7d..96076676e224 100644 --- a/arch/s390/include/asm/kvm.h +++ b/arch/s390/include/asm/kvm.h @@ -41,4 +41,15 @@ struct kvm_debug_exit_arch { struct kvm_guest_debug_arch { }; +#define KVM_SYNC_PREFIX (1UL << 0) +#define KVM_SYNC_GPRS (1UL << 1) +#define KVM_SYNC_ACRS (1UL << 2) +#define KVM_SYNC_CRS (1UL << 3) +/* definition of registers in kvm_run */ +struct kvm_sync_regs { + __u64 prefix; /* prefix register */ + __u64 gprs[16]; /* general purpose registers */ + __u32 acrs[16]; /* access registers */ + __u64 crs[16]; /* control registers */ +}; #endif diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index b0c235cb6ad5..7343872890a2 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -220,18 +220,17 @@ struct kvm_s390_float_interrupt { struct list_head list; atomic_t active; int next_rr_cpu; - unsigned long idle_mask [(64 + sizeof(long) - 1) / sizeof(long)]; - struct kvm_s390_local_interrupt *local_int[64]; + unsigned long idle_mask[(KVM_MAX_VCPUS + sizeof(long) - 1) + / sizeof(long)]; + struct kvm_s390_local_interrupt *local_int[KVM_MAX_VCPUS]; }; struct kvm_vcpu_arch { struct kvm_s390_sie_block *sie_block; - unsigned long guest_gprs[16]; s390_fp_regs host_fpregs; unsigned int host_acrs[NUM_ACRS]; s390_fp_regs guest_fpregs; - unsigned int guest_acrs[NUM_ACRS]; struct kvm_s390_local_interrupt local_int; struct hrtimer ckc_timer; struct tasklet_struct tasklet; @@ -246,6 +245,9 @@ struct kvm_vm_stat { u32 remote_tlb_flush; }; +struct kvm_arch_memory_slot { +}; + struct kvm_arch{ struct sca_block *sca; debug_info_t *dbf; @@ -253,5 +255,5 @@ struct kvm_arch{ struct gmap *gmap; }; -extern int sie64a(struct kvm_s390_sie_block *, unsigned long *); +extern int sie64a(struct kvm_s390_sie_block *, u64 *); #endif diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h index 707f2306725b..47853debb3b9 100644 --- a/arch/s390/include/asm/lowcore.h +++ b/arch/s390/include/asm/lowcore.h @@ -1,5 +1,5 @@ /* - * Copyright IBM Corp. 1999,2010 + * Copyright IBM Corp. 1999,2012 * Author(s): Hartmut Penner <hp@de.ibm.com>, * Martin Schwidefsky <schwidefsky@de.ibm.com>, * Denis Joseph Barrow, @@ -12,14 +12,6 @@ #include <asm/ptrace.h> #include <asm/cpu.h> -void restart_int_handler(void); -void ext_int_handler(void); -void system_call(void); -void pgm_check_handler(void); -void mcck_int_handler(void); -void io_int_handler(void); -void psw_restart_int_handler(void); - #ifdef CONFIG_32BIT #define LC_ORDER 0 @@ -56,7 +48,7 @@ struct _lowcore { psw_t mcck_new_psw; /* 0x0070 */ psw_t io_new_psw; /* 0x0078 */ __u32 ext_params; /* 0x0080 */ - __u16 cpu_addr; /* 0x0084 */ + __u16 ext_cpu_addr; /* 0x0084 */ __u16 ext_int_code; /* 0x0086 */ __u16 svc_ilc; /* 0x0088 */ __u16 svc_code; /* 0x008a */ @@ -117,32 +109,37 @@ struct _lowcore { __u64 steal_timer; /* 0x0288 */ __u64 last_update_timer; /* 0x0290 */ __u64 last_update_clock; /* 0x0298 */ + __u64 int_clock; /* 0x02a0 */ + __u64 mcck_clock; /* 0x02a8 */ + __u64 clock_comparator; /* 0x02b0 */ /* Current process. */ - __u32 current_task; /* 0x02a0 */ - __u32 thread_info; /* 0x02a4 */ - __u32 kernel_stack; /* 0x02a8 */ + __u32 current_task; /* 0x02b8 */ + __u32 thread_info; /* 0x02bc */ + __u32 kernel_stack; /* 0x02c0 */ + + /* Interrupt, panic and restart stack. */ + __u32 async_stack; /* 0x02c4 */ + __u32 panic_stack; /* 0x02c8 */ + __u32 restart_stack; /* 0x02cc */ - /* Interrupt and panic stack. */ - __u32 async_stack; /* 0x02ac */ - __u32 panic_stack; /* 0x02b0 */ + /* Restart function and parameter. */ + __u32 restart_fn; /* 0x02d0 */ + __u32 restart_data; /* 0x02d4 */ + __u32 restart_source; /* 0x02d8 */ /* Address space pointer. */ - __u32 kernel_asce; /* 0x02b4 */ - __u32 user_asce; /* 0x02b8 */ - __u32 current_pid; /* 0x02bc */ + __u32 kernel_asce; /* 0x02dc */ + __u32 user_asce; /* 0x02e0 */ + __u32 current_pid; /* 0x02e4 */ /* SMP info area */ - __u32 cpu_nr; /* 0x02c0 */ - __u32 softirq_pending; /* 0x02c4 */ - __u32 percpu_offset; /* 0x02c8 */ - __u32 ext_call_fast; /* 0x02cc */ - __u64 int_clock; /* 0x02d0 */ - __u64 mcck_clock; /* 0x02d8 */ - __u64 clock_comparator; /* 0x02e0 */ - __u32 machine_flags; /* 0x02e8 */ - __u32 ftrace_func; /* 0x02ec */ - __u8 pad_0x02f8[0x0300-0x02f0]; /* 0x02f0 */ + __u32 cpu_nr; /* 0x02e8 */ + __u32 softirq_pending; /* 0x02ec */ + __u32 percpu_offset; /* 0x02f0 */ + __u32 machine_flags; /* 0x02f4 */ + __u32 ftrace_func; /* 0x02f8 */ + __u8 pad_0x02fc[0x0300-0x02fc]; /* 0x02fc */ /* Interrupt response block */ __u8 irb[64]; /* 0x0300 */ @@ -157,7 +154,9 @@ struct _lowcore { __u32 ipib; /* 0x0e00 */ __u32 ipib_checksum; /* 0x0e04 */ __u32 vmcore_info; /* 0x0e08 */ - __u8 pad_0x0e0c[0x0f00-0x0e0c]; /* 0x0e0c */ + __u8 pad_0x0e0c[0x0e18-0x0e0c]; /* 0x0e0c */ + __u32 os_info; /* 0x0e18 */ + __u8 pad_0x0e1c[0x0f00-0x0e1c]; /* 0x0e1c */ /* Extended facility list */ __u64 stfle_fac_list[32]; /* 0x0f00 */ @@ -189,7 +188,7 @@ struct _lowcore { __u32 ipl_parmblock_ptr; /* 0x0014 */ __u8 pad_0x0018[0x0080-0x0018]; /* 0x0018 */ __u32 ext_params; /* 0x0080 */ - __u16 cpu_addr; /* 0x0084 */ + __u16 ext_cpu_addr; /* 0x0084 */ __u16 ext_int_code; /* 0x0086 */ __u16 svc_ilc; /* 0x0088 */ __u16 svc_code; /* 0x008a */ @@ -254,34 +253,39 @@ struct _lowcore { __u64 steal_timer; /* 0x02e0 */ __u64 last_update_timer; /* 0x02e8 */ __u64 last_update_clock; /* 0x02f0 */ + __u64 int_clock; /* 0x02f8 */ + __u64 mcck_clock; /* 0x0300 */ + __u64 clock_comparator; /* 0x0308 */ /* Current process. */ - __u64 current_task; /* 0x02f8 */ - __u64 thread_info; /* 0x0300 */ - __u64 kernel_stack; /* 0x0308 */ + __u64 current_task; /* 0x0310 */ + __u64 thread_info; /* 0x0318 */ + __u64 kernel_stack; /* 0x0320 */ + + /* Interrupt, panic and restart stack. */ + __u64 async_stack; /* 0x0328 */ + __u64 panic_stack; /* 0x0330 */ + __u64 restart_stack; /* 0x0338 */ - /* Interrupt and panic stack. */ - __u64 async_stack; /* 0x0310 */ - __u64 panic_stack; /* 0x0318 */ + /* Restart function and parameter. */ + __u64 restart_fn; /* 0x0340 */ + __u64 restart_data; /* 0x0348 */ + __u64 restart_source; /* 0x0350 */ /* Address space pointer. */ - __u64 kernel_asce; /* 0x0320 */ - __u64 user_asce; /* 0x0328 */ - __u64 current_pid; /* 0x0330 */ + __u64 kernel_asce; /* 0x0358 */ + __u64 user_asce; /* 0x0360 */ + __u64 current_pid; /* 0x0368 */ /* SMP info area */ - __u32 cpu_nr; /* 0x0338 */ - __u32 softirq_pending; /* 0x033c */ - __u64 percpu_offset; /* 0x0340 */ - __u64 ext_call_fast; /* 0x0348 */ - __u64 int_clock; /* 0x0350 */ - __u64 mcck_clock; /* 0x0358 */ - __u64 clock_comparator; /* 0x0360 */ - __u64 vdso_per_cpu_data; /* 0x0368 */ - __u64 machine_flags; /* 0x0370 */ - __u64 ftrace_func; /* 0x0378 */ - __u64 gmap; /* 0x0380 */ - __u8 pad_0x0388[0x0400-0x0388]; /* 0x0388 */ + __u32 cpu_nr; /* 0x0370 */ + __u32 softirq_pending; /* 0x0374 */ + __u64 percpu_offset; /* 0x0378 */ + __u64 vdso_per_cpu_data; /* 0x0380 */ + __u64 machine_flags; /* 0x0388 */ + __u64 ftrace_func; /* 0x0390 */ + __u64 gmap; /* 0x0398 */ + __u8 pad_0x03a0[0x0400-0x03a0]; /* 0x03a0 */ /* Interrupt response block. */ __u8 irb[64]; /* 0x0400 */ @@ -298,8 +302,15 @@ struct _lowcore { */ __u64 ipib; /* 0x0e00 */ __u32 ipib_checksum; /* 0x0e08 */ - __u64 vmcore_info; /* 0x0e0c */ - __u8 pad_0x0e14[0x0f00-0x0e14]; /* 0x0e14 */ + /* + * Because the vmcore_info pointer is not 8 byte aligned it never + * should not be accessed directly. For accessing the pointer, first + * copy it to a local pointer variable. + */ + __u8 vmcore_info[8]; /* 0x0e0c */ + __u8 pad_0x0e14[0x0e18-0x0e14]; /* 0x0e14 */ + __u64 os_info; /* 0x0e18 */ + __u8 pad_0x0e20[0x0f00-0x0e20]; /* 0x0e20 */ /* Extended facility list */ __u64 stfle_fac_list[32]; /* 0x0f00 */ diff --git a/arch/s390/include/asm/mmu.h b/arch/s390/include/asm/mmu.h index 4506791adcd5..6340178748bf 100644 --- a/arch/s390/include/asm/mmu.h +++ b/arch/s390/include/asm/mmu.h @@ -1,6 +1,8 @@ #ifndef __MMU_H #define __MMU_H +#include <linux/errno.h> + typedef struct { atomic_t attach_count; unsigned int flush_mm; @@ -21,4 +23,18 @@ typedef struct { .context.pgtable_list = LIST_HEAD_INIT(name.context.pgtable_list), \ .context.gmap_list = LIST_HEAD_INIT(name.context.gmap_list), +static inline int tprot(unsigned long addr) +{ + int rc = -EFAULT; + + asm volatile( + " tprot 0(%1),0\n" + "0: ipm %0\n" + " srl %0,28\n" + "1:\n" + EX_TABLE(0b,1b) + : "+d" (rc) : "a" (addr) : "cc"); + return rc; +} + #endif diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h index 5682f160ff82..5d09e405c54d 100644 --- a/arch/s390/include/asm/mmu_context.h +++ b/arch/s390/include/asm/mmu_context.h @@ -12,6 +12,7 @@ #include <asm/pgalloc.h> #include <asm/uaccess.h> #include <asm/tlbflush.h> +#include <asm/ctl_reg.h> #include <asm-generic/mm_hooks.h> static inline int init_new_context(struct task_struct *tsk, diff --git a/arch/s390/include/asm/os_info.h b/arch/s390/include/asm/os_info.h new file mode 100644 index 000000000000..d07518af09ea --- /dev/null +++ b/arch/s390/include/asm/os_info.h @@ -0,0 +1,50 @@ +/* + * OS info memory interface + * + * Copyright IBM Corp. 2012 + * Author(s): Michael Holzheu <holzheu@linux.vnet.ibm.com> + */ +#ifndef _ASM_S390_OS_INFO_H +#define _ASM_S390_OS_INFO_H + +#define OS_INFO_VERSION_MAJOR 1 +#define OS_INFO_VERSION_MINOR 1 +#define OS_INFO_MAGIC 0x4f53494e464f535aULL /* OSINFOSZ */ + +#define OS_INFO_VMCOREINFO 0 +#define OS_INFO_REIPL_BLOCK 1 +#define OS_INFO_INIT_FN 2 + +struct os_info_entry { + u64 addr; + u64 size; + u32 csum; +} __packed; + +struct os_info { + u64 magic; + u32 csum; + u16 version_major; + u16 version_minor; + u64 crashkernel_addr; + u64 crashkernel_size; + struct os_info_entry entry[3]; + u8 reserved[4004]; +} __packed; + +void os_info_init(void); +void os_info_entry_add(int nr, void *ptr, u64 len); +void os_info_crashkernel_add(unsigned long base, unsigned long size); +u32 os_info_csum(struct os_info *os_info); + +#ifdef CONFIG_CRASH_DUMP +void *os_info_old_entry(int nr, unsigned long *size); +int copy_from_oldmem(void *dest, void *src, size_t count); +#else +static inline void *os_info_old_entry(int nr, unsigned long *size) +{ + return NULL; +} +#endif + +#endif /* _ASM_S390_OS_INFO_H */ diff --git a/arch/s390/include/asm/perf_event.h b/arch/s390/include/asm/perf_event.h index 4eb444edbe49..7941968e12b4 100644 --- a/arch/s390/include/asm/perf_event.h +++ b/arch/s390/include/asm/perf_event.h @@ -1,8 +1,16 @@ /* * Performance event support - s390 specific definitions. * - * Copyright 2009 Martin Schwidefsky, IBM Corporation. + * Copyright IBM Corp. 2009, 2012 + * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> + * Hendrik Brueckner <brueckner@linux.vnet.ibm.com> */ -/* Empty, just to avoid compiling error */ +#include <asm/cpu_mf.h> +/* CPU-measurement counter facility */ +#define PERF_CPUM_CF_MAX_CTR 160 + +/* Per-CPU flags for PMU states */ +#define PMU_F_RESERVED 0x1000 +#define PMU_F_ENABLED 0x2000 diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index d25843a6a915..d499b30ea487 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -14,6 +14,7 @@ #define __ASM_S390_PROCESSOR_H #include <linux/linkage.h> +#include <linux/irqflags.h> #include <asm/cpu.h> #include <asm/page.h> #include <asm/ptrace.h> @@ -156,6 +157,14 @@ unsigned long get_wchan(struct task_struct *p); #define KSTK_EIP(tsk) (task_pt_regs(tsk)->psw.addr) #define KSTK_ESP(tsk) (task_pt_regs(tsk)->gprs[15]) +static inline unsigned short stap(void) +{ + unsigned short cpu_address; + + asm volatile("stap %0" : "=m" (cpu_address)); + return cpu_address; +} + /* * Give up the time slice of the virtual PU. */ @@ -304,6 +313,21 @@ static inline void __noreturn disabled_wait(unsigned long code) } /* + * Use to set psw mask except for the first byte which + * won't be changed by this function. + */ +static inline void +__set_psw_mask(unsigned long mask) +{ + __load_psw_mask(mask | (arch_local_save_flags() & ~(-1UL >> 8))); +} + +#define local_mcck_enable() \ + __set_psw_mask(psw_kernel_bits | PSW_MASK_DAT | PSW_MASK_MCHECK) +#define local_mcck_disable() \ + __set_psw_mask(psw_kernel_bits | PSW_MASK_DAT) + +/* * Basic Machine Check/Program Check Handler. */ diff --git a/arch/s390/include/asm/qeth.h b/arch/s390/include/asm/qeth.h index 90efda0b137d..2c7c898c03e4 100644 --- a/arch/s390/include/asm/qeth.h +++ b/arch/s390/include/asm/qeth.h @@ -20,6 +20,7 @@ #define SIOC_QETH_ARP_FLUSH_CACHE (SIOCDEVPRIVATE + 4) #define SIOC_QETH_ADP_SET_SNMP_CONTROL (SIOCDEVPRIVATE + 5) #define SIOC_QETH_GET_CARD_TYPE (SIOCDEVPRIVATE + 6) +#define SIOC_QETH_QUERY_OAT (SIOCDEVPRIVATE + 7) struct qeth_arp_cache_entry { __u8 macaddr[6]; @@ -107,4 +108,10 @@ struct qeth_arp_query_user_data { char *entries; } __attribute__((packed)); +struct qeth_query_oat_data { + __u32 command; + __u32 buffer_len; + __u32 response_len; + __u64 ptr; +}; #endif /* __ASM_S390_QETH_IOCTL_H__ */ diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h index 097183c70407..b21e46e5d4b8 100644 --- a/arch/s390/include/asm/setup.h +++ b/arch/s390/include/asm/setup.h @@ -140,6 +140,20 @@ extern char vmpoff_cmd[]; #define NSS_NAME_SIZE 8 extern char kernel_nss_name[]; +#ifdef CONFIG_PFAULT +extern int pfault_init(void); +extern void pfault_fini(void); +#else /* CONFIG_PFAULT */ +#define pfault_init() ({-1;}) +#define pfault_fini() do { } while (0) +#endif /* CONFIG_PFAULT */ + +extern void cmma_init(void); + +extern void (*_machine_restart)(char *command); +extern void (*_machine_halt)(void); +extern void (*_machine_power_off)(void); + #else /* __ASSEMBLY__ */ #ifndef __s390x__ diff --git a/arch/s390/include/asm/sigp.h b/arch/s390/include/asm/sigp.h deleted file mode 100644 index 7040b8567cd0..000000000000 --- a/arch/s390/include/asm/sigp.h +++ /dev/null @@ -1,132 +0,0 @@ -/* - * Routines and structures for signalling other processors. - * - * Copyright IBM Corp. 1999,2010 - * Author(s): Denis Joseph Barrow, - * Martin Schwidefsky <schwidefsky@de.ibm.com>, - * Heiko Carstens <heiko.carstens@de.ibm.com>, - */ - -#ifndef __ASM_SIGP_H -#define __ASM_SIGP_H - -#include <asm/system.h> - -/* Get real cpu address from logical cpu number. */ -extern unsigned short __cpu_logical_map[]; - -static inline int cpu_logical_map(int cpu) -{ -#ifdef CONFIG_SMP - return __cpu_logical_map[cpu]; -#else - return stap(); -#endif -} - -enum { - sigp_sense = 1, - sigp_external_call = 2, - sigp_emergency_signal = 3, - sigp_start = 4, - sigp_stop = 5, - sigp_restart = 6, - sigp_stop_and_store_status = 9, - sigp_initial_cpu_reset = 11, - sigp_cpu_reset = 12, - sigp_set_prefix = 13, - sigp_store_status_at_address = 14, - sigp_store_extended_status_at_address = 15, - sigp_set_architecture = 18, - sigp_conditional_emergency_signal = 19, - sigp_sense_running = 21, -}; - -enum { - sigp_order_code_accepted = 0, - sigp_status_stored = 1, - sigp_busy = 2, - sigp_not_operational = 3, -}; - -/* - * Definitions for external call. - */ -enum { - ec_schedule = 0, - ec_call_function, - ec_call_function_single, - ec_stop_cpu, -}; - -/* - * Signal processor. - */ -static inline int raw_sigp(u16 cpu, int order) -{ - register unsigned long reg1 asm ("1") = 0; - int ccode; - - asm volatile( - " sigp %1,%2,0(%3)\n" - " ipm %0\n" - " srl %0,28\n" - : "=d" (ccode) - : "d" (reg1), "d" (cpu), - "a" (order) : "cc" , "memory"); - return ccode; -} - -/* - * Signal processor with parameter. - */ -static inline int raw_sigp_p(u32 parameter, u16 cpu, int order) -{ - register unsigned int reg1 asm ("1") = parameter; - int ccode; - - asm volatile( - " sigp %1,%2,0(%3)\n" - " ipm %0\n" - " srl %0,28\n" - : "=d" (ccode) - : "d" (reg1), "d" (cpu), - "a" (order) : "cc" , "memory"); - return ccode; -} - -/* - * Signal processor with parameter and return status. - */ -static inline int raw_sigp_ps(u32 *status, u32 parm, u16 cpu, int order) -{ - register unsigned int reg1 asm ("1") = parm; - int ccode; - - asm volatile( - " sigp %1,%2,0(%3)\n" - " ipm %0\n" - " srl %0,28\n" - : "=d" (ccode), "+d" (reg1) - : "d" (cpu), "a" (order) - : "cc" , "memory"); - *status = reg1; - return ccode; -} - -static inline int sigp(int cpu, int order) -{ - return raw_sigp(cpu_logical_map(cpu), order); -} - -static inline int sigp_p(u32 parameter, int cpu, int order) -{ - return raw_sigp_p(parameter, cpu_logical_map(cpu), order); -} - -static inline int sigp_ps(u32 *status, u32 parm, int cpu, int order) -{ - return raw_sigp_ps(status, parm, cpu_logical_map(cpu), order); -} - -#endif /* __ASM_SIGP_H */ diff --git a/arch/s390/include/asm/smp.h b/arch/s390/include/asm/smp.h index c32e9123b40c..c77c6de6f6c0 100644 --- a/arch/s390/include/asm/smp.h +++ b/arch/s390/include/asm/smp.h @@ -1,5 +1,5 @@ /* - * Copyright IBM Corp. 1999,2009 + * Copyright IBM Corp. 1999,2012 * Author(s): Denis Joseph Barrow, * Martin Schwidefsky <schwidefsky@de.ibm.com>, * Heiko Carstens <heiko.carstens@de.ibm.com>, @@ -9,72 +9,53 @@ #ifdef CONFIG_SMP -#include <asm/system.h> -#include <asm/sigp.h> - -extern void machine_restart_smp(char *); -extern void machine_halt_smp(void); -extern void machine_power_off_smp(void); +#include <asm/lowcore.h> #define raw_smp_processor_id() (S390_lowcore.cpu_nr) -extern int __cpu_disable (void); -extern void __cpu_die (unsigned int cpu); -extern int __cpu_up (unsigned int cpu); - extern struct mutex smp_cpu_state_mutex; +extern struct save_area *zfcpdump_save_areas[NR_CPUS + 1]; + +extern int __cpu_up(unsigned int cpu); extern void arch_send_call_function_single_ipi(int cpu); extern void arch_send_call_function_ipi_mask(const struct cpumask *mask); -extern struct save_area *zfcpdump_save_areas[NR_CPUS + 1]; - -extern void smp_switch_to_ipl_cpu(void (*func)(void *), void *); -extern void smp_switch_to_cpu(void (*)(void *), void *, unsigned long sp, - int from, int to); -extern void smp_restart_with_online_cpu(void); -extern void smp_restart_cpu(void); +extern void smp_call_online_cpu(void (*func)(void *), void *); +extern void smp_call_ipl_cpu(void (*func)(void *), void *); -/* - * returns 1 if (virtual) cpu is scheduled - * returns 0 otherwise - */ -static inline int smp_vcpu_scheduled(int cpu) -{ - u32 status; - - switch (sigp_ps(&status, 0, cpu, sigp_sense_running)) { - case sigp_status_stored: - /* Check for running status */ - if (status & 0x400) - return 0; - break; - case sigp_not_operational: - return 0; - default: - break; - } - return 1; -} +extern int smp_find_processor_id(u16 address); +extern int smp_store_status(int cpu); +extern int smp_vcpu_scheduled(int cpu); +extern void smp_yield_cpu(int cpu); +extern void smp_yield(void); +extern void smp_stop_cpu(void); #else /* CONFIG_SMP */ -static inline void smp_switch_to_ipl_cpu(void (*func)(void *), void *data) +static inline void smp_call_ipl_cpu(void (*func)(void *), void *data) { func(data); } -static inline void smp_restart_with_online_cpu(void) +static inline void smp_call_online_cpu(void (*func)(void *), void *data) { + func(data); } -#define smp_vcpu_scheduled (1) +static inline int smp_find_processor_id(int address) { return 0; } +static inline int smp_vcpu_scheduled(int cpu) { return 1; } +static inline void smp_yield_cpu(int cpu) { } +static inline void smp_yield(void) { } +static inline void smp_stop_cpu(void) { } #endif /* CONFIG_SMP */ #ifdef CONFIG_HOTPLUG_CPU extern int smp_rescan_cpus(void); extern void __noreturn cpu_die(void); +extern void __cpu_die(unsigned int cpu); +extern int __cpu_disable(void); #else static inline int smp_rescan_cpus(void) { return 0; } static inline void cpu_die(void) { } diff --git a/arch/s390/include/asm/socket.h b/arch/s390/include/asm/socket.h index 67b5c1b14b51..c91b720965c0 100644 --- a/arch/s390/include/asm/socket.h +++ b/arch/s390/include/asm/socket.h @@ -72,5 +72,9 @@ #define SO_WIFI_STATUS 41 #define SCM_WIFI_STATUS SO_WIFI_STATUS +#define SO_PEEK_OFF 42 + +/* Instruct lower device to use last 4-bytes of skb data as FCS */ +#define SO_NOFCS 43 #endif /* _ASM_SOCKET_H */ diff --git a/arch/s390/include/asm/switch_to.h b/arch/s390/include/asm/switch_to.h new file mode 100644 index 000000000000..f223068b7822 --- /dev/null +++ b/arch/s390/include/asm/switch_to.h @@ -0,0 +1,100 @@ +/* + * Copyright IBM Corp. 1999, 2009 + * + * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> + */ + +#ifndef __ASM_SWITCH_TO_H +#define __ASM_SWITCH_TO_H + +#include <linux/thread_info.h> + +extern struct task_struct *__switch_to(void *, void *); +extern void update_per_regs(struct task_struct *task); + +static inline void save_fp_regs(s390_fp_regs *fpregs) +{ + asm volatile( + " std 0,%O0+8(%R0)\n" + " std 2,%O0+24(%R0)\n" + " std 4,%O0+40(%R0)\n" + " std 6,%O0+56(%R0)" + : "=Q" (*fpregs) : "Q" (*fpregs)); + if (!MACHINE_HAS_IEEE) + return; + asm volatile( + " stfpc %0\n" + " std 1,%O0+16(%R0)\n" + " std 3,%O0+32(%R0)\n" + " std 5,%O0+48(%R0)\n" + " std 7,%O0+64(%R0)\n" + " std 8,%O0+72(%R0)\n" + " std 9,%O0+80(%R0)\n" + " std 10,%O0+88(%R0)\n" + " std 11,%O0+96(%R0)\n" + " std 12,%O0+104(%R0)\n" + " std 13,%O0+112(%R0)\n" + " std 14,%O0+120(%R0)\n" + " std 15,%O0+128(%R0)\n" + : "=Q" (*fpregs) : "Q" (*fpregs)); +} + +static inline void restore_fp_regs(s390_fp_regs *fpregs) +{ + asm volatile( + " ld 0,%O0+8(%R0)\n" + " ld 2,%O0+24(%R0)\n" + " ld 4,%O0+40(%R0)\n" + " ld 6,%O0+56(%R0)" + : : "Q" (*fpregs)); + if (!MACHINE_HAS_IEEE) + return; + asm volatile( + " lfpc %0\n" + " ld 1,%O0+16(%R0)\n" + " ld 3,%O0+32(%R0)\n" + " ld 5,%O0+48(%R0)\n" + " ld 7,%O0+64(%R0)\n" + " ld 8,%O0+72(%R0)\n" + " ld 9,%O0+80(%R0)\n" + " ld 10,%O0+88(%R0)\n" + " ld 11,%O0+96(%R0)\n" + " ld 12,%O0+104(%R0)\n" + " ld 13,%O0+112(%R0)\n" + " ld 14,%O0+120(%R0)\n" + " ld 15,%O0+128(%R0)\n" + : : "Q" (*fpregs)); +} + +static inline void save_access_regs(unsigned int *acrs) +{ + asm volatile("stam 0,15,%0" : "=Q" (*acrs)); +} + +static inline void restore_access_regs(unsigned int *acrs) +{ + asm volatile("lam 0,15,%0" : : "Q" (*acrs)); +} + +#define switch_to(prev,next,last) do { \ + if (prev->mm) { \ + save_fp_regs(&prev->thread.fp_regs); \ + save_access_regs(&prev->thread.acrs[0]); \ + } \ + if (next->mm) { \ + restore_fp_regs(&next->thread.fp_regs); \ + restore_access_regs(&next->thread.acrs[0]); \ + update_per_regs(next); \ + } \ + prev = __switch_to(prev,next); \ +} while (0) + +extern void account_vtime(struct task_struct *, struct task_struct *); +extern void account_tick_vtime(struct task_struct *); + +#define finish_arch_switch(prev) do { \ + set_fs(current->thread.mm_segment); \ + account_vtime(prev, current); \ +} while (0) + +#endif /* __ASM_SWITCH_TO_H */ diff --git a/arch/s390/include/asm/system.h b/arch/s390/include/asm/system.h deleted file mode 100644 index d73cc6b60000..000000000000 --- a/arch/s390/include/asm/system.h +++ /dev/null @@ -1,281 +0,0 @@ -/* - * Copyright IBM Corp. 1999, 2009 - * - * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> - */ - -#ifndef __ASM_SYSTEM_H -#define __ASM_SYSTEM_H - -#include <linux/kernel.h> -#include <linux/errno.h> -#include <asm/types.h> -#include <asm/ptrace.h> -#include <asm/setup.h> -#include <asm/processor.h> -#include <asm/lowcore.h> -#include <asm/cmpxchg.h> - -#ifdef __KERNEL__ - -struct task_struct; - -extern struct task_struct *__switch_to(void *, void *); -extern void update_per_regs(struct task_struct *task); - -static inline void save_fp_regs(s390_fp_regs *fpregs) -{ - asm volatile( - " std 0,%O0+8(%R0)\n" - " std 2,%O0+24(%R0)\n" - " std 4,%O0+40(%R0)\n" - " std 6,%O0+56(%R0)" - : "=Q" (*fpregs) : "Q" (*fpregs)); - if (!MACHINE_HAS_IEEE) - return; - asm volatile( - " stfpc %0\n" - " std 1,%O0+16(%R0)\n" - " std 3,%O0+32(%R0)\n" - " std 5,%O0+48(%R0)\n" - " std 7,%O0+64(%R0)\n" - " std 8,%O0+72(%R0)\n" - " std 9,%O0+80(%R0)\n" - " std 10,%O0+88(%R0)\n" - " std 11,%O0+96(%R0)\n" - " std 12,%O0+104(%R0)\n" - " std 13,%O0+112(%R0)\n" - " std 14,%O0+120(%R0)\n" - " std 15,%O0+128(%R0)\n" - : "=Q" (*fpregs) : "Q" (*fpregs)); -} - -static inline void restore_fp_regs(s390_fp_regs *fpregs) -{ - asm volatile( - " ld 0,%O0+8(%R0)\n" - " ld 2,%O0+24(%R0)\n" - " ld 4,%O0+40(%R0)\n" - " ld 6,%O0+56(%R0)" - : : "Q" (*fpregs)); - if (!MACHINE_HAS_IEEE) - return; - asm volatile( - " lfpc %0\n" - " ld 1,%O0+16(%R0)\n" - " ld 3,%O0+32(%R0)\n" - " ld 5,%O0+48(%R0)\n" - " ld 7,%O0+64(%R0)\n" - " ld 8,%O0+72(%R0)\n" - " ld 9,%O0+80(%R0)\n" - " ld 10,%O0+88(%R0)\n" - " ld 11,%O0+96(%R0)\n" - " ld 12,%O0+104(%R0)\n" - " ld 13,%O0+112(%R0)\n" - " ld 14,%O0+120(%R0)\n" - " ld 15,%O0+128(%R0)\n" - : : "Q" (*fpregs)); -} - -static inline void save_access_regs(unsigned int *acrs) -{ - asm volatile("stam 0,15,%0" : "=Q" (*acrs)); -} - -static inline void restore_access_regs(unsigned int *acrs) -{ - asm volatile("lam 0,15,%0" : : "Q" (*acrs)); -} - -#define switch_to(prev,next,last) do { \ - if (prev->mm) { \ - save_fp_regs(&prev->thread.fp_regs); \ - save_access_regs(&prev->thread.acrs[0]); \ - } \ - if (next->mm) { \ - restore_fp_regs(&next->thread.fp_regs); \ - restore_access_regs(&next->thread.acrs[0]); \ - update_per_regs(next); \ - } \ - prev = __switch_to(prev,next); \ -} while (0) - -extern void account_vtime(struct task_struct *, struct task_struct *); -extern void account_tick_vtime(struct task_struct *); - -#ifdef CONFIG_PFAULT -extern int pfault_init(void); -extern void pfault_fini(void); -#else /* CONFIG_PFAULT */ -#define pfault_init() ({-1;}) -#define pfault_fini() do { } while (0) -#endif /* CONFIG_PFAULT */ - -extern void cmma_init(void); -extern int memcpy_real(void *, void *, size_t); -extern void copy_to_absolute_zero(void *dest, void *src, size_t count); -extern int copy_to_user_real(void __user *dest, void *src, size_t count); -extern int copy_from_user_real(void *dest, void __user *src, size_t count); - -#define finish_arch_switch(prev) do { \ - set_fs(current->thread.mm_segment); \ - account_vtime(prev, current); \ -} while (0) - -#define nop() asm volatile("nop") - -/* - * Force strict CPU ordering. - * And yes, this is required on UP too when we're talking - * to devices. - * - * This is very similar to the ppc eieio/sync instruction in that is - * does a checkpoint syncronisation & makes sure that - * all memory ops have completed wrt other CPU's ( see 7-15 POP DJB ). - */ - -#define eieio() asm volatile("bcr 15,0" : : : "memory") -#define SYNC_OTHER_CORES(x) eieio() -#define mb() eieio() -#define rmb() eieio() -#define wmb() eieio() -#define read_barrier_depends() do { } while(0) -#define smp_mb() mb() -#define smp_rmb() rmb() -#define smp_wmb() wmb() -#define smp_read_barrier_depends() read_barrier_depends() -#define smp_mb__before_clear_bit() smp_mb() -#define smp_mb__after_clear_bit() smp_mb() - - -#define set_mb(var, value) do { var = value; mb(); } while (0) - -#ifdef __s390x__ - -#define __ctl_load(array, low, high) ({ \ - typedef struct { char _[sizeof(array)]; } addrtype; \ - asm volatile( \ - " lctlg %1,%2,%0\n" \ - : : "Q" (*(addrtype *)(&array)), \ - "i" (low), "i" (high)); \ - }) - -#define __ctl_store(array, low, high) ({ \ - typedef struct { char _[sizeof(array)]; } addrtype; \ - asm volatile( \ - " stctg %1,%2,%0\n" \ - : "=Q" (*(addrtype *)(&array)) \ - : "i" (low), "i" (high)); \ - }) - -#else /* __s390x__ */ - -#define __ctl_load(array, low, high) ({ \ - typedef struct { char _[sizeof(array)]; } addrtype; \ - asm volatile( \ - " lctl %1,%2,%0\n" \ - : : "Q" (*(addrtype *)(&array)), \ - "i" (low), "i" (high)); \ -}) - -#define __ctl_store(array, low, high) ({ \ - typedef struct { char _[sizeof(array)]; } addrtype; \ - asm volatile( \ - " stctl %1,%2,%0\n" \ - : "=Q" (*(addrtype *)(&array)) \ - : "i" (low), "i" (high)); \ - }) - -#endif /* __s390x__ */ - -#define __ctl_set_bit(cr, bit) ({ \ - unsigned long __dummy; \ - __ctl_store(__dummy, cr, cr); \ - __dummy |= 1UL << (bit); \ - __ctl_load(__dummy, cr, cr); \ -}) - -#define __ctl_clear_bit(cr, bit) ({ \ - unsigned long __dummy; \ - __ctl_store(__dummy, cr, cr); \ - __dummy &= ~(1UL << (bit)); \ - __ctl_load(__dummy, cr, cr); \ -}) - -/* - * Use to set psw mask except for the first byte which - * won't be changed by this function. - */ -static inline void -__set_psw_mask(unsigned long mask) -{ - __load_psw_mask(mask | (arch_local_save_flags() & ~(-1UL >> 8))); -} - -#define local_mcck_enable() \ - __set_psw_mask(psw_kernel_bits | PSW_MASK_DAT | PSW_MASK_MCHECK) -#define local_mcck_disable() \ - __set_psw_mask(psw_kernel_bits | PSW_MASK_DAT) - -#ifdef CONFIG_SMP - -extern void smp_ctl_set_bit(int cr, int bit); -extern void smp_ctl_clear_bit(int cr, int bit); -#define ctl_set_bit(cr, bit) smp_ctl_set_bit(cr, bit) -#define ctl_clear_bit(cr, bit) smp_ctl_clear_bit(cr, bit) - -#else - -#define ctl_set_bit(cr, bit) __ctl_set_bit(cr, bit) -#define ctl_clear_bit(cr, bit) __ctl_clear_bit(cr, bit) - -#endif /* CONFIG_SMP */ - -#define MAX_FACILITY_BIT (256*8) /* stfle_fac_list has 256 bytes */ - -/* - * The test_facility function uses the bit odering where the MSB is bit 0. - * That makes it easier to query facility bits with the bit number as - * documented in the Principles of Operation. - */ -static inline int test_facility(unsigned long nr) -{ - unsigned char *ptr; - - if (nr >= MAX_FACILITY_BIT) - return 0; - ptr = (unsigned char *) &S390_lowcore.stfle_fac_list + (nr >> 3); - return (*ptr & (0x80 >> (nr & 7))) != 0; -} - -static inline unsigned short stap(void) -{ - unsigned short cpu_address; - - asm volatile("stap %0" : "=m" (cpu_address)); - return cpu_address; -} - -extern void (*_machine_restart)(char *command); -extern void (*_machine_halt)(void); -extern void (*_machine_power_off)(void); - -extern unsigned long arch_align_stack(unsigned long sp); - -static inline int tprot(unsigned long addr) -{ - int rc = -EFAULT; - - asm volatile( - " tprot 0(%1),0\n" - "0: ipm %0\n" - " srl %0,28\n" - "1:\n" - EX_TABLE(0b,1b) - : "+d" (rc) : "a" (addr) : "cc"); - return rc; -} - -#endif /* __KERNEL__ */ - -#endif diff --git a/arch/s390/include/asm/timer.h b/arch/s390/include/asm/timer.h index 814243cafdfe..e63069ba39e3 100644 --- a/arch/s390/include/asm/timer.h +++ b/arch/s390/include/asm/timer.h @@ -33,8 +33,8 @@ struct vtimer_queue { spinlock_t lock; __u64 timer; /* last programmed timer */ __u64 elapsed; /* elapsed time of timer expire values */ - __u64 idle; /* temp var for idle */ - int do_spt; /* =1: reprogram cpu timer in idle */ + __u64 idle_enter; /* cpu timer on idle enter */ + __u64 idle_exit; /* cpu timer on idle exit */ }; extern void init_virt_timer(struct vtimer_list *timer); diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h index 2b23885e81e9..8f2cada4f7c9 100644 --- a/arch/s390/include/asm/uaccess.h +++ b/arch/s390/include/asm/uaccess.h @@ -16,6 +16,7 @@ */ #include <linux/sched.h> #include <linux/errno.h> +#include <asm/ctl_reg.h> #define VERIFY_READ 0 #define VERIFY_WRITE 1 @@ -375,4 +376,9 @@ clear_user(void __user *to, unsigned long n) return n; } +extern int memcpy_real(void *, void *, size_t); +extern void copy_to_absolute_zero(void *dest, void *src, size_t count); +extern int copy_to_user_real(void __user *dest, void *src, size_t count); +extern int copy_from_user_real(void *dest, void __user *src, size_t count); + #endif /* __S390_UACCESS_H */ diff --git a/arch/s390/include/asm/vdso.h b/arch/s390/include/asm/vdso.h index 533f35751aeb..c4a11cfad3c8 100644 --- a/arch/s390/include/asm/vdso.h +++ b/arch/s390/include/asm/vdso.h @@ -40,8 +40,8 @@ struct vdso_per_cpu_data { extern struct vdso_data *vdso_data; #ifdef CONFIG_64BIT -int vdso_alloc_per_cpu(int cpu, struct _lowcore *lowcore); -void vdso_free_per_cpu(int cpu, struct _lowcore *lowcore); +int vdso_alloc_per_cpu(struct _lowcore *lowcore); +void vdso_free_per_cpu(struct _lowcore *lowcore); #endif #endif /* __ASSEMBLY__ */ diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index 7d9ec924e7e7..884b18afc864 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -23,7 +23,7 @@ CFLAGS_sysinfo.o += -Iinclude/math-emu -Iarch/s390/math-emu -w obj-y := bitmap.o traps.o time.o process.o base.o early.o setup.o vtime.o \ processor.o sys_s390.o ptrace.o signal.o cpcmd.o ebcdic.o nmi.o \ debug.o irq.o ipl.o dis.o diag.o mem_detect.o sclp.o vdso.o \ - sysinfo.o jump_label.o + sysinfo.o jump_label.o lgr.o os_info.o obj-y += $(if $(CONFIG_64BIT),entry64.o,entry.o) obj-y += $(if $(CONFIG_64BIT),reipl64.o,reipl.o) @@ -34,8 +34,6 @@ extra-y += $(if $(CONFIG_64BIT),head64.o,head31.o) obj-$(CONFIG_MODULES) += s390_ksyms.o module.o obj-$(CONFIG_SMP) += smp.o obj-$(CONFIG_SCHED_BOOK) += topology.o -obj-$(CONFIG_SMP) += $(if $(CONFIG_64BIT),switch_cpu64.o, \ - switch_cpu.o) obj-$(CONFIG_HIBERNATION) += suspend.o swsusp_asm64.o obj-$(CONFIG_AUDIT) += audit.o compat-obj-$(CONFIG_AUDIT) += compat_audit.o @@ -50,6 +48,7 @@ obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o obj-$(CONFIG_FTRACE_SYSCALLS) += ftrace.o obj-$(CONFIG_CRASH_DUMP) += crash_dump.o +obj-$(CONFIG_PERF_EVENTS) += perf_event.o perf_cpum_cf.o # Kexec part S390_KEXEC_OBJS := machine_kexec.o crash.o diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index 6e6a72e66d60..83e6edf5cf17 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -8,8 +8,9 @@ #include <linux/kbuild.h> #include <linux/sched.h> +#include <asm/cputime.h> +#include <asm/timer.h> #include <asm/vdso.h> -#include <asm/sigp.h> #include <asm/pgtable.h> /* @@ -70,15 +71,15 @@ int main(void) DEFINE(__CLOCK_MONOTONIC, CLOCK_MONOTONIC); DEFINE(__CLOCK_REALTIME_RES, MONOTONIC_RES_NSEC); BLANK(); - /* constants for SIGP */ - DEFINE(__SIGP_STOP, sigp_stop); - DEFINE(__SIGP_RESTART, sigp_restart); - DEFINE(__SIGP_SENSE, sigp_sense); - DEFINE(__SIGP_INITIAL_CPU_RESET, sigp_initial_cpu_reset); - BLANK(); + /* idle data offsets */ + DEFINE(__IDLE_ENTER, offsetof(struct s390_idle_data, idle_enter)); + DEFINE(__IDLE_EXIT, offsetof(struct s390_idle_data, idle_exit)); + /* vtimer queue offsets */ + DEFINE(__VQ_IDLE_ENTER, offsetof(struct vtimer_queue, idle_enter)); + DEFINE(__VQ_IDLE_EXIT, offsetof(struct vtimer_queue, idle_exit)); /* lowcore offsets */ DEFINE(__LC_EXT_PARAMS, offsetof(struct _lowcore, ext_params)); - DEFINE(__LC_CPU_ADDRESS, offsetof(struct _lowcore, cpu_addr)); + DEFINE(__LC_EXT_CPU_ADDR, offsetof(struct _lowcore, ext_cpu_addr)); DEFINE(__LC_EXT_INT_CODE, offsetof(struct _lowcore, ext_int_code)); DEFINE(__LC_SVC_ILC, offsetof(struct _lowcore, svc_ilc)); DEFINE(__LC_SVC_INT_CODE, offsetof(struct _lowcore, svc_code)); @@ -95,20 +96,19 @@ int main(void) DEFINE(__LC_IO_INT_WORD, offsetof(struct _lowcore, io_int_word)); DEFINE(__LC_STFL_FAC_LIST, offsetof(struct _lowcore, stfl_fac_list)); DEFINE(__LC_MCCK_CODE, offsetof(struct _lowcore, mcck_interruption_code)); - DEFINE(__LC_DUMP_REIPL, offsetof(struct _lowcore, ipib)); - BLANK(); - DEFINE(__LC_RST_NEW_PSW, offsetof(struct _lowcore, restart_psw)); DEFINE(__LC_RST_OLD_PSW, offsetof(struct _lowcore, restart_old_psw)); DEFINE(__LC_EXT_OLD_PSW, offsetof(struct _lowcore, external_old_psw)); DEFINE(__LC_SVC_OLD_PSW, offsetof(struct _lowcore, svc_old_psw)); DEFINE(__LC_PGM_OLD_PSW, offsetof(struct _lowcore, program_old_psw)); DEFINE(__LC_MCK_OLD_PSW, offsetof(struct _lowcore, mcck_old_psw)); DEFINE(__LC_IO_OLD_PSW, offsetof(struct _lowcore, io_old_psw)); + DEFINE(__LC_RST_NEW_PSW, offsetof(struct _lowcore, restart_psw)); DEFINE(__LC_EXT_NEW_PSW, offsetof(struct _lowcore, external_new_psw)); DEFINE(__LC_SVC_NEW_PSW, offsetof(struct _lowcore, svc_new_psw)); DEFINE(__LC_PGM_NEW_PSW, offsetof(struct _lowcore, program_new_psw)); DEFINE(__LC_MCK_NEW_PSW, offsetof(struct _lowcore, mcck_new_psw)); DEFINE(__LC_IO_NEW_PSW, offsetof(struct _lowcore, io_new_psw)); + BLANK(); DEFINE(__LC_SAVE_AREA_SYNC, offsetof(struct _lowcore, save_area_sync)); DEFINE(__LC_SAVE_AREA_ASYNC, offsetof(struct _lowcore, save_area_async)); DEFINE(__LC_SAVE_AREA_RESTART, offsetof(struct _lowcore, save_area_restart)); @@ -129,12 +129,16 @@ int main(void) DEFINE(__LC_KERNEL_STACK, offsetof(struct _lowcore, kernel_stack)); DEFINE(__LC_ASYNC_STACK, offsetof(struct _lowcore, async_stack)); DEFINE(__LC_PANIC_STACK, offsetof(struct _lowcore, panic_stack)); + DEFINE(__LC_RESTART_STACK, offsetof(struct _lowcore, restart_stack)); + DEFINE(__LC_RESTART_FN, offsetof(struct _lowcore, restart_fn)); DEFINE(__LC_USER_ASCE, offsetof(struct _lowcore, user_asce)); DEFINE(__LC_INT_CLOCK, offsetof(struct _lowcore, int_clock)); DEFINE(__LC_MCCK_CLOCK, offsetof(struct _lowcore, mcck_clock)); DEFINE(__LC_MACHINE_FLAGS, offsetof(struct _lowcore, machine_flags)); DEFINE(__LC_FTRACE_FUNC, offsetof(struct _lowcore, ftrace_func)); DEFINE(__LC_IRB, offsetof(struct _lowcore, irb)); + DEFINE(__LC_DUMP_REIPL, offsetof(struct _lowcore, ipib)); + BLANK(); DEFINE(__LC_CPU_TIMER_SAVE_AREA, offsetof(struct _lowcore, cpu_timer_save_area)); DEFINE(__LC_CLOCK_COMP_SAVE_AREA, offsetof(struct _lowcore, clock_comp_save_area)); DEFINE(__LC_PSW_SAVE_AREA, offsetof(struct _lowcore, psw_save_area)); diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c index 6fe78c2f95d9..28040fd5e8a2 100644 --- a/arch/s390/kernel/compat_signal.c +++ b/arch/s390/kernel/compat_signal.c @@ -27,6 +27,7 @@ #include <asm/ucontext.h> #include <asm/uaccess.h> #include <asm/lowcore.h> +#include <asm/switch_to.h> #include "compat_linux.h" #include "compat_ptrace.h" #include "entry.h" @@ -581,7 +582,6 @@ give_sigsegv: int handle_signal32(unsigned long sig, struct k_sigaction *ka, siginfo_t *info, sigset_t *oldset, struct pt_regs *regs) { - sigset_t blocked; int ret; /* Set up the stack frame */ @@ -591,10 +591,7 @@ int handle_signal32(unsigned long sig, struct k_sigaction *ka, ret = setup_frame32(sig, ka, oldset, regs); if (ret) return ret; - sigorsets(&blocked, ¤t->blocked, &ka->sa.sa_mask); - if (!(ka->sa.sa_flags & SA_NODEFER)) - sigaddset(&blocked, sig); - set_current_blocked(&blocked); + block_sigmask(ka, sig); return 0; } diff --git a/arch/s390/kernel/cpcmd.c b/arch/s390/kernel/cpcmd.c index 3e8b8816f309..e3dd886e1b32 100644 --- a/arch/s390/kernel/cpcmd.c +++ b/arch/s390/kernel/cpcmd.c @@ -18,7 +18,6 @@ #include <linux/string.h> #include <asm/ebcdic.h> #include <asm/cpcmd.h> -#include <asm/system.h> #include <asm/io.h> static DEFINE_SPINLOCK(cpcmd_lock); diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c index c383ce440d99..cc1172b26873 100644 --- a/arch/s390/kernel/crash_dump.c +++ b/arch/s390/kernel/crash_dump.c @@ -14,6 +14,7 @@ #include <linux/bootmem.h> #include <linux/elf.h> #include <asm/ipl.h> +#include <asm/os_info.h> #define PTR_ADD(x, y) (((char *) (x)) + ((unsigned long) (y))) #define PTR_SUB(x, y) (((char *) (x)) - ((unsigned long) (y))) @@ -51,7 +52,7 @@ ssize_t copy_oldmem_page(unsigned long pfn, char *buf, /* * Copy memory from old kernel */ -static int copy_from_oldmem(void *dest, void *src, size_t count) +int copy_from_oldmem(void *dest, void *src, size_t count) { unsigned long copied = 0; int rc; @@ -224,28 +225,44 @@ static void *nt_prpsinfo(void *ptr) } /* - * Initialize vmcoreinfo note (new kernel) + * Get vmcoreinfo using lowcore->vmcore_info (new kernel) */ -static void *nt_vmcoreinfo(void *ptr) +static void *get_vmcoreinfo_old(unsigned long *size) { char nt_name[11], *vmcoreinfo; Elf64_Nhdr note; void *addr; if (copy_from_oldmem(&addr, &S390_lowcore.vmcore_info, sizeof(addr))) - return ptr; + return NULL; memset(nt_name, 0, sizeof(nt_name)); if (copy_from_oldmem(¬e, addr, sizeof(note))) - return ptr; + return NULL; if (copy_from_oldmem(nt_name, addr + sizeof(note), sizeof(nt_name) - 1)) - return ptr; + return NULL; if (strcmp(nt_name, "VMCOREINFO") != 0) - return ptr; - vmcoreinfo = kzalloc_panic(note.n_descsz + 1); + return NULL; + vmcoreinfo = kzalloc_panic(note.n_descsz); if (copy_from_oldmem(vmcoreinfo, addr + 24, note.n_descsz)) + return NULL; + *size = note.n_descsz; + return vmcoreinfo; +} + +/* + * Initialize vmcoreinfo note (new kernel) + */ +static void *nt_vmcoreinfo(void *ptr) +{ + unsigned long size; + void *vmcoreinfo; + + vmcoreinfo = os_info_old_entry(OS_INFO_VMCOREINFO, &size); + if (!vmcoreinfo) + vmcoreinfo = get_vmcoreinfo_old(&size); + if (!vmcoreinfo) return ptr; - vmcoreinfo[note.n_descsz + 1] = 0; - return nt_init(ptr, 0, vmcoreinfo, note.n_descsz, "VMCOREINFO"); + return nt_init(ptr, 0, vmcoreinfo, size, "VMCOREINFO"); } /* diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c index 6848828b962e..19e5e9eba546 100644 --- a/arch/s390/kernel/debug.c +++ b/arch/s390/kernel/debug.c @@ -2,8 +2,8 @@ * arch/s390/kernel/debug.c * S/390 debug facility * - * Copyright (C) 1999, 2000 IBM Deutschland Entwicklung GmbH, - * IBM Corporation + * Copyright IBM Corp. 1999, 2012 + * * Author(s): Michael Holzheu (holzheu@de.ibm.com), * Holger Smolinski (Holger.Smolinski@de.ibm.com) * @@ -167,6 +167,7 @@ static debug_info_t *debug_area_last = NULL; static DEFINE_MUTEX(debug_mutex); static int initialized; +static int debug_critical; static const struct file_operations debug_file_ops = { .owner = THIS_MODULE, @@ -932,6 +933,11 @@ debug_stop_all(void) } +void debug_set_critical(void) +{ + debug_critical = 1; +} + /* * debug_event_common: * - write debug entry with given size @@ -945,7 +951,11 @@ debug_event_common(debug_info_t * id, int level, const void *buf, int len) if (!debug_active || !id->areas) return NULL; - spin_lock_irqsave(&id->lock, flags); + if (debug_critical) { + if (!spin_trylock_irqsave(&id->lock, flags)) + return NULL; + } else + spin_lock_irqsave(&id->lock, flags); active = get_active_entry(id); memset(DEBUG_DATA(active), 0, id->buf_size); memcpy(DEBUG_DATA(active), buf, min(len, id->buf_size)); @@ -968,7 +978,11 @@ debug_entry_t if (!debug_active || !id->areas) return NULL; - spin_lock_irqsave(&id->lock, flags); + if (debug_critical) { + if (!spin_trylock_irqsave(&id->lock, flags)) + return NULL; + } else + spin_lock_irqsave(&id->lock, flags); active = get_active_entry(id); memset(DEBUG_DATA(active), 0, id->buf_size); memcpy(DEBUG_DATA(active), buf, min(len, id->buf_size)); @@ -1013,7 +1027,11 @@ debug_sprintf_event(debug_info_t* id, int level,char *string,...) return NULL; numargs=debug_count_numargs(string); - spin_lock_irqsave(&id->lock, flags); + if (debug_critical) { + if (!spin_trylock_irqsave(&id->lock, flags)) + return NULL; + } else + spin_lock_irqsave(&id->lock, flags); active = get_active_entry(id); curr_event=(debug_sprintf_entry_t *) DEBUG_DATA(active); va_start(ap,string); @@ -1047,7 +1065,11 @@ debug_sprintf_exception(debug_info_t* id, int level,char *string,...) numargs=debug_count_numargs(string); - spin_lock_irqsave(&id->lock, flags); + if (debug_critical) { + if (!spin_trylock_irqsave(&id->lock, flags)) + return NULL; + } else + spin_lock_irqsave(&id->lock, flags); active = get_active_entry(id); curr_event=(debug_sprintf_entry_t *)DEBUG_DATA(active); va_start(ap,string); @@ -1428,10 +1450,10 @@ debug_hex_ascii_format_fn(debug_info_t * id, struct debug_view *view, rc += sprintf(out_buf + rc, "| "); for (i = 0; i < id->buf_size; i++) { unsigned char c = in_buf[i]; - if (!isprint(c)) - rc += sprintf(out_buf + rc, "."); - else + if (isascii(c) && isprint(c)) rc += sprintf(out_buf + rc, "%c", c); + else + rc += sprintf(out_buf + rc, "."); } rc += sprintf(out_buf + rc, "\n"); return rc; diff --git a/arch/s390/kernel/dis.c b/arch/s390/kernel/dis.c index e2f847599c8e..3221c6fca8bb 100644 --- a/arch/s390/kernel/dis.c +++ b/arch/s390/kernel/dis.c @@ -24,7 +24,6 @@ #include <linux/kprobes.h> #include <linux/kdebug.h> -#include <asm/system.h> #include <asm/uaccess.h> #include <asm/io.h> #include <linux/atomic.h> diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index 52098d6dfaa7..9475e682727f 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -29,6 +29,7 @@ #include <asm/sysinfo.h> #include <asm/cpcmd.h> #include <asm/sclp.h> +#include <asm/facility.h> #include "entry.h" /* @@ -262,25 +263,8 @@ static noinline __init void setup_lowcore_early(void) static noinline __init void setup_facility_list(void) { - unsigned long nr; - - S390_lowcore.stfl_fac_list = 0; - asm volatile( - " .insn s,0xb2b10000,0(0)\n" /* stfl */ - "0:\n" - EX_TABLE(0b,0b) : "=m" (S390_lowcore.stfl_fac_list)); - memcpy(&S390_lowcore.stfle_fac_list, &S390_lowcore.stfl_fac_list, 4); - nr = 4; /* # bytes stored by stfl */ - if (test_facility(7)) { - /* More facility bits available with stfle */ - register unsigned long reg0 asm("0") = MAX_FACILITY_BIT/64 - 1; - asm volatile(".insn s,0xb2b00000,%0" /* stfle */ - : "=m" (S390_lowcore.stfle_fac_list), "+d" (reg0) - : : "cc"); - nr = (reg0 + 1) * 8; /* # bytes stored by stfle */ - } - memset((char *) S390_lowcore.stfle_fac_list + nr, 0, - MAX_FACILITY_BIT/8 - nr); + stfle(S390_lowcore.stfle_fac_list, + ARRAY_SIZE(S390_lowcore.stfle_fac_list)); } static noinline __init void setup_hpage(void) diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 3705700ed374..74ee563fe62b 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -2,7 +2,7 @@ * arch/s390/kernel/entry.S * S390 low-level entry points. * - * Copyright (C) IBM Corp. 1999,2006 + * Copyright (C) IBM Corp. 1999,2012 * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com), * Hartmut Penner (hp@de.ibm.com), * Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com), @@ -105,14 +105,14 @@ STACK_SIZE = 1 << STACK_SHIFT .macro ADD64 high,low,timer al \high,\timer - al \low,\timer+4 + al \low,4+\timer brc 12,.+8 ahi \high,1 .endm .macro SUB64 high,low,timer sl \high,\timer - sl \low,\timer+4 + sl \low,4+\timer brc 3,.+8 ahi \high,-1 .endm @@ -471,7 +471,6 @@ io_tif: jnz io_work # there is work to do (signals etc.) io_restore: mvc __LC_RETURN_PSW(8),__PT_PSW(%r11) - ni __LC_RETURN_PSW+1,0xfd # clean wait state bit stpt __LC_EXIT_TIMER lm %r0,%r15,__PT_R0(%r11) lpsw __LC_RETURN_PSW @@ -606,12 +605,32 @@ ext_skip: stm %r8,%r9,__PT_PSW(%r11) TRACE_IRQS_OFF lr %r2,%r11 # pass pointer to pt_regs - l %r3,__LC_CPU_ADDRESS # get cpu address + interruption code + l %r3,__LC_EXT_CPU_ADDR # get cpu address + interruption code l %r4,__LC_EXT_PARAMS # get external parameters l %r1,BASED(.Ldo_extint) basr %r14,%r1 # call do_extint j io_return +/* + * Load idle PSW. The second "half" of this function is in cleanup_idle. + */ +ENTRY(psw_idle) + st %r4,__SF_EMPTY(%r15) + basr %r1,0 + la %r1,psw_idle_lpsw+4-.(%r1) + st %r1,__SF_EMPTY+4(%r15) + oi __SF_EMPTY+4(%r15),0x80 + la %r1,.Lvtimer_max-psw_idle_lpsw-4(%r1) + stck __IDLE_ENTER(%r2) + ltr %r5,%r5 + stpt __VQ_IDLE_ENTER(%r3) + jz psw_idle_lpsw + spt 0(%r1) +psw_idle_lpsw: + lpsw __SF_EMPTY(%r15) + br %r14 +psw_idle_end: + __critical_end: /* @@ -673,7 +692,6 @@ mcck_skip: TRACE_IRQS_ON mcck_return: mvc __LC_RETURN_MCCK_PSW(8),__PT_PSW(%r11) # move return PSW - ni __LC_RETURN_MCCK_PSW+1,0xfd # clear wait state bit tm __LC_RETURN_MCCK_PSW+1,0x01 # returning to user ? jno 0f lm %r0,%r15,__PT_R0(%r11) @@ -691,77 +709,30 @@ mcck_panic: 0: ahi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) j mcck_skip -/* - * Restart interruption handler, kick starter for additional CPUs - */ -#ifdef CONFIG_SMP - __CPUINIT -ENTRY(restart_int_handler) - basr %r1,0 -restart_base: - spt restart_vtime-restart_base(%r1) - stck __LC_LAST_UPDATE_CLOCK - mvc __LC_LAST_UPDATE_TIMER(8),restart_vtime-restart_base(%r1) - mvc __LC_EXIT_TIMER(8),restart_vtime-restart_base(%r1) - l %r15,__LC_GPREGS_SAVE_AREA+60 # load ksp - lctl %c0,%c15,__LC_CREGS_SAVE_AREA # get new ctl regs - lam %a0,%a15,__LC_AREGS_SAVE_AREA - lm %r6,%r15,__SF_GPRS(%r15)# load registers from clone - l %r1,__LC_THREAD_INFO - mvc __LC_USER_TIMER(8),__TI_user_timer(%r1) - mvc __LC_SYSTEM_TIMER(8),__TI_system_timer(%r1) - xc __LC_STEAL_TIMER(8),__LC_STEAL_TIMER - ssm __LC_PGM_NEW_PSW # turn dat on, keep irqs off - basr %r14,0 - l %r14,restart_addr-.(%r14) - basr %r14,%r14 # call start_secondary -restart_addr: - .long start_secondary - .align 8 -restart_vtime: - .long 0x7fffffff,0xffffffff - .previous -#else -/* - * If we do not run with SMP enabled, let the new CPU crash ... - */ -ENTRY(restart_int_handler) - basr %r1,0 -restart_base: - lpsw restart_crash-restart_base(%r1) - .align 8 -restart_crash: - .long 0x000a0000,0x00000000 -restart_go: -#endif - # # PSW restart interrupt handler # -ENTRY(psw_restart_int_handler) +ENTRY(restart_int_handler) st %r15,__LC_SAVE_AREA_RESTART - basr %r15,0 -0: l %r15,.Lrestart_stack-0b(%r15) # load restart stack - l %r15,0(%r15) + l %r15,__LC_RESTART_STACK ahi %r15,-__PT_SIZE # create pt_regs on stack + xc 0(__PT_SIZE,%r15),0(%r15) stm %r0,%r14,__PT_R0(%r15) mvc __PT_R15(4,%r15),__LC_SAVE_AREA_RESTART mvc __PT_PSW(8,%r15),__LC_RST_OLD_PSW # store restart old psw - ahi %r15,-STACK_FRAME_OVERHEAD - xc __SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15) - basr %r14,0 -1: l %r14,.Ldo_restart-1b(%r14) - basr %r14,%r14 - basr %r14,0 # load disabled wait PSW if -2: lpsw restart_psw_crash-2b(%r14) # do_restart returns - .align 4 -.Ldo_restart: - .long do_restart -.Lrestart_stack: - .long restart_stack - .align 8 -restart_psw_crash: - .long 0x000a0000,0x00000000 + restart_psw_crash + ahi %r15,-STACK_FRAME_OVERHEAD # create stack frame on stack + xc 0(STACK_FRAME_OVERHEAD,%r15),0(%r15) + lm %r1,%r3,__LC_RESTART_FN # load fn, parm & source cpu + ltr %r3,%r3 # test source cpu address + jm 1f # negative -> skip source stop +0: sigp %r4,%r3,1 # sigp sense to source cpu + brc 10,0b # wait for status stored +1: basr %r14,%r1 # call function + stap __SF_EMPTY(%r15) # store cpu address + lh %r3,__SF_EMPTY(%r15) +2: sigp %r4,%r3,5 # sigp stop to current cpu + brc 2,2b +3: j 3b .section .kprobes.text, "ax" @@ -795,6 +766,8 @@ cleanup_table: .long io_tif + 0x80000000 .long io_restore + 0x80000000 .long io_done + 0x80000000 + .long psw_idle + 0x80000000 + .long psw_idle_end + 0x80000000 cleanup_critical: cl %r9,BASED(cleanup_table) # system_call @@ -813,6 +786,10 @@ cleanup_critical: jl cleanup_io_tif cl %r9,BASED(cleanup_table+28) # io_done jl cleanup_io_restore + cl %r9,BASED(cleanup_table+32) # psw_idle + jl 0f + cl %r9,BASED(cleanup_table+36) # psw_idle_end + jl cleanup_idle 0: br %r14 cleanup_system_call: @@ -896,7 +873,6 @@ cleanup_io_restore: jhe 0f l %r9,12(%r11) # get saved r11 pointer to pt_regs mvc __LC_RETURN_PSW(8),__PT_PSW(%r9) - ni __LC_RETURN_PSW+1,0xfd # clear wait state bit mvc 0(32,%r11),__PT_R8(%r9) lm %r0,%r7,__PT_R0(%r9) 0: lm %r8,%r9,__LC_RETURN_PSW @@ -904,11 +880,52 @@ cleanup_io_restore: cleanup_io_restore_insn: .long io_done - 4 + 0x80000000 +cleanup_idle: + # copy interrupt clock & cpu timer + mvc __IDLE_EXIT(8,%r2),__LC_INT_CLOCK + mvc __VQ_IDLE_EXIT(8,%r3),__LC_ASYNC_ENTER_TIMER + chi %r11,__LC_SAVE_AREA_ASYNC + je 0f + mvc __IDLE_EXIT(8,%r2),__LC_MCCK_CLOCK + mvc __VQ_IDLE_EXIT(8,%r3),__LC_MCCK_ENTER_TIMER +0: # check if stck has been executed + cl %r9,BASED(cleanup_idle_insn) + jhe 1f + mvc __IDLE_ENTER(8,%r2),__IDLE_EXIT(%r2) + mvc __VQ_IDLE_ENTER(8,%r3),__VQ_IDLE_EXIT(%r3) + j 2f +1: # check if the cpu timer has been reprogrammed + ltr %r5,%r5 + jz 2f + spt __VQ_IDLE_ENTER(%r3) +2: # account system time going idle + lm %r9,%r10,__LC_STEAL_TIMER + ADD64 %r9,%r10,__IDLE_ENTER(%r2) + SUB64 %r9,%r10,__LC_LAST_UPDATE_CLOCK + stm %r9,%r10,__LC_STEAL_TIMER + mvc __LC_LAST_UPDATE_CLOCK(8),__IDLE_EXIT(%r2) + lm %r9,%r10,__LC_SYSTEM_TIMER + ADD64 %r9,%r10,__LC_LAST_UPDATE_TIMER + SUB64 %r9,%r10,__VQ_IDLE_ENTER(%r3) + stm %r9,%r10,__LC_SYSTEM_TIMER + mvc __LC_LAST_UPDATE_TIMER(8),__VQ_IDLE_EXIT(%r3) + # prepare return psw + n %r8,BASED(cleanup_idle_wait) # clear wait state bit + l %r9,24(%r11) # return from psw_idle + br %r14 +cleanup_idle_insn: + .long psw_idle_lpsw + 0x80000000 +cleanup_idle_wait: + .long 0xfffdffff + /* * Integer constants */ .align 4 -.Lnr_syscalls: .long NR_syscalls +.Lnr_syscalls: + .long NR_syscalls +.Lvtimer_max: + .quad 0x7fffffffffffffff /* * Symbol constants diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h index bf538aaf407d..6cdddac93a2e 100644 --- a/arch/s390/kernel/entry.h +++ b/arch/s390/kernel/entry.h @@ -4,11 +4,22 @@ #include <linux/types.h> #include <linux/signal.h> #include <asm/ptrace.h> - +#include <asm/cputime.h> +#include <asm/timer.h> extern void (*pgm_check_table[128])(struct pt_regs *); extern void *restart_stack; +void system_call(void); +void pgm_check_handler(void); +void ext_int_handler(void); +void io_int_handler(void); +void mcck_int_handler(void); +void restart_int_handler(void); +void restart_call_handler(void); +void psw_idle(struct s390_idle_data *, struct vtimer_queue *, + unsigned long, int); + asmlinkage long do_syscall_trace_enter(struct pt_regs *regs); asmlinkage void do_syscall_trace_exit(struct pt_regs *regs); @@ -24,9 +35,9 @@ int handle_signal32(unsigned long sig, struct k_sigaction *ka, siginfo_t *info, sigset_t *oldset, struct pt_regs *regs); void do_notify_resume(struct pt_regs *regs); -void do_extint(struct pt_regs *regs, unsigned int, unsigned int, unsigned long); +struct ext_code; +void do_extint(struct pt_regs *regs, struct ext_code, unsigned int, unsigned long); void do_restart(void); -int __cpuinit start_secondary(void *cpuvoid); void __init startup_init(void); void die(struct pt_regs *regs, const char *str); diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S index 412a7b8783d7..4e1c292fa7e3 100644 --- a/arch/s390/kernel/entry64.S +++ b/arch/s390/kernel/entry64.S @@ -2,7 +2,7 @@ * arch/s390/kernel/entry64.S * S390 low-level entry points. * - * Copyright (C) IBM Corp. 1999,2010 + * Copyright (C) IBM Corp. 1999,2012 * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com), * Hartmut Penner (hp@de.ibm.com), * Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com), @@ -489,7 +489,6 @@ io_restore: lg %r14,__LC_VDSO_PER_CPU lmg %r0,%r10,__PT_R0(%r11) mvc __LC_RETURN_PSW(16),__PT_PSW(%r11) - ni __LC_RETURN_PSW+1,0xfd # clear wait state bit stpt __LC_EXIT_TIMER mvc __VDSO_ECTG_BASE(16,%r14),__LC_EXIT_TIMER lmg %r11,%r15,__PT_R11(%r11) @@ -625,12 +624,30 @@ ext_skip: TRACE_IRQS_OFF lghi %r1,4096 lgr %r2,%r11 # pass pointer to pt_regs - llgf %r3,__LC_CPU_ADDRESS # get cpu address + interruption code + llgf %r3,__LC_EXT_CPU_ADDR # get cpu address + interruption code llgf %r4,__LC_EXT_PARAMS # get external parameter lg %r5,__LC_EXT_PARAMS2-4096(%r1) # get 64 bit external parameter brasl %r14,do_extint j io_return +/* + * Load idle PSW. The second "half" of this function is in cleanup_idle. + */ +ENTRY(psw_idle) + stg %r4,__SF_EMPTY(%r15) + larl %r1,psw_idle_lpsw+4 + stg %r1,__SF_EMPTY+8(%r15) + larl %r1,.Lvtimer_max + stck __IDLE_ENTER(%r2) + ltr %r5,%r5 + stpt __VQ_IDLE_ENTER(%r3) + jz psw_idle_lpsw + spt 0(%r1) +psw_idle_lpsw: + lpswe __SF_EMPTY(%r15) + br %r14 +psw_idle_end: + __critical_end: /* @@ -696,7 +713,6 @@ mcck_return: lg %r14,__LC_VDSO_PER_CPU lmg %r0,%r10,__PT_R0(%r11) mvc __LC_RETURN_MCCK_PSW(16),__PT_PSW(%r11) # move return PSW - ni __LC_RETURN_MCCK_PSW+1,0xfd # clear wait state bit tm __LC_RETURN_MCCK_PSW+1,0x01 # returning to user ? jno 0f stpt __LC_EXIT_TIMER @@ -713,68 +729,30 @@ mcck_panic: 0: aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) j mcck_skip -/* - * Restart interruption handler, kick starter for additional CPUs - */ -#ifdef CONFIG_SMP - __CPUINIT -ENTRY(restart_int_handler) - basr %r1,0 -restart_base: - spt restart_vtime-restart_base(%r1) - stck __LC_LAST_UPDATE_CLOCK - mvc __LC_LAST_UPDATE_TIMER(8),restart_vtime-restart_base(%r1) - mvc __LC_EXIT_TIMER(8),restart_vtime-restart_base(%r1) - lghi %r10,__LC_GPREGS_SAVE_AREA - lg %r15,120(%r10) # load ksp - lghi %r10,__LC_CREGS_SAVE_AREA - lctlg %c0,%c15,0(%r10) # get new ctl regs - lghi %r10,__LC_AREGS_SAVE_AREA - lam %a0,%a15,0(%r10) - lmg %r6,%r15,__SF_GPRS(%r15)# load registers from clone - lg %r1,__LC_THREAD_INFO - mvc __LC_USER_TIMER(8),__TI_user_timer(%r1) - mvc __LC_SYSTEM_TIMER(8),__TI_system_timer(%r1) - xc __LC_STEAL_TIMER(8),__LC_STEAL_TIMER - ssm __LC_PGM_NEW_PSW # turn dat on, keep irqs off - brasl %r14,start_secondary - .align 8 -restart_vtime: - .long 0x7fffffff,0xffffffff - .previous -#else -/* - * If we do not run with SMP enabled, let the new CPU crash ... - */ -ENTRY(restart_int_handler) - basr %r1,0 -restart_base: - lpswe restart_crash-restart_base(%r1) - .align 8 -restart_crash: - .long 0x000a0000,0x00000000,0x00000000,0x00000000 -restart_go: -#endif - # # PSW restart interrupt handler # -ENTRY(psw_restart_int_handler) +ENTRY(restart_int_handler) stg %r15,__LC_SAVE_AREA_RESTART - larl %r15,restart_stack # load restart stack - lg %r15,0(%r15) + lg %r15,__LC_RESTART_STACK aghi %r15,-__PT_SIZE # create pt_regs on stack + xc 0(__PT_SIZE,%r15),0(%r15) stmg %r0,%r14,__PT_R0(%r15) mvc __PT_R15(8,%r15),__LC_SAVE_AREA_RESTART mvc __PT_PSW(16,%r15),__LC_RST_OLD_PSW # store restart old psw - aghi %r15,-STACK_FRAME_OVERHEAD - xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) - brasl %r14,do_restart - larl %r14,restart_psw_crash # load disabled wait PSW if - lpswe 0(%r14) # do_restart returns - .align 8 -restart_psw_crash: - .quad 0x0002000080000000,0x0000000000000000 + restart_psw_crash + aghi %r15,-STACK_FRAME_OVERHEAD # create stack frame on stack + xc 0(STACK_FRAME_OVERHEAD,%r15),0(%r15) + lmg %r1,%r3,__LC_RESTART_FN # load fn, parm & source cpu + ltgr %r3,%r3 # test source cpu address + jm 1f # negative -> skip source stop +0: sigp %r4,%r3,1 # sigp sense to source cpu + brc 10,0b # wait for status stored +1: basr %r14,%r1 # call function + stap __SF_EMPTY(%r15) # store cpu address + llgh %r3,__SF_EMPTY(%r15) +2: sigp %r4,%r3,5 # sigp stop to current cpu + brc 2,2b +3: j 3b .section .kprobes.text, "ax" @@ -808,6 +786,8 @@ cleanup_table: .quad io_tif .quad io_restore .quad io_done + .quad psw_idle + .quad psw_idle_end cleanup_critical: clg %r9,BASED(cleanup_table) # system_call @@ -826,6 +806,10 @@ cleanup_critical: jl cleanup_io_tif clg %r9,BASED(cleanup_table+56) # io_done jl cleanup_io_restore + clg %r9,BASED(cleanup_table+64) # psw_idle + jl 0f + clg %r9,BASED(cleanup_table+72) # psw_idle_end + jl cleanup_idle 0: br %r14 @@ -915,7 +899,6 @@ cleanup_io_restore: je 0f lg %r9,24(%r11) # get saved r11 pointer to pt_regs mvc __LC_RETURN_PSW(16),__PT_PSW(%r9) - ni __LC_RETURN_PSW+1,0xfd # clear wait state bit mvc 0(64,%r11),__PT_R8(%r9) lmg %r0,%r7,__PT_R0(%r9) 0: lmg %r8,%r9,__LC_RETURN_PSW @@ -923,6 +906,42 @@ cleanup_io_restore: cleanup_io_restore_insn: .quad io_done - 4 +cleanup_idle: + # copy interrupt clock & cpu timer + mvc __IDLE_EXIT(8,%r2),__LC_INT_CLOCK + mvc __VQ_IDLE_EXIT(8,%r3),__LC_ASYNC_ENTER_TIMER + cghi %r11,__LC_SAVE_AREA_ASYNC + je 0f + mvc __IDLE_EXIT(8,%r2),__LC_MCCK_CLOCK + mvc __VQ_IDLE_EXIT(8,%r3),__LC_MCCK_ENTER_TIMER +0: # check if stck & stpt have been executed + clg %r9,BASED(cleanup_idle_insn) + jhe 1f + mvc __IDLE_ENTER(8,%r2),__IDLE_EXIT(%r2) + mvc __VQ_IDLE_ENTER(8,%r3),__VQ_IDLE_EXIT(%r3) + j 2f +1: # check if the cpu timer has been reprogrammed + ltr %r5,%r5 + jz 2f + spt __VQ_IDLE_ENTER(%r3) +2: # account system time going idle + lg %r9,__LC_STEAL_TIMER + alg %r9,__IDLE_ENTER(%r2) + slg %r9,__LC_LAST_UPDATE_CLOCK + stg %r9,__LC_STEAL_TIMER + mvc __LC_LAST_UPDATE_CLOCK(8),__IDLE_EXIT(%r2) + lg %r9,__LC_SYSTEM_TIMER + alg %r9,__LC_LAST_UPDATE_TIMER + slg %r9,__VQ_IDLE_ENTER(%r3) + stg %r9,__LC_SYSTEM_TIMER + mvc __LC_LAST_UPDATE_TIMER(8),__VQ_IDLE_EXIT(%r3) + # prepare return psw + nihh %r8,0xfffd # clear wait state bit + lg %r9,48(%r11) # return from psw_idle + br %r14 +cleanup_idle_insn: + .quad psw_idle_lpsw + /* * Integer constants */ @@ -931,6 +950,8 @@ cleanup_io_restore_insn: .quad __critical_start .Lcritical_length: .quad __critical_end - __critical_start +.Lvtimer_max: + .quad 0x7fffffffffffffff #if defined(CONFIG_KVM) || defined(CONFIG_KVM_MODULE) diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index affa8e68124a..8342e65a140d 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -2,7 +2,7 @@ * arch/s390/kernel/ipl.c * ipl/reipl/dump support for Linux on s390. * - * Copyright IBM Corp. 2005,2007 + * Copyright IBM Corp. 2005,2012 * Author(s): Michael Holzheu <holzheu@de.ibm.com> * Heiko Carstens <heiko.carstens@de.ibm.com> * Volker Sameske <sameske@de.ibm.com> @@ -17,6 +17,7 @@ #include <linux/fs.h> #include <linux/gfp.h> #include <linux/crash_dump.h> +#include <linux/debug_locks.h> #include <asm/ipl.h> #include <asm/smp.h> #include <asm/setup.h> @@ -25,8 +26,9 @@ #include <asm/ebcdic.h> #include <asm/reset.h> #include <asm/sclp.h> -#include <asm/sigp.h> #include <asm/checksum.h> +#include <asm/debug.h> +#include <asm/os_info.h> #include "entry.h" #define IPL_PARM_BLOCK_VERSION 0 @@ -571,7 +573,7 @@ static void __ipl_run(void *unused) static void ipl_run(struct shutdown_trigger *trigger) { - smp_switch_to_ipl_cpu(__ipl_run, NULL); + smp_call_ipl_cpu(__ipl_run, NULL); } static int __init ipl_init(void) @@ -950,6 +952,13 @@ static struct attribute_group reipl_nss_attr_group = { .attrs = reipl_nss_attrs, }; +static void set_reipl_block_actual(struct ipl_parameter_block *reipl_block) +{ + reipl_block_actual = reipl_block; + os_info_entry_add(OS_INFO_REIPL_BLOCK, reipl_block_actual, + reipl_block->hdr.len); +} + /* reipl type */ static int reipl_set_type(enum ipl_type type) @@ -965,7 +974,7 @@ static int reipl_set_type(enum ipl_type type) reipl_method = REIPL_METHOD_CCW_VM; else reipl_method = REIPL_METHOD_CCW_CIO; - reipl_block_actual = reipl_block_ccw; + set_reipl_block_actual(reipl_block_ccw); break; case IPL_TYPE_FCP: if (diag308_set_works) @@ -974,7 +983,7 @@ static int reipl_set_type(enum ipl_type type) reipl_method = REIPL_METHOD_FCP_RO_VM; else reipl_method = REIPL_METHOD_FCP_RO_DIAG; - reipl_block_actual = reipl_block_fcp; + set_reipl_block_actual(reipl_block_fcp); break; case IPL_TYPE_FCP_DUMP: reipl_method = REIPL_METHOD_FCP_DUMP; @@ -984,7 +993,7 @@ static int reipl_set_type(enum ipl_type type) reipl_method = REIPL_METHOD_NSS_DIAG; else reipl_method = REIPL_METHOD_NSS; - reipl_block_actual = reipl_block_nss; + set_reipl_block_actual(reipl_block_nss); break; case IPL_TYPE_UNKNOWN: reipl_method = REIPL_METHOD_DEFAULT; @@ -1101,7 +1110,7 @@ static void __reipl_run(void *unused) static void reipl_run(struct shutdown_trigger *trigger) { - smp_switch_to_ipl_cpu(__reipl_run, NULL); + smp_call_ipl_cpu(__reipl_run, NULL); } static void reipl_block_ccw_init(struct ipl_parameter_block *ipb) @@ -1256,6 +1265,29 @@ static int __init reipl_fcp_init(void) return 0; } +static int __init reipl_type_init(void) +{ + enum ipl_type reipl_type = ipl_info.type; + struct ipl_parameter_block *reipl_block; + unsigned long size; + + reipl_block = os_info_old_entry(OS_INFO_REIPL_BLOCK, &size); + if (!reipl_block) + goto out; + /* + * If we have an OS info reipl block, this will be used + */ + if (reipl_block->hdr.pbt == DIAG308_IPL_TYPE_FCP) { + memcpy(reipl_block_fcp, reipl_block, size); + reipl_type = IPL_TYPE_FCP; + } else if (reipl_block->hdr.pbt == DIAG308_IPL_TYPE_CCW) { + memcpy(reipl_block_ccw, reipl_block, size); + reipl_type = IPL_TYPE_CCW; + } +out: + return reipl_set_type(reipl_type); +} + static int __init reipl_init(void) { int rc; @@ -1277,10 +1309,7 @@ static int __init reipl_init(void) rc = reipl_nss_init(); if (rc) return rc; - rc = reipl_set_type(ipl_info.type); - if (rc) - return rc; - return 0; + return reipl_type_init(); } static struct shutdown_action __refdata reipl_action = { @@ -1421,7 +1450,7 @@ static void dump_run(struct shutdown_trigger *trigger) if (dump_method == DUMP_METHOD_NONE) return; smp_send_stop(); - smp_switch_to_ipl_cpu(__dump_run, NULL); + smp_call_ipl_cpu(__dump_run, NULL); } static int __init dump_ccw_init(void) @@ -1499,30 +1528,12 @@ static struct shutdown_action __refdata dump_action = { static void dump_reipl_run(struct shutdown_trigger *trigger) { - preempt_disable(); - /* - * Bypass dynamic address translation (DAT) when storing IPL parameter - * information block address and checksum into the prefix area - * (corresponding to absolute addresses 0-8191). - * When enhanced DAT applies and the STE format control in one, - * the absolute address is formed without prefixing. In this case a - * normal store (stg/st) into the prefix area would no more match to - * absolute addresses 0-8191. - */ -#ifdef CONFIG_64BIT - asm volatile("sturg %0,%1" - :: "a" ((unsigned long) reipl_block_actual), - "a" (&lowcore_ptr[smp_processor_id()]->ipib)); -#else - asm volatile("stura %0,%1" - :: "a" ((unsigned long) reipl_block_actual), - "a" (&lowcore_ptr[smp_processor_id()]->ipib)); -#endif - asm volatile("stura %0,%1" - :: "a" (csum_partial(reipl_block_actual, - reipl_block_actual->hdr.len, 0)), - "a" (&lowcore_ptr[smp_processor_id()]->ipib_checksum)); - preempt_enable(); + u32 csum; + + csum = csum_partial(reipl_block_actual, reipl_block_actual->hdr.len, 0); + copy_to_absolute_zero(&S390_lowcore.ipib_checksum, &csum, sizeof(csum)); + copy_to_absolute_zero(&S390_lowcore.ipib, &reipl_block_actual, + sizeof(reipl_block_actual)); dump_run(trigger); } @@ -1623,9 +1634,7 @@ static void stop_run(struct shutdown_trigger *trigger) if (strcmp(trigger->name, ON_PANIC_STR) == 0 || strcmp(trigger->name, ON_RESTART_STR) == 0) disabled_wait((unsigned long) __builtin_return_address(0)); - while (sigp(smp_processor_id(), sigp_stop) == sigp_busy) - cpu_relax(); - for (;;); + smp_stop_cpu(); } static struct shutdown_action stop_action = {SHUTDOWN_ACTION_STOP_STR, @@ -1713,6 +1722,7 @@ static struct kobj_attribute on_panic_attr = static void do_panic(void) { + lgr_info_log(); on_panic_trigger.action->fn(&on_panic_trigger); stop_run(&on_panic_trigger); } @@ -1738,9 +1748,8 @@ static ssize_t on_restart_store(struct kobject *kobj, static struct kobj_attribute on_restart_attr = __ATTR(on_restart, 0644, on_restart_show, on_restart_store); -void do_restart(void) +static void __do_restart(void *ignore) { - smp_restart_with_online_cpu(); smp_send_stop(); #ifdef CONFIG_CRASH_DUMP crash_kexec(NULL); @@ -1749,6 +1758,14 @@ void do_restart(void) stop_run(&on_restart_trigger); } +void do_restart(void) +{ + tracing_off(); + debug_locks_off(); + lgr_info_log(); + smp_call_online_cpu(__do_restart, NULL); +} + /* on halt */ static struct shutdown_trigger on_halt_trigger = {ON_HALT_STR, &stop_action}; diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c index b9a7fdd9c814..1c2cdd59ccd0 100644 --- a/arch/s390/kernel/irq.c +++ b/arch/s390/kernel/irq.c @@ -165,13 +165,6 @@ static inline int ext_hash(u16 code) return (code + (code >> 9)) & 0xff; } -static void ext_int_hash_update(struct rcu_head *head) -{ - struct ext_int_info *p = container_of(head, struct ext_int_info, rcu); - - kfree(p); -} - int register_external_interrupt(u16 code, ext_int_handler_t handler) { struct ext_int_info *p; @@ -202,38 +195,34 @@ int unregister_external_interrupt(u16 code, ext_int_handler_t handler) list_for_each_entry_rcu(p, &ext_int_hash[index], entry) if (p->code == code && p->handler == handler) { list_del_rcu(&p->entry); - call_rcu(&p->rcu, ext_int_hash_update); + kfree_rcu(p, rcu); } spin_unlock_irqrestore(&ext_int_hash_lock, flags); return 0; } EXPORT_SYMBOL(unregister_external_interrupt); -void __irq_entry do_extint(struct pt_regs *regs, unsigned int ext_int_code, +void __irq_entry do_extint(struct pt_regs *regs, struct ext_code ext_code, unsigned int param32, unsigned long param64) { struct pt_regs *old_regs; - unsigned short code; struct ext_int_info *p; int index; - code = (unsigned short) ext_int_code; old_regs = set_irq_regs(regs); - s390_idle_check(regs, S390_lowcore.int_clock, - S390_lowcore.async_enter_timer); irq_enter(); if (S390_lowcore.int_clock >= S390_lowcore.clock_comparator) /* Serve timer interrupts first. */ clock_comparator_work(); kstat_cpu(smp_processor_id()).irqs[EXTERNAL_INTERRUPT]++; - if (code != 0x1004) + if (ext_code.code != 0x1004) __get_cpu_var(s390_idle).nohz_delay = 1; - index = ext_hash(code); + index = ext_hash(ext_code.code); rcu_read_lock(); list_for_each_entry_rcu(p, &ext_int_hash[index], entry) - if (likely(p->code == code)) - p->handler(ext_int_code, param32, param64); + if (likely(p->code == ext_code.code)) + p->handler(ext_code, param32, param64); rcu_read_unlock(); irq_exit(); set_irq_regs(old_regs); @@ -266,3 +255,26 @@ void service_subclass_irq_unregister(void) spin_unlock(&sc_irq_lock); } EXPORT_SYMBOL(service_subclass_irq_unregister); + +static DEFINE_SPINLOCK(ma_subclass_lock); +static int ma_subclass_refcount; + +void measurement_alert_subclass_register(void) +{ + spin_lock(&ma_subclass_lock); + if (!ma_subclass_refcount) + ctl_set_bit(0, 5); + ma_subclass_refcount++; + spin_unlock(&ma_subclass_lock); +} +EXPORT_SYMBOL(measurement_alert_subclass_register); + +void measurement_alert_subclass_unregister(void) +{ + spin_lock(&ma_subclass_lock); + ma_subclass_refcount--; + if (!ma_subclass_refcount) + ctl_clear_bit(0, 5); + spin_unlock(&ma_subclass_lock); +} +EXPORT_SYMBOL(measurement_alert_subclass_unregister); diff --git a/arch/s390/kernel/lgr.c b/arch/s390/kernel/lgr.c new file mode 100644 index 000000000000..87f080b17af1 --- /dev/null +++ b/arch/s390/kernel/lgr.c @@ -0,0 +1,200 @@ +/* + * Linux Guest Relocation (LGR) detection + * + * Copyright IBM Corp. 2012 + * Author(s): Michael Holzheu <holzheu@linux.vnet.ibm.com> + */ + +#include <linux/module.h> +#include <linux/timer.h> +#include <linux/slab.h> +#include <asm/facility.h> +#include <asm/sysinfo.h> +#include <asm/ebcdic.h> +#include <asm/debug.h> +#include <asm/ipl.h> + +#define LGR_TIMER_INTERVAL_SECS (30 * 60) +#define VM_LEVEL_MAX 2 /* Maximum is 8, but we only record two levels */ + +/* + * LGR info: Contains stfle and stsi data + */ +struct lgr_info { + /* Bit field with facility information: 4 DWORDs are stored */ + u64 stfle_fac_list[4]; + /* Level of system (1 = CEC, 2 = LPAR, 3 = z/VM */ + u32 level; + /* Level 1: CEC info (stsi 1.1.1) */ + char manufacturer[16]; + char type[4]; + char sequence[16]; + char plant[4]; + char model[16]; + /* Level 2: LPAR info (stsi 2.2.2) */ + u16 lpar_number; + char name[8]; + /* Level 3: VM info (stsi 3.2.2) */ + u8 vm_count; + struct { + char name[8]; + char cpi[16]; + } vm[VM_LEVEL_MAX]; +} __packed __aligned(8); + +/* + * LGR globals + */ +static void *lgr_page; +static struct lgr_info lgr_info_last; +static struct lgr_info lgr_info_cur; +static struct debug_info *lgr_dbf; + +/* + * Return number of valid stsi levels + */ +static inline int stsi_0(void) +{ + int rc = stsi(NULL, 0, 0, 0); + + return rc == -ENOSYS ? rc : (((unsigned int) rc) >> 28); +} + +/* + * Copy buffer and then convert it to ASCII + */ +static void cpascii(char *dst, char *src, int size) +{ + memcpy(dst, src, size); + EBCASC(dst, size); +} + +/* + * Fill LGR info with 1.1.1 stsi data + */ +static void lgr_stsi_1_1_1(struct lgr_info *lgr_info) +{ + struct sysinfo_1_1_1 *si = lgr_page; + + if (stsi(si, 1, 1, 1) == -ENOSYS) + return; + cpascii(lgr_info->manufacturer, si->manufacturer, + sizeof(si->manufacturer)); + cpascii(lgr_info->type, si->type, sizeof(si->type)); + cpascii(lgr_info->model, si->model, sizeof(si->model)); + cpascii(lgr_info->sequence, si->sequence, sizeof(si->sequence)); + cpascii(lgr_info->plant, si->plant, sizeof(si->plant)); +} + +/* + * Fill LGR info with 2.2.2 stsi data + */ +static void lgr_stsi_2_2_2(struct lgr_info *lgr_info) +{ + struct sysinfo_2_2_2 *si = lgr_page; + + if (stsi(si, 2, 2, 2) == -ENOSYS) + return; + cpascii(lgr_info->name, si->name, sizeof(si->name)); + memcpy(&lgr_info->lpar_number, &si->lpar_number, + sizeof(lgr_info->lpar_number)); +} + +/* + * Fill LGR info with 3.2.2 stsi data + */ +static void lgr_stsi_3_2_2(struct lgr_info *lgr_info) +{ + struct sysinfo_3_2_2 *si = lgr_page; + int i; + + if (stsi(si, 3, 2, 2) == -ENOSYS) + return; + for (i = 0; i < min_t(u8, si->count, VM_LEVEL_MAX); i++) { + cpascii(lgr_info->vm[i].name, si->vm[i].name, + sizeof(si->vm[i].name)); + cpascii(lgr_info->vm[i].cpi, si->vm[i].cpi, + sizeof(si->vm[i].cpi)); + } + lgr_info->vm_count = si->count; +} + +/* + * Fill LGR info with current data + */ +static void lgr_info_get(struct lgr_info *lgr_info) +{ + memset(lgr_info, 0, sizeof(*lgr_info)); + stfle(lgr_info->stfle_fac_list, ARRAY_SIZE(lgr_info->stfle_fac_list)); + lgr_info->level = stsi_0(); + if (lgr_info->level == -ENOSYS) + return; + if (lgr_info->level >= 1) + lgr_stsi_1_1_1(lgr_info); + if (lgr_info->level >= 2) + lgr_stsi_2_2_2(lgr_info); + if (lgr_info->level >= 3) + lgr_stsi_3_2_2(lgr_info); +} + +/* + * Check if LGR info has changed and if yes log new LGR info to s390dbf + */ +void lgr_info_log(void) +{ + static DEFINE_SPINLOCK(lgr_info_lock); + unsigned long flags; + + if (!spin_trylock_irqsave(&lgr_info_lock, flags)) + return; + lgr_info_get(&lgr_info_cur); + if (memcmp(&lgr_info_last, &lgr_info_cur, sizeof(lgr_info_cur)) != 0) { + debug_event(lgr_dbf, 1, &lgr_info_cur, sizeof(lgr_info_cur)); + lgr_info_last = lgr_info_cur; + } + spin_unlock_irqrestore(&lgr_info_lock, flags); +} +EXPORT_SYMBOL_GPL(lgr_info_log); + +static void lgr_timer_set(void); + +/* + * LGR timer callback + */ +static void lgr_timer_fn(unsigned long ignored) +{ + lgr_info_log(); + lgr_timer_set(); +} + +static struct timer_list lgr_timer = + TIMER_DEFERRED_INITIALIZER(lgr_timer_fn, 0, 0); + +/* + * Setup next LGR timer + */ +static void lgr_timer_set(void) +{ + mod_timer(&lgr_timer, jiffies + LGR_TIMER_INTERVAL_SECS * HZ); +} + +/* + * Initialize LGR: Add s390dbf, write initial lgr_info and setup timer + */ +static int __init lgr_init(void) +{ + lgr_page = (void *) __get_free_pages(GFP_KERNEL, 0); + if (!lgr_page) + return -ENOMEM; + lgr_dbf = debug_register("lgr", 1, 1, sizeof(struct lgr_info)); + if (!lgr_dbf) { + free_page((unsigned long) lgr_page); + return -ENOMEM; + } + debug_register_view(lgr_dbf, &debug_hex_ascii_view); + lgr_info_get(&lgr_info_last); + debug_event(lgr_dbf, 1, &lgr_info_last, sizeof(lgr_info_last)); + lgr_timer_set(); + return 0; +} +module_init(lgr_init); diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c index 47b168fb29c4..bdad47d54478 100644 --- a/arch/s390/kernel/machine_kexec.c +++ b/arch/s390/kernel/machine_kexec.c @@ -14,11 +14,11 @@ #include <linux/delay.h> #include <linux/reboot.h> #include <linux/ftrace.h> +#include <linux/debug_locks.h> #include <asm/cio.h> #include <asm/setup.h> #include <asm/pgtable.h> #include <asm/pgalloc.h> -#include <asm/system.h> #include <asm/smp.h> #include <asm/reset.h> #include <asm/ipl.h> @@ -49,50 +49,21 @@ static void add_elf_notes(int cpu) } /* - * Store status of next available physical CPU - */ -static int store_status_next(int start_cpu, int this_cpu) -{ - struct save_area *sa = (void *) 4608 + store_prefix(); - int cpu, rc; - - for (cpu = start_cpu; cpu < 65536; cpu++) { - if (cpu == this_cpu) - continue; - do { - rc = raw_sigp(cpu, sigp_stop_and_store_status); - } while (rc == sigp_busy); - if (rc != sigp_order_code_accepted) - continue; - if (sa->pref_reg) - return cpu; - } - return -1; -} - -/* * Initialize CPU ELF notes */ void setup_regs(void) { unsigned long sa = S390_lowcore.prefixreg_save_area + SAVE_AREA_BASE; - int cpu, this_cpu, phys_cpu = 0, first = 1; + int cpu, this_cpu; - this_cpu = stap(); - - if (!S390_lowcore.prefixreg_save_area) - first = 0; + this_cpu = smp_find_processor_id(stap()); + add_elf_notes(this_cpu); for_each_online_cpu(cpu) { - if (first) { - add_elf_notes(cpu); - first = 0; + if (cpu == this_cpu) + continue; + if (smp_store_status(cpu)) continue; - } - phys_cpu = store_status_next(phys_cpu, this_cpu); - if (phys_cpu == -1) - break; add_elf_notes(cpu); - phys_cpu++; } /* Copy dump CPU store status info to absolute zero */ memcpy((void *) SAVE_AREA_BASE, (void *) sa, sizeof(struct save_area)); @@ -238,10 +209,14 @@ static void __machine_kexec(void *data) struct kimage *image = data; pfault_fini(); - if (image->type == KEXEC_TYPE_CRASH) + tracing_off(); + debug_locks_off(); + if (image->type == KEXEC_TYPE_CRASH) { + lgr_info_log(); s390_reset_system(__do_machine_kdump, data); - else + } else { s390_reset_system(__do_machine_kexec, data); + } disabled_wait((unsigned long) __builtin_return_address(0)); } @@ -255,5 +230,5 @@ void machine_kexec(struct kimage *image) return; tracer_disable(); smp_send_stop(); - smp_switch_to_ipl_cpu(__machine_kexec, image); + smp_call_ipl_cpu(__machine_kexec, image); } diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c index 0fd2e863e114..8c372ca61350 100644 --- a/arch/s390/kernel/nmi.c +++ b/arch/s390/kernel/nmi.c @@ -254,8 +254,6 @@ void notrace s390_do_machine_check(struct pt_regs *regs) int umode; nmi_enter(); - s390_idle_check(regs, S390_lowcore.mcck_clock, - S390_lowcore.mcck_enter_timer); kstat_cpu(smp_processor_id()).irqs[NMI_NMI]++; mci = (struct mci *) &S390_lowcore.mcck_interruption_code; mcck = &__get_cpu_var(cpu_mcck); diff --git a/arch/s390/kernel/os_info.c b/arch/s390/kernel/os_info.c new file mode 100644 index 000000000000..e8d6c214d498 --- /dev/null +++ b/arch/s390/kernel/os_info.c @@ -0,0 +1,168 @@ +/* + * OS info memory interface + * + * Copyright IBM Corp. 2012 + * Author(s): Michael Holzheu <holzheu@linux.vnet.ibm.com> + */ + +#define KMSG_COMPONENT "os_info" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + +#include <linux/crash_dump.h> +#include <linux/kernel.h> +#include <asm/checksum.h> +#include <asm/lowcore.h> +#include <asm/os_info.h> + +/* + * OS info structure has to be page aligned + */ +static struct os_info os_info __page_aligned_data; + +/* + * Compute checksum over OS info structure + */ +u32 os_info_csum(struct os_info *os_info) +{ + int size = sizeof(*os_info) - offsetof(struct os_info, version_major); + return csum_partial(&os_info->version_major, size, 0); +} + +/* + * Add crashkernel info to OS info and update checksum + */ +void os_info_crashkernel_add(unsigned long base, unsigned long size) +{ + os_info.crashkernel_addr = (u64)(unsigned long)base; + os_info.crashkernel_size = (u64)(unsigned long)size; + os_info.csum = os_info_csum(&os_info); +} + +/* + * Add OS info entry and update checksum + */ +void os_info_entry_add(int nr, void *ptr, u64 size) +{ + os_info.entry[nr].addr = (u64)(unsigned long)ptr; + os_info.entry[nr].size = size; + os_info.entry[nr].csum = csum_partial(ptr, size, 0); + os_info.csum = os_info_csum(&os_info); +} + +/* + * Initialize OS info struture and set lowcore pointer + */ +void __init os_info_init(void) +{ + void *ptr = &os_info; + + os_info.version_major = OS_INFO_VERSION_MAJOR; + os_info.version_minor = OS_INFO_VERSION_MINOR; + os_info.magic = OS_INFO_MAGIC; + os_info.csum = os_info_csum(&os_info); + copy_to_absolute_zero(&S390_lowcore.os_info, &ptr, sizeof(ptr)); +} + +#ifdef CONFIG_CRASH_DUMP + +static struct os_info *os_info_old; + +/* + * Allocate and copy OS info entry from oldmem + */ +static void os_info_old_alloc(int nr, int align) +{ + unsigned long addr, size = 0; + char *buf, *buf_align, *msg; + u32 csum; + + addr = os_info_old->entry[nr].addr; + if (!addr) { + msg = "not available"; + goto fail; + } + size = os_info_old->entry[nr].size; + buf = kmalloc(size + align - 1, GFP_KERNEL); + if (!buf) { + msg = "alloc failed"; + goto fail; + } + buf_align = PTR_ALIGN(buf, align); + if (copy_from_oldmem(buf_align, (void *) addr, size)) { + msg = "copy failed"; + goto fail_free; + } + csum = csum_partial(buf_align, size, 0); + if (csum != os_info_old->entry[nr].csum) { + msg = "checksum failed"; + goto fail_free; + } + os_info_old->entry[nr].addr = (u64)(unsigned long)buf_align; + msg = "copied"; + goto out; +fail_free: + kfree(buf); +fail: + os_info_old->entry[nr].addr = 0; +out: + pr_info("entry %i: %s (addr=0x%lx size=%lu)\n", + nr, msg, addr, size); +} + +/* + * Initialize os info and os info entries from oldmem + */ +static void os_info_old_init(void) +{ + static int os_info_init; + unsigned long addr; + + if (os_info_init) + return; + if (!OLDMEM_BASE) + goto fail; + if (copy_from_oldmem(&addr, &S390_lowcore.os_info, sizeof(addr))) + goto fail; + if (addr == 0 || addr % PAGE_SIZE) + goto fail; + os_info_old = kzalloc(sizeof(*os_info_old), GFP_KERNEL); + if (!os_info_old) + goto fail; + if (copy_from_oldmem(os_info_old, (void *) addr, sizeof(*os_info_old))) + goto fail_free; + if (os_info_old->magic != OS_INFO_MAGIC) + goto fail_free; + if (os_info_old->csum != os_info_csum(os_info_old)) + goto fail_free; + if (os_info_old->version_major > OS_INFO_VERSION_MAJOR) + goto fail_free; + os_info_old_alloc(OS_INFO_VMCOREINFO, 1); + os_info_old_alloc(OS_INFO_REIPL_BLOCK, 1); + os_info_old_alloc(OS_INFO_INIT_FN, PAGE_SIZE); + pr_info("crashkernel: addr=0x%lx size=%lu\n", + (unsigned long) os_info_old->crashkernel_addr, + (unsigned long) os_info_old->crashkernel_size); + os_info_init = 1; + return; +fail_free: + kfree(os_info_old); +fail: + os_info_init = 1; + os_info_old = NULL; +} + +/* + * Return pointer to os infor entry and its size + */ +void *os_info_old_entry(int nr, unsigned long *size) +{ + os_info_old_init(); + + if (!os_info_old) + return NULL; + if (!os_info_old->entry[nr].addr) + return NULL; + *size = (unsigned long) os_info_old->entry[nr].size; + return (void *)(unsigned long)os_info_old->entry[nr].addr; +} +#endif diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c new file mode 100644 index 000000000000..46405086479c --- /dev/null +++ b/arch/s390/kernel/perf_cpum_cf.c @@ -0,0 +1,690 @@ +/* + * Performance event support for s390x - CPU-measurement Counter Facility + * + * Copyright IBM Corp. 2012 + * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. + */ +#define KMSG_COMPONENT "cpum_cf" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + +#include <linux/kernel.h> +#include <linux/kernel_stat.h> +#include <linux/perf_event.h> +#include <linux/percpu.h> +#include <linux/notifier.h> +#include <linux/init.h> +#include <linux/export.h> +#include <asm/ctl_reg.h> +#include <asm/irq.h> +#include <asm/cpu_mf.h> + +/* CPU-measurement counter facility supports these CPU counter sets: + * For CPU counter sets: + * Basic counter set: 0-31 + * Problem-state counter set: 32-63 + * Crypto-activity counter set: 64-127 + * Extented counter set: 128-159 + */ +enum cpumf_ctr_set { + /* CPU counter sets */ + CPUMF_CTR_SET_BASIC = 0, + CPUMF_CTR_SET_USER = 1, + CPUMF_CTR_SET_CRYPTO = 2, + CPUMF_CTR_SET_EXT = 3, + + /* Maximum number of counter sets */ + CPUMF_CTR_SET_MAX, +}; + +#define CPUMF_LCCTL_ENABLE_SHIFT 16 +#define CPUMF_LCCTL_ACTCTL_SHIFT 0 +static const u64 cpumf_state_ctl[CPUMF_CTR_SET_MAX] = { + [CPUMF_CTR_SET_BASIC] = 0x02, + [CPUMF_CTR_SET_USER] = 0x04, + [CPUMF_CTR_SET_CRYPTO] = 0x08, + [CPUMF_CTR_SET_EXT] = 0x01, +}; + +static void ctr_set_enable(u64 *state, int ctr_set) +{ + *state |= cpumf_state_ctl[ctr_set] << CPUMF_LCCTL_ENABLE_SHIFT; +} +static void ctr_set_disable(u64 *state, int ctr_set) +{ + *state &= ~(cpumf_state_ctl[ctr_set] << CPUMF_LCCTL_ENABLE_SHIFT); +} +static void ctr_set_start(u64 *state, int ctr_set) +{ + *state |= cpumf_state_ctl[ctr_set] << CPUMF_LCCTL_ACTCTL_SHIFT; +} +static void ctr_set_stop(u64 *state, int ctr_set) +{ + *state &= ~(cpumf_state_ctl[ctr_set] << CPUMF_LCCTL_ACTCTL_SHIFT); +} + +/* Local CPUMF event structure */ +struct cpu_hw_events { + struct cpumf_ctr_info info; + atomic_t ctr_set[CPUMF_CTR_SET_MAX]; + u64 state, tx_state; + unsigned int flags; +}; +static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { + .ctr_set = { + [CPUMF_CTR_SET_BASIC] = ATOMIC_INIT(0), + [CPUMF_CTR_SET_USER] = ATOMIC_INIT(0), + [CPUMF_CTR_SET_CRYPTO] = ATOMIC_INIT(0), + [CPUMF_CTR_SET_EXT] = ATOMIC_INIT(0), + }, + .state = 0, + .flags = 0, +}; + +static int get_counter_set(u64 event) +{ + int set = -1; + + if (event < 32) + set = CPUMF_CTR_SET_BASIC; + else if (event < 64) + set = CPUMF_CTR_SET_USER; + else if (event < 128) + set = CPUMF_CTR_SET_CRYPTO; + else if (event < 160) + set = CPUMF_CTR_SET_EXT; + + return set; +} + +static int validate_event(const struct hw_perf_event *hwc) +{ + switch (hwc->config_base) { + case CPUMF_CTR_SET_BASIC: + case CPUMF_CTR_SET_USER: + case CPUMF_CTR_SET_CRYPTO: + case CPUMF_CTR_SET_EXT: + /* check for reserved counters */ + if ((hwc->config >= 6 && hwc->config <= 31) || + (hwc->config >= 38 && hwc->config <= 63) || + (hwc->config >= 80 && hwc->config <= 127)) + return -EOPNOTSUPP; + break; + default: + return -EINVAL; + } + + return 0; +} + +static int validate_ctr_version(const struct hw_perf_event *hwc) +{ + struct cpu_hw_events *cpuhw; + int err = 0; + + cpuhw = &get_cpu_var(cpu_hw_events); + + /* check required version for counter sets */ + switch (hwc->config_base) { + case CPUMF_CTR_SET_BASIC: + case CPUMF_CTR_SET_USER: + if (cpuhw->info.cfvn < 1) + err = -EOPNOTSUPP; + break; + case CPUMF_CTR_SET_CRYPTO: + case CPUMF_CTR_SET_EXT: + if (cpuhw->info.csvn < 1) + err = -EOPNOTSUPP; + break; + } + + put_cpu_var(cpu_hw_events); + return err; +} + +static int validate_ctr_auth(const struct hw_perf_event *hwc) +{ + struct cpu_hw_events *cpuhw; + u64 ctrs_state; + int err = 0; + + cpuhw = &get_cpu_var(cpu_hw_events); + + /* check authorization for cpu counter sets */ + ctrs_state = cpumf_state_ctl[hwc->config_base]; + if (!(ctrs_state & cpuhw->info.auth_ctl)) + err = -EPERM; + + put_cpu_var(cpu_hw_events); + return err; +} + +/* + * Change the CPUMF state to active. + * Enable and activate the CPU-counter sets according + * to the per-cpu control state. + */ +static void cpumf_pmu_enable(struct pmu *pmu) +{ + struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + int err; + + if (cpuhw->flags & PMU_F_ENABLED) + return; + + err = lcctl(cpuhw->state); + if (err) { + pr_err("Enabling the performance measuring unit " + "failed with rc=%lx\n", err); + return; + } + + cpuhw->flags |= PMU_F_ENABLED; +} + +/* + * Change the CPUMF state to inactive. + * Disable and enable (inactive) the CPU-counter sets according + * to the per-cpu control state. + */ +static void cpumf_pmu_disable(struct pmu *pmu) +{ + struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + int err; + u64 inactive; + + if (!(cpuhw->flags & PMU_F_ENABLED)) + return; + + inactive = cpuhw->state & ~((1 << CPUMF_LCCTL_ENABLE_SHIFT) - 1); + err = lcctl(inactive); + if (err) { + pr_err("Disabling the performance measuring unit " + "failed with rc=%lx\n", err); + return; + } + + cpuhw->flags &= ~PMU_F_ENABLED; +} + + +/* Number of perf events counting hardware events */ +static atomic_t num_events = ATOMIC_INIT(0); +/* Used to avoid races in calling reserve/release_cpumf_hardware */ +static DEFINE_MUTEX(pmc_reserve_mutex); + +/* CPU-measurement alerts for the counter facility */ +static void cpumf_measurement_alert(struct ext_code ext_code, + unsigned int alert, unsigned long unused) +{ + struct cpu_hw_events *cpuhw; + + if (!(alert & CPU_MF_INT_CF_MASK)) + return; + + kstat_cpu(smp_processor_id()).irqs[EXTINT_CPM]++; + cpuhw = &__get_cpu_var(cpu_hw_events); + + /* Measurement alerts are shared and might happen when the PMU + * is not reserved. Ignore these alerts in this case. */ + if (!(cpuhw->flags & PMU_F_RESERVED)) + return; + + /* counter authorization change alert */ + if (alert & CPU_MF_INT_CF_CACA) + qctri(&cpuhw->info); + + /* loss of counter data alert */ + if (alert & CPU_MF_INT_CF_LCDA) + pr_err("CPU[%i] Counter data was lost\n", smp_processor_id()); +} + +#define PMC_INIT 0 +#define PMC_RELEASE 1 +static void setup_pmc_cpu(void *flags) +{ + struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + + switch (*((int *) flags)) { + case PMC_INIT: + memset(&cpuhw->info, 0, sizeof(cpuhw->info)); + qctri(&cpuhw->info); + cpuhw->flags |= PMU_F_RESERVED; + break; + + case PMC_RELEASE: + cpuhw->flags &= ~PMU_F_RESERVED; + break; + } + + /* Disable CPU counter sets */ + lcctl(0); +} + +/* Initialize the CPU-measurement facility */ +static int reserve_pmc_hardware(void) +{ + int flags = PMC_INIT; + + on_each_cpu(setup_pmc_cpu, &flags, 1); + measurement_alert_subclass_register(); + + return 0; +} + +/* Release the CPU-measurement facility */ +static void release_pmc_hardware(void) +{ + int flags = PMC_RELEASE; + + on_each_cpu(setup_pmc_cpu, &flags, 1); + measurement_alert_subclass_unregister(); +} + +/* Release the PMU if event is the last perf event */ +static void hw_perf_event_destroy(struct perf_event *event) +{ + if (!atomic_add_unless(&num_events, -1, 1)) { + mutex_lock(&pmc_reserve_mutex); + if (atomic_dec_return(&num_events) == 0) + release_pmc_hardware(); + mutex_unlock(&pmc_reserve_mutex); + } +} + +/* CPUMF <-> perf event mappings for kernel+userspace (basic set) */ +static const int cpumf_generic_events_basic[] = { + [PERF_COUNT_HW_CPU_CYCLES] = 0, + [PERF_COUNT_HW_INSTRUCTIONS] = 1, + [PERF_COUNT_HW_CACHE_REFERENCES] = -1, + [PERF_COUNT_HW_CACHE_MISSES] = -1, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = -1, + [PERF_COUNT_HW_BRANCH_MISSES] = -1, + [PERF_COUNT_HW_BUS_CYCLES] = -1, +}; +/* CPUMF <-> perf event mappings for userspace (problem-state set) */ +static const int cpumf_generic_events_user[] = { + [PERF_COUNT_HW_CPU_CYCLES] = 32, + [PERF_COUNT_HW_INSTRUCTIONS] = 33, + [PERF_COUNT_HW_CACHE_REFERENCES] = -1, + [PERF_COUNT_HW_CACHE_MISSES] = -1, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = -1, + [PERF_COUNT_HW_BRANCH_MISSES] = -1, + [PERF_COUNT_HW_BUS_CYCLES] = -1, +}; + +static int __hw_perf_event_init(struct perf_event *event) +{ + struct perf_event_attr *attr = &event->attr; + struct hw_perf_event *hwc = &event->hw; + int err; + u64 ev; + + switch (attr->type) { + case PERF_TYPE_RAW: + /* Raw events are used to access counters directly, + * hence do not permit excludes */ + if (attr->exclude_kernel || attr->exclude_user || + attr->exclude_hv) + return -EOPNOTSUPP; + ev = attr->config; + break; + + case PERF_TYPE_HARDWARE: + ev = attr->config; + /* Count user space (problem-state) only */ + if (!attr->exclude_user && attr->exclude_kernel) { + if (ev >= ARRAY_SIZE(cpumf_generic_events_user)) + return -EOPNOTSUPP; + ev = cpumf_generic_events_user[ev]; + + /* No support for kernel space counters only */ + } else if (!attr->exclude_kernel && attr->exclude_user) { + return -EOPNOTSUPP; + + /* Count user and kernel space */ + } else { + if (ev >= ARRAY_SIZE(cpumf_generic_events_basic)) + return -EOPNOTSUPP; + ev = cpumf_generic_events_basic[ev]; + } + break; + + default: + return -ENOENT; + } + + if (ev == -1) + return -ENOENT; + + if (ev >= PERF_CPUM_CF_MAX_CTR) + return -EINVAL; + + /* The CPU measurement counter facility does not have any interrupts + * to do sampling. Sampling must be provided by external means, + * for example, by timers. + */ + if (hwc->sample_period) + return -EINVAL; + + /* Use the hardware perf event structure to store the counter number + * in 'config' member and the counter set to which the counter belongs + * in the 'config_base'. The counter set (config_base) is then used + * to enable/disable the counters. + */ + hwc->config = ev; + hwc->config_base = get_counter_set(ev); + + /* Validate the counter that is assigned to this event. + * Because the counter facility can use numerous counters at the + * same time without constraints, it is not necessary to explicity + * validate event groups (event->group_leader != event). + */ + err = validate_event(hwc); + if (err) + return err; + + /* Initialize for using the CPU-measurement counter facility */ + if (!atomic_inc_not_zero(&num_events)) { + mutex_lock(&pmc_reserve_mutex); + if (atomic_read(&num_events) == 0 && reserve_pmc_hardware()) + err = -EBUSY; + else + atomic_inc(&num_events); + mutex_unlock(&pmc_reserve_mutex); + } + event->destroy = hw_perf_event_destroy; + + /* Finally, validate version and authorization of the counter set */ + err = validate_ctr_auth(hwc); + if (!err) + err = validate_ctr_version(hwc); + + return err; +} + +static int cpumf_pmu_event_init(struct perf_event *event) +{ + int err; + + switch (event->attr.type) { + case PERF_TYPE_HARDWARE: + case PERF_TYPE_HW_CACHE: + case PERF_TYPE_RAW: + err = __hw_perf_event_init(event); + break; + default: + return -ENOENT; + } + + if (unlikely(err) && event->destroy) + event->destroy(event); + + return err; +} + +static int hw_perf_event_reset(struct perf_event *event) +{ + u64 prev, new; + int err; + + do { + prev = local64_read(&event->hw.prev_count); + err = ecctr(event->hw.config, &new); + if (err) { + if (err != 3) + break; + /* The counter is not (yet) available. This + * might happen if the counter set to which + * this counter belongs is in the disabled + * state. + */ + new = 0; + } + } while (local64_cmpxchg(&event->hw.prev_count, prev, new) != prev); + + return err; +} + +static int hw_perf_event_update(struct perf_event *event) +{ + u64 prev, new, delta; + int err; + + do { + prev = local64_read(&event->hw.prev_count); + err = ecctr(event->hw.config, &new); + if (err) + goto out; + } while (local64_cmpxchg(&event->hw.prev_count, prev, new) != prev); + + delta = (prev <= new) ? new - prev + : (-1ULL - prev) + new + 1; /* overflow */ + local64_add(delta, &event->count); +out: + return err; +} + +static void cpumf_pmu_read(struct perf_event *event) +{ + if (event->hw.state & PERF_HES_STOPPED) + return; + + hw_perf_event_update(event); +} + +static void cpumf_pmu_start(struct perf_event *event, int flags) +{ + struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + struct hw_perf_event *hwc = &event->hw; + + if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED))) + return; + + if (WARN_ON_ONCE(hwc->config == -1)) + return; + + if (flags & PERF_EF_RELOAD) + WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE)); + + hwc->state = 0; + + /* (Re-)enable and activate the counter set */ + ctr_set_enable(&cpuhw->state, hwc->config_base); + ctr_set_start(&cpuhw->state, hwc->config_base); + + /* The counter set to which this counter belongs can be already active. + * Because all counters in a set are active, the event->hw.prev_count + * needs to be synchronized. At this point, the counter set can be in + * the inactive or disabled state. + */ + hw_perf_event_reset(event); + + /* increment refcount for this counter set */ + atomic_inc(&cpuhw->ctr_set[hwc->config_base]); +} + +static void cpumf_pmu_stop(struct perf_event *event, int flags) +{ + struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + struct hw_perf_event *hwc = &event->hw; + + if (!(hwc->state & PERF_HES_STOPPED)) { + /* Decrement reference count for this counter set and if this + * is the last used counter in the set, clear activation + * control and set the counter set state to inactive. + */ + if (!atomic_dec_return(&cpuhw->ctr_set[hwc->config_base])) + ctr_set_stop(&cpuhw->state, hwc->config_base); + event->hw.state |= PERF_HES_STOPPED; + } + + if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) { + hw_perf_event_update(event); + event->hw.state |= PERF_HES_UPTODATE; + } +} + +static int cpumf_pmu_add(struct perf_event *event, int flags) +{ + struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + + /* Check authorization for the counter set to which this + * counter belongs. + * For group events transaction, the authorization check is + * done in cpumf_pmu_commit_txn(). + */ + if (!(cpuhw->flags & PERF_EVENT_TXN)) + if (validate_ctr_auth(&event->hw)) + return -EPERM; + + ctr_set_enable(&cpuhw->state, event->hw.config_base); + event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED; + + if (flags & PERF_EF_START) + cpumf_pmu_start(event, PERF_EF_RELOAD); + + perf_event_update_userpage(event); + + return 0; +} + +static void cpumf_pmu_del(struct perf_event *event, int flags) +{ + struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + + cpumf_pmu_stop(event, PERF_EF_UPDATE); + + /* Check if any counter in the counter set is still used. If not used, + * change the counter set to the disabled state. This also clears the + * content of all counters in the set. + * + * When a new perf event has been added but not yet started, this can + * clear enable control and resets all counters in a set. Therefore, + * cpumf_pmu_start() always has to reenable a counter set. + */ + if (!atomic_read(&cpuhw->ctr_set[event->hw.config_base])) + ctr_set_disable(&cpuhw->state, event->hw.config_base); + + perf_event_update_userpage(event); +} + +/* + * Start group events scheduling transaction. + * Set flags to perform a single test at commit time. + */ +static void cpumf_pmu_start_txn(struct pmu *pmu) +{ + struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + + perf_pmu_disable(pmu); + cpuhw->flags |= PERF_EVENT_TXN; + cpuhw->tx_state = cpuhw->state; +} + +/* + * Stop and cancel a group events scheduling tranctions. + * Assumes cpumf_pmu_del() is called for each successful added + * cpumf_pmu_add() during the transaction. + */ +static void cpumf_pmu_cancel_txn(struct pmu *pmu) +{ + struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + + WARN_ON(cpuhw->tx_state != cpuhw->state); + + cpuhw->flags &= ~PERF_EVENT_TXN; + perf_pmu_enable(pmu); +} + +/* + * Commit the group events scheduling transaction. On success, the + * transaction is closed. On error, the transaction is kept open + * until cpumf_pmu_cancel_txn() is called. + */ +static int cpumf_pmu_commit_txn(struct pmu *pmu) +{ + struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + u64 state; + + /* check if the updated state can be scheduled */ + state = cpuhw->state & ~((1 << CPUMF_LCCTL_ENABLE_SHIFT) - 1); + state >>= CPUMF_LCCTL_ENABLE_SHIFT; + if ((state & cpuhw->info.auth_ctl) != state) + return -EPERM; + + cpuhw->flags &= ~PERF_EVENT_TXN; + perf_pmu_enable(pmu); + return 0; +} + +/* Performance monitoring unit for s390x */ +static struct pmu cpumf_pmu = { + .pmu_enable = cpumf_pmu_enable, + .pmu_disable = cpumf_pmu_disable, + .event_init = cpumf_pmu_event_init, + .add = cpumf_pmu_add, + .del = cpumf_pmu_del, + .start = cpumf_pmu_start, + .stop = cpumf_pmu_stop, + .read = cpumf_pmu_read, + .start_txn = cpumf_pmu_start_txn, + .commit_txn = cpumf_pmu_commit_txn, + .cancel_txn = cpumf_pmu_cancel_txn, +}; + +static int __cpuinit cpumf_pmu_notifier(struct notifier_block *self, + unsigned long action, void *hcpu) +{ + unsigned int cpu = (long) hcpu; + int flags; + + switch (action & ~CPU_TASKS_FROZEN) { + case CPU_ONLINE: + flags = PMC_INIT; + smp_call_function_single(cpu, setup_pmc_cpu, &flags, 1); + break; + case CPU_DOWN_PREPARE: + flags = PMC_RELEASE; + smp_call_function_single(cpu, setup_pmc_cpu, &flags, 1); + break; + default: + break; + } + + return NOTIFY_OK; +} + +static int __init cpumf_pmu_init(void) +{ + int rc; + + if (!cpum_cf_avail()) + return -ENODEV; + + /* clear bit 15 of cr0 to unauthorize problem-state to + * extract measurement counters */ + ctl_clear_bit(0, 48); + + /* register handler for measurement-alert interruptions */ + rc = register_external_interrupt(0x1407, cpumf_measurement_alert); + if (rc) { + pr_err("Registering for CPU-measurement alerts " + "failed with rc=%i\n", rc); + goto out; + } + + rc = perf_pmu_register(&cpumf_pmu, "cpum_cf", PERF_TYPE_RAW); + if (rc) { + pr_err("Registering the cpum_cf PMU failed with rc=%i\n", rc); + unregister_external_interrupt(0x1407, cpumf_measurement_alert); + goto out; + } + perf_cpu_notifier(cpumf_pmu_notifier); +out: + return rc; +} +early_initcall(cpumf_pmu_init); diff --git a/arch/s390/kernel/perf_event.c b/arch/s390/kernel/perf_event.c new file mode 100644 index 000000000000..f58f37f66824 --- /dev/null +++ b/arch/s390/kernel/perf_event.c @@ -0,0 +1,124 @@ +/* + * Performance event support for s390x + * + * Copyright IBM Corp. 2012 + * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. + */ +#define KMSG_COMPONENT "perf" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + +#include <linux/kernel.h> +#include <linux/perf_event.h> +#include <linux/percpu.h> +#include <linux/export.h> +#include <asm/irq.h> +#include <asm/cpu_mf.h> +#include <asm/lowcore.h> +#include <asm/processor.h> + +const char *perf_pmu_name(void) +{ + if (cpum_cf_avail() || cpum_sf_avail()) + return "CPU-measurement facilities (CPUMF)"; + return "pmu"; +} +EXPORT_SYMBOL(perf_pmu_name); + +int perf_num_counters(void) +{ + int num = 0; + + if (cpum_cf_avail()) + num += PERF_CPUM_CF_MAX_CTR; + + return num; +} +EXPORT_SYMBOL(perf_num_counters); + +void perf_event_print_debug(void) +{ + struct cpumf_ctr_info cf_info; + unsigned long flags; + int cpu; + + if (!cpum_cf_avail()) + return; + + local_irq_save(flags); + + cpu = smp_processor_id(); + memset(&cf_info, 0, sizeof(cf_info)); + if (!qctri(&cf_info)) { + pr_info("CPU[%i] CPUM_CF: ver=%u.%u A=%04x E=%04x C=%04x\n", + cpu, cf_info.cfvn, cf_info.csvn, + cf_info.auth_ctl, cf_info.enable_ctl, cf_info.act_ctl); + print_hex_dump_bytes("CPUMF Query: ", DUMP_PREFIX_OFFSET, + &cf_info, sizeof(cf_info)); + } + + local_irq_restore(flags); +} + +/* See also arch/s390/kernel/traps.c */ +static unsigned long __store_trace(struct perf_callchain_entry *entry, + unsigned long sp, + unsigned long low, unsigned long high) +{ + struct stack_frame *sf; + struct pt_regs *regs; + + while (1) { + sp = sp & PSW_ADDR_INSN; + if (sp < low || sp > high - sizeof(*sf)) + return sp; + sf = (struct stack_frame *) sp; + perf_callchain_store(entry, sf->gprs[8] & PSW_ADDR_INSN); + /* Follow the backchain. */ + while (1) { + low = sp; + sp = sf->back_chain & PSW_ADDR_INSN; + if (!sp) + break; + if (sp <= low || sp > high - sizeof(*sf)) + return sp; + sf = (struct stack_frame *) sp; + perf_callchain_store(entry, + sf->gprs[8] & PSW_ADDR_INSN); + } + /* Zero backchain detected, check for interrupt frame. */ + sp = (unsigned long) (sf + 1); + if (sp <= low || sp > high - sizeof(*regs)) + return sp; + regs = (struct pt_regs *) sp; + perf_callchain_store(entry, sf->gprs[8] & PSW_ADDR_INSN); + low = sp; + sp = regs->gprs[15]; + } +} + +void perf_callchain_kernel(struct perf_callchain_entry *entry, + struct pt_regs *regs) +{ + unsigned long head; + struct stack_frame *head_sf; + + if (user_mode(regs)) + return; + + head = regs->gprs[15]; + head_sf = (struct stack_frame *) head; + + if (!head_sf || !head_sf->back_chain) + return; + + head = head_sf->back_chain; + head = __store_trace(entry, head, S390_lowcore.async_stack - ASYNC_SIZE, + S390_lowcore.async_stack); + + __store_trace(entry, head, S390_lowcore.thread_info, + S390_lowcore.thread_info + THREAD_SIZE); +} diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index e795933eb2cb..60055cefdd04 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -23,13 +23,13 @@ #include <linux/kprobes.h> #include <linux/random.h> #include <linux/module.h> -#include <asm/system.h> #include <asm/io.h> #include <asm/processor.h> #include <asm/irq.h> #include <asm/timer.h> #include <asm/nmi.h> #include <asm/smp.h> +#include <asm/switch_to.h> #include "entry.h" asmlinkage void ret_from_fork(void) asm ("ret_from_fork"); @@ -77,13 +77,8 @@ static void default_idle(void) local_irq_enable(); return; } - trace_hardirqs_on(); - /* Don't trace preempt off for idle. */ - stop_critical_timings(); - /* Stop virtual timer and halt the cpu. */ + /* Halt the cpu and keep track of cpu time accounting. */ vtime_stop_cpu(); - /* Reenable preemption tracer. */ - start_critical_timings(); } void cpu_idle(void) @@ -97,9 +92,7 @@ void cpu_idle(void) tick_nohz_idle_exit(); if (test_thread_flag(TIF_MCCK_PENDING)) s390_handle_mcck(); - preempt_enable_no_resched(); - schedule(); - preempt_disable(); + schedule_preempt_disabled(); } } diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index 61f95489d70c..02f300fbf070 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -26,9 +26,9 @@ #include <asm/page.h> #include <asm/pgtable.h> #include <asm/pgalloc.h> -#include <asm/system.h> #include <asm/uaccess.h> #include <asm/unistd.h> +#include <asm/switch_to.h> #include "entry.h" #ifdef CONFIG_COMPAT diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 3b2efc81f34e..06264ae8ccd9 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -2,7 +2,7 @@ * arch/s390/kernel/setup.c * * S390 version - * Copyright (C) IBM Corp. 1999,2010 + * Copyright (C) IBM Corp. 1999,2012 * Author(s): Hartmut Penner (hp@de.ibm.com), * Martin Schwidefsky (schwidefsky@de.ibm.com) * @@ -50,7 +50,7 @@ #include <asm/ipl.h> #include <asm/uaccess.h> -#include <asm/system.h> +#include <asm/facility.h> #include <asm/smp.h> #include <asm/mmu_context.h> #include <asm/cpcmd.h> @@ -62,6 +62,8 @@ #include <asm/ebcdic.h> #include <asm/kvm_virtio.h> #include <asm/diag.h> +#include <asm/os_info.h> +#include "entry.h" long psw_kernel_bits = PSW_DEFAULT_KEY | PSW_MASK_BASE | PSW_ASC_PRIMARY | PSW_MASK_EA | PSW_MASK_BA; @@ -351,8 +353,9 @@ static void setup_addressing_mode(void) } } -static void __init -setup_lowcore(void) +void *restart_stack __attribute__((__section__(".data"))); + +static void __init setup_lowcore(void) { struct _lowcore *lc; @@ -363,7 +366,7 @@ setup_lowcore(void) lc = __alloc_bootmem_low(LC_PAGES * PAGE_SIZE, LC_PAGES * PAGE_SIZE, 0); lc->restart_psw.mask = psw_kernel_bits; lc->restart_psw.addr = - PSW_ADDR_AMODE | (unsigned long) psw_restart_int_handler; + PSW_ADDR_AMODE | (unsigned long) restart_int_handler; lc->external_new_psw.mask = psw_kernel_bits | PSW_MASK_DAT | PSW_MASK_MCHECK; lc->external_new_psw.addr = @@ -412,6 +415,24 @@ setup_lowcore(void) lc->last_update_timer = S390_lowcore.last_update_timer; lc->last_update_clock = S390_lowcore.last_update_clock; lc->ftrace_func = S390_lowcore.ftrace_func; + + restart_stack = __alloc_bootmem(ASYNC_SIZE, ASYNC_SIZE, 0); + restart_stack += ASYNC_SIZE; + + /* + * Set up PSW restart to call ipl.c:do_restart(). Copy the relevant + * restart data to the absolute zero lowcore. This is necesary if + * PSW restart is done on an offline CPU that has lowcore zero. + */ + lc->restart_stack = (unsigned long) restart_stack; + lc->restart_fn = (unsigned long) do_restart; + lc->restart_data = 0; + lc->restart_source = -1UL; + memcpy(&S390_lowcore.restart_stack, &lc->restart_stack, + 4*sizeof(unsigned long)); + copy_to_absolute_zero(&S390_lowcore.restart_psw, + &lc->restart_psw, sizeof(psw_t)); + set_prefix((u32)(unsigned long) lc); lowcore_ptr[0] = lc; } @@ -572,27 +593,6 @@ static void __init setup_memory_end(void) } } -void *restart_stack __attribute__((__section__(".data"))); - -/* - * Setup new PSW and allocate stack for PSW restart interrupt - */ -static void __init setup_restart_psw(void) -{ - psw_t psw; - - restart_stack = __alloc_bootmem(ASYNC_SIZE, ASYNC_SIZE, 0); - restart_stack += ASYNC_SIZE; - - /* - * Setup restart PSW for absolute zero lowcore. This is necesary - * if PSW restart is done on an offline CPU that has lowcore zero - */ - psw.mask = PSW_DEFAULT_KEY | PSW_MASK_BASE | PSW_MASK_EA | PSW_MASK_BA; - psw.addr = PSW_ADDR_AMODE | (unsigned long) psw_restart_int_handler; - copy_to_absolute_zero(&S390_lowcore.restart_psw, &psw, sizeof(psw)); -} - static void __init setup_vmcoreinfo(void) { #ifdef CONFIG_KEXEC @@ -747,7 +747,7 @@ static void __init reserve_crashkernel(void) { #ifdef CONFIG_CRASH_DUMP unsigned long long crash_base, crash_size; - char *msg; + char *msg = NULL; int rc; rc = parse_crashkernel(boot_command_line, memory_end, &crash_size, @@ -779,11 +779,11 @@ static void __init reserve_crashkernel(void) pr_info("Reserving %lluMB of memory at %lluMB " "for crashkernel (System RAM: %luMB)\n", crash_size >> 20, crash_base >> 20, memory_end >> 20); + os_info_crashkernel_add(crash_base, crash_size); #endif } -static void __init -setup_memory(void) +static void __init setup_memory(void) { unsigned long bootmap_size; unsigned long start_pfn, end_pfn; @@ -1014,8 +1014,7 @@ static void __init setup_hwcaps(void) * was printed. */ -void __init -setup_arch(char **cmdline_p) +void __init setup_arch(char **cmdline_p) { /* * print what head.S has found out about the machine @@ -1060,6 +1059,7 @@ setup_arch(char **cmdline_p) parse_early_param(); + os_info_init(); setup_ipl(); setup_memory_end(); setup_addressing_mode(); @@ -1068,7 +1068,6 @@ setup_arch(char **cmdline_p) setup_memory(); setup_resources(); setup_vmcoreinfo(); - setup_restart_psw(); setup_lowcore(); cpu_init(); diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c index 2d421d90fada..f7582b27f600 100644 --- a/arch/s390/kernel/signal.c +++ b/arch/s390/kernel/signal.c @@ -30,6 +30,7 @@ #include <asm/ucontext.h> #include <asm/uaccess.h> #include <asm/lowcore.h> +#include <asm/switch_to.h> #include "entry.h" #define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) @@ -384,7 +385,6 @@ static int handle_signal(unsigned long sig, struct k_sigaction *ka, siginfo_t *info, sigset_t *oldset, struct pt_regs *regs) { - sigset_t blocked; int ret; /* Set up the stack frame */ @@ -394,10 +394,7 @@ static int handle_signal(unsigned long sig, struct k_sigaction *ka, ret = setup_frame(sig, ka, oldset, regs); if (ret) return ret; - sigorsets(&blocked, ¤t->blocked, &ka->sa.sa_mask); - if (!(ka->sa.sa_flags & SA_NODEFER)) - sigaddset(&blocked, sig); - set_current_blocked(&blocked); + block_sigmask(ka, sig); return 0; } diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 2398ce6b15ae..1f77227669e8 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -1,23 +1,18 @@ /* - * arch/s390/kernel/smp.c + * SMP related functions * - * Copyright IBM Corp. 1999, 2009 - * Author(s): Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com), - * Martin Schwidefsky (schwidefsky@de.ibm.com) - * Heiko Carstens (heiko.carstens@de.ibm.com) + * Copyright IBM Corp. 1999,2012 + * Author(s): Denis Joseph Barrow, + * Martin Schwidefsky <schwidefsky@de.ibm.com>, + * Heiko Carstens <heiko.carstens@de.ibm.com>, * * based on other smp stuff by * (c) 1995 Alan Cox, CymruNET Ltd <alan@cymru.net> * (c) 1998 Ingo Molnar * - * We work with logical cpu numbering everywhere we can. The only - * functions using the real cpu address (got from STAP) are the sigp - * functions. For all other functions we use the identity mapping. - * That means that cpu_number_map[i] == i for every cpu. cpu_number_map is - * used e.g. to find the idle task belonging to a logical cpu. Every array - * in the kernel is sorted by the logical cpu number and not by the physical - * one which is causing all the confusion with __cpu_logical_map and - * cpu_number_map in other architectures. + * The code outside of smp.c uses logical cpu numbers, only smp.c does + * the translation of logical to physical cpu ids. All new code that + * operates on physical cpu numbers needs to go into smp.c. */ #define KMSG_COMPONENT "cpu" @@ -31,198 +26,435 @@ #include <linux/spinlock.h> #include <linux/kernel_stat.h> #include <linux/delay.h> -#include <linux/cache.h> #include <linux/interrupt.h> #include <linux/irqflags.h> #include <linux/cpu.h> -#include <linux/timex.h> -#include <linux/bootmem.h> #include <linux/slab.h> #include <linux/crash_dump.h> #include <asm/asm-offsets.h> +#include <asm/switch_to.h> +#include <asm/facility.h> #include <asm/ipl.h> #include <asm/setup.h> -#include <asm/sigp.h> -#include <asm/pgalloc.h> #include <asm/irq.h> -#include <asm/cpcmd.h> #include <asm/tlbflush.h> #include <asm/timer.h> #include <asm/lowcore.h> #include <asm/sclp.h> -#include <asm/cputime.h> #include <asm/vdso.h> -#include <asm/cpu.h> +#include <asm/debug.h> +#include <asm/os_info.h> #include "entry.h" -/* logical cpu to cpu address */ -unsigned short __cpu_logical_map[NR_CPUS]; +enum { + sigp_sense = 1, + sigp_external_call = 2, + sigp_emergency_signal = 3, + sigp_start = 4, + sigp_stop = 5, + sigp_restart = 6, + sigp_stop_and_store_status = 9, + sigp_initial_cpu_reset = 11, + sigp_cpu_reset = 12, + sigp_set_prefix = 13, + sigp_store_status_at_address = 14, + sigp_store_extended_status_at_address = 15, + sigp_set_architecture = 18, + sigp_conditional_emergency_signal = 19, + sigp_sense_running = 21, +}; -static struct task_struct *current_set[NR_CPUS]; +enum { + sigp_order_code_accepted = 0, + sigp_status_stored = 1, + sigp_busy = 2, + sigp_not_operational = 3, +}; -static u8 smp_cpu_type; -static int smp_use_sigp_detection; +enum { + ec_schedule = 0, + ec_call_function, + ec_call_function_single, + ec_stop_cpu, +}; -enum s390_cpu_state { +enum { CPU_STATE_STANDBY, CPU_STATE_CONFIGURED, }; +struct pcpu { + struct cpu cpu; + struct task_struct *idle; /* idle process for the cpu */ + struct _lowcore *lowcore; /* lowcore page(s) for the cpu */ + unsigned long async_stack; /* async stack for the cpu */ + unsigned long panic_stack; /* panic stack for the cpu */ + unsigned long ec_mask; /* bit mask for ec_xxx functions */ + int state; /* physical cpu state */ + u32 status; /* last status received via sigp */ + u16 address; /* physical cpu address */ +}; + +static u8 boot_cpu_type; +static u16 boot_cpu_address; +static struct pcpu pcpu_devices[NR_CPUS]; + DEFINE_MUTEX(smp_cpu_state_mutex); -static int smp_cpu_state[NR_CPUS]; -static DEFINE_PER_CPU(struct cpu, cpu_devices); +/* + * Signal processor helper functions. + */ +static inline int __pcpu_sigp(u16 addr, u8 order, u32 parm, u32 *status) +{ + register unsigned int reg1 asm ("1") = parm; + int cc; -static void smp_ext_bitcall(int, int); + asm volatile( + " sigp %1,%2,0(%3)\n" + " ipm %0\n" + " srl %0,28\n" + : "=d" (cc), "+d" (reg1) : "d" (addr), "a" (order) : "cc"); + if (status && cc == 1) + *status = reg1; + return cc; +} -static int raw_cpu_stopped(int cpu) +static inline int __pcpu_sigp_relax(u16 addr, u8 order, u32 parm, u32 *status) { - u32 status; + int cc; - switch (raw_sigp_ps(&status, 0, cpu, sigp_sense)) { - case sigp_status_stored: - /* Check for stopped and check stop state */ - if (status & 0x50) - return 1; - break; - default: - break; + while (1) { + cc = __pcpu_sigp(addr, order, parm, status); + if (cc != sigp_busy) + return cc; + cpu_relax(); } - return 0; } -static inline int cpu_stopped(int cpu) +static int pcpu_sigp_retry(struct pcpu *pcpu, u8 order, u32 parm) { - return raw_cpu_stopped(cpu_logical_map(cpu)); + int cc, retry; + + for (retry = 0; ; retry++) { + cc = __pcpu_sigp(pcpu->address, order, parm, &pcpu->status); + if (cc != sigp_busy) + break; + if (retry >= 3) + udelay(10); + } + return cc; +} + +static inline int pcpu_stopped(struct pcpu *pcpu) +{ + if (__pcpu_sigp(pcpu->address, sigp_sense, + 0, &pcpu->status) != sigp_status_stored) + return 0; + /* Check for stopped and check stop state */ + return !!(pcpu->status & 0x50); +} + +static inline int pcpu_running(struct pcpu *pcpu) +{ + if (__pcpu_sigp(pcpu->address, sigp_sense_running, + 0, &pcpu->status) != sigp_status_stored) + return 1; + /* Check for running status */ + return !(pcpu->status & 0x400); } /* - * Ensure that PSW restart is done on an online CPU + * Find struct pcpu by cpu address. */ -void smp_restart_with_online_cpu(void) +static struct pcpu *pcpu_find_address(const struct cpumask *mask, int address) { int cpu; - for_each_online_cpu(cpu) { - if (stap() == __cpu_logical_map[cpu]) { - /* We are online: Enable DAT again and return */ - __load_psw_mask(psw_kernel_bits | PSW_MASK_DAT); - return; - } + for_each_cpu(cpu, mask) + if (pcpu_devices[cpu].address == address) + return pcpu_devices + cpu; + return NULL; +} + +static void pcpu_ec_call(struct pcpu *pcpu, int ec_bit) +{ + int order; + + set_bit(ec_bit, &pcpu->ec_mask); + order = pcpu_running(pcpu) ? + sigp_external_call : sigp_emergency_signal; + pcpu_sigp_retry(pcpu, order, 0); +} + +static int __cpuinit pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu) +{ + struct _lowcore *lc; + + if (pcpu != &pcpu_devices[0]) { + pcpu->lowcore = (struct _lowcore *) + __get_free_pages(GFP_KERNEL | GFP_DMA, LC_ORDER); + pcpu->async_stack = __get_free_pages(GFP_KERNEL, ASYNC_ORDER); + pcpu->panic_stack = __get_free_page(GFP_KERNEL); + if (!pcpu->lowcore || !pcpu->panic_stack || !pcpu->async_stack) + goto out; } - /* We are not online: Do PSW restart on an online CPU */ - while (sigp(cpu, sigp_restart) == sigp_busy) - cpu_relax(); - /* And stop ourself */ - while (raw_sigp(stap(), sigp_stop) == sigp_busy) - cpu_relax(); - for (;;); + lc = pcpu->lowcore; + memcpy(lc, &S390_lowcore, 512); + memset((char *) lc + 512, 0, sizeof(*lc) - 512); + lc->async_stack = pcpu->async_stack + ASYNC_SIZE; + lc->panic_stack = pcpu->panic_stack + PAGE_SIZE; + lc->cpu_nr = cpu; +#ifndef CONFIG_64BIT + if (MACHINE_HAS_IEEE) { + lc->extended_save_area_addr = get_zeroed_page(GFP_KERNEL); + if (!lc->extended_save_area_addr) + goto out; + } +#else + if (vdso_alloc_per_cpu(lc)) + goto out; +#endif + lowcore_ptr[cpu] = lc; + pcpu_sigp_retry(pcpu, sigp_set_prefix, (u32)(unsigned long) lc); + return 0; +out: + if (pcpu != &pcpu_devices[0]) { + free_page(pcpu->panic_stack); + free_pages(pcpu->async_stack, ASYNC_ORDER); + free_pages((unsigned long) pcpu->lowcore, LC_ORDER); + } + return -ENOMEM; } -void smp_switch_to_ipl_cpu(void (*func)(void *), void *data) +static void pcpu_free_lowcore(struct pcpu *pcpu) { - struct _lowcore *lc, *current_lc; - struct stack_frame *sf; - struct pt_regs *regs; - unsigned long sp; - - if (smp_processor_id() == 0) - func(data); - __load_psw_mask(PSW_DEFAULT_KEY | PSW_MASK_BASE | - PSW_MASK_EA | PSW_MASK_BA); - /* Disable lowcore protection */ - __ctl_clear_bit(0, 28); - current_lc = lowcore_ptr[smp_processor_id()]; - lc = lowcore_ptr[0]; - if (!lc) - lc = current_lc; - lc->restart_psw.mask = - PSW_DEFAULT_KEY | PSW_MASK_BASE | PSW_MASK_EA | PSW_MASK_BA; - lc->restart_psw.addr = PSW_ADDR_AMODE | (unsigned long) smp_restart_cpu; - if (!cpu_online(0)) - smp_switch_to_cpu(func, data, 0, stap(), __cpu_logical_map[0]); - while (sigp(0, sigp_stop_and_store_status) == sigp_busy) - cpu_relax(); - sp = lc->panic_stack; - sp -= sizeof(struct pt_regs); - regs = (struct pt_regs *) sp; - memcpy(®s->gprs, ¤t_lc->gpregs_save_area, sizeof(regs->gprs)); - regs->psw = current_lc->psw_save_area; - sp -= STACK_FRAME_OVERHEAD; - sf = (struct stack_frame *) sp; - sf->back_chain = 0; - smp_switch_to_cpu(func, data, sp, stap(), __cpu_logical_map[0]); + pcpu_sigp_retry(pcpu, sigp_set_prefix, 0); + lowcore_ptr[pcpu - pcpu_devices] = NULL; +#ifndef CONFIG_64BIT + if (MACHINE_HAS_IEEE) { + struct _lowcore *lc = pcpu->lowcore; + + free_page((unsigned long) lc->extended_save_area_addr); + lc->extended_save_area_addr = 0; + } +#else + vdso_free_per_cpu(pcpu->lowcore); +#endif + if (pcpu != &pcpu_devices[0]) { + free_page(pcpu->panic_stack); + free_pages(pcpu->async_stack, ASYNC_ORDER); + free_pages((unsigned long) pcpu->lowcore, LC_ORDER); + } +} + +static void pcpu_prepare_secondary(struct pcpu *pcpu, int cpu) +{ + struct _lowcore *lc = pcpu->lowcore; + + atomic_inc(&init_mm.context.attach_count); + lc->cpu_nr = cpu; + lc->percpu_offset = __per_cpu_offset[cpu]; + lc->kernel_asce = S390_lowcore.kernel_asce; + lc->machine_flags = S390_lowcore.machine_flags; + lc->ftrace_func = S390_lowcore.ftrace_func; + lc->user_timer = lc->system_timer = lc->steal_timer = 0; + __ctl_store(lc->cregs_save_area, 0, 15); + save_access_regs((unsigned int *) lc->access_regs_save_area); + memcpy(lc->stfle_fac_list, S390_lowcore.stfle_fac_list, + MAX_FACILITY_BIT/8); +} + +static void pcpu_attach_task(struct pcpu *pcpu, struct task_struct *tsk) +{ + struct _lowcore *lc = pcpu->lowcore; + struct thread_info *ti = task_thread_info(tsk); + + lc->kernel_stack = (unsigned long) task_stack_page(tsk) + THREAD_SIZE; + lc->thread_info = (unsigned long) task_thread_info(tsk); + lc->current_task = (unsigned long) tsk; + lc->user_timer = ti->user_timer; + lc->system_timer = ti->system_timer; + lc->steal_timer = 0; +} + +static void pcpu_start_fn(struct pcpu *pcpu, void (*func)(void *), void *data) +{ + struct _lowcore *lc = pcpu->lowcore; + + lc->restart_stack = lc->kernel_stack; + lc->restart_fn = (unsigned long) func; + lc->restart_data = (unsigned long) data; + lc->restart_source = -1UL; + pcpu_sigp_retry(pcpu, sigp_restart, 0); +} + +/* + * Call function via PSW restart on pcpu and stop the current cpu. + */ +static void pcpu_delegate(struct pcpu *pcpu, void (*func)(void *), + void *data, unsigned long stack) +{ + struct _lowcore *lc = pcpu->lowcore; + unsigned short this_cpu; + + __load_psw_mask(psw_kernel_bits); + this_cpu = stap(); + if (pcpu->address == this_cpu) + func(data); /* should not return */ + /* Stop target cpu (if func returns this stops the current cpu). */ + pcpu_sigp_retry(pcpu, sigp_stop, 0); + /* Restart func on the target cpu and stop the current cpu. */ + lc->restart_stack = stack; + lc->restart_fn = (unsigned long) func; + lc->restart_data = (unsigned long) data; + lc->restart_source = (unsigned long) this_cpu; + asm volatile( + "0: sigp 0,%0,6 # sigp restart to target cpu\n" + " brc 2,0b # busy, try again\n" + "1: sigp 0,%1,5 # sigp stop to current cpu\n" + " brc 2,1b # busy, try again\n" + : : "d" (pcpu->address), "d" (this_cpu) : "0", "1", "cc"); + for (;;) ; +} + +/* + * Call function on an online CPU. + */ +void smp_call_online_cpu(void (*func)(void *), void *data) +{ + struct pcpu *pcpu; + + /* Use the current cpu if it is online. */ + pcpu = pcpu_find_address(cpu_online_mask, stap()); + if (!pcpu) + /* Use the first online cpu. */ + pcpu = pcpu_devices + cpumask_first(cpu_online_mask); + pcpu_delegate(pcpu, func, data, (unsigned long) restart_stack); } -static void smp_stop_cpu(void) +/* + * Call function on the ipl CPU. + */ +void smp_call_ipl_cpu(void (*func)(void *), void *data) { - while (sigp(smp_processor_id(), sigp_stop) == sigp_busy) + pcpu_delegate(&pcpu_devices[0], func, data, + pcpu_devices->panic_stack + PAGE_SIZE); +} + +int smp_find_processor_id(u16 address) +{ + int cpu; + + for_each_present_cpu(cpu) + if (pcpu_devices[cpu].address == address) + return cpu; + return -1; +} + +int smp_vcpu_scheduled(int cpu) +{ + return pcpu_running(pcpu_devices + cpu); +} + +void smp_yield(void) +{ + if (MACHINE_HAS_DIAG44) + asm volatile("diag 0,0,0x44"); +} + +void smp_yield_cpu(int cpu) +{ + if (MACHINE_HAS_DIAG9C) + asm volatile("diag %0,0,0x9c" + : : "d" (pcpu_devices[cpu].address)); + else if (MACHINE_HAS_DIAG44) + asm volatile("diag 0,0,0x44"); +} + +/* + * Send cpus emergency shutdown signal. This gives the cpus the + * opportunity to complete outstanding interrupts. + */ +void smp_emergency_stop(cpumask_t *cpumask) +{ + u64 end; + int cpu; + + end = get_clock() + (1000000UL << 12); + for_each_cpu(cpu, cpumask) { + struct pcpu *pcpu = pcpu_devices + cpu; + set_bit(ec_stop_cpu, &pcpu->ec_mask); + while (__pcpu_sigp(pcpu->address, sigp_emergency_signal, + 0, NULL) == sigp_busy && + get_clock() < end) + cpu_relax(); + } + while (get_clock() < end) { + for_each_cpu(cpu, cpumask) + if (pcpu_stopped(pcpu_devices + cpu)) + cpumask_clear_cpu(cpu, cpumask); + if (cpumask_empty(cpumask)) + break; cpu_relax(); + } } +/* + * Stop all cpus but the current one. + */ void smp_send_stop(void) { cpumask_t cpumask; int cpu; - u64 end; /* Disable all interrupts/machine checks */ __load_psw_mask(psw_kernel_bits | PSW_MASK_DAT); trace_hardirqs_off(); + debug_set_critical(); cpumask_copy(&cpumask, cpu_online_mask); cpumask_clear_cpu(smp_processor_id(), &cpumask); - if (oops_in_progress) { - /* - * Give the other cpus the opportunity to complete - * outstanding interrupts before stopping them. - */ - end = get_clock() + (1000000UL << 12); - for_each_cpu(cpu, &cpumask) { - set_bit(ec_stop_cpu, (unsigned long *) - &lowcore_ptr[cpu]->ext_call_fast); - while (sigp(cpu, sigp_emergency_signal) == sigp_busy && - get_clock() < end) - cpu_relax(); - } - while (get_clock() < end) { - for_each_cpu(cpu, &cpumask) - if (cpu_stopped(cpu)) - cpumask_clear_cpu(cpu, &cpumask); - if (cpumask_empty(&cpumask)) - break; - cpu_relax(); - } - } + if (oops_in_progress) + smp_emergency_stop(&cpumask); /* stop all processors */ for_each_cpu(cpu, &cpumask) { - while (sigp(cpu, sigp_stop) == sigp_busy) - cpu_relax(); - while (!cpu_stopped(cpu)) + struct pcpu *pcpu = pcpu_devices + cpu; + pcpu_sigp_retry(pcpu, sigp_stop, 0); + while (!pcpu_stopped(pcpu)) cpu_relax(); } } /* + * Stop the current cpu. + */ +void smp_stop_cpu(void) +{ + pcpu_sigp_retry(pcpu_devices + smp_processor_id(), sigp_stop, 0); + for (;;) ; +} + +/* * This is the main routine where commands issued by other * cpus are handled. */ - -static void do_ext_call_interrupt(unsigned int ext_int_code, +static void do_ext_call_interrupt(struct ext_code ext_code, unsigned int param32, unsigned long param64) { unsigned long bits; + int cpu; - if ((ext_int_code & 0xffff) == 0x1202) - kstat_cpu(smp_processor_id()).irqs[EXTINT_EXC]++; + cpu = smp_processor_id(); + if (ext_code.code == 0x1202) + kstat_cpu(cpu).irqs[EXTINT_EXC]++; else - kstat_cpu(smp_processor_id()).irqs[EXTINT_EMS]++; + kstat_cpu(cpu).irqs[EXTINT_EMS]++; /* * handle bit signal external calls */ - bits = xchg(&S390_lowcore.ext_call_fast, 0); + bits = xchg(&pcpu_devices[cpu].ec_mask, 0); if (test_bit(ec_stop_cpu, &bits)) smp_stop_cpu(); @@ -238,38 +470,17 @@ static void do_ext_call_interrupt(unsigned int ext_int_code, } -/* - * Send an external call sigp to another cpu and return without waiting - * for its completion. - */ -static void smp_ext_bitcall(int cpu, int sig) -{ - int order; - - /* - * Set signaling bit in lowcore of target cpu and kick it - */ - set_bit(sig, (unsigned long *) &lowcore_ptr[cpu]->ext_call_fast); - while (1) { - order = smp_vcpu_scheduled(cpu) ? - sigp_external_call : sigp_emergency_signal; - if (sigp(cpu, order) != sigp_busy) - break; - udelay(10); - } -} - void arch_send_call_function_ipi_mask(const struct cpumask *mask) { int cpu; for_each_cpu(cpu, mask) - smp_ext_bitcall(cpu, ec_call_function); + pcpu_ec_call(pcpu_devices + cpu, ec_call_function); } void arch_send_call_function_single_ipi(int cpu) { - smp_ext_bitcall(cpu, ec_call_function_single); + pcpu_ec_call(pcpu_devices + cpu, ec_call_function_single); } #ifndef CONFIG_64BIT @@ -295,15 +506,16 @@ EXPORT_SYMBOL(smp_ptlb_all); */ void smp_send_reschedule(int cpu) { - smp_ext_bitcall(cpu, ec_schedule); + pcpu_ec_call(pcpu_devices + cpu, ec_schedule); } /* * parameter area for the set/clear control bit callbacks */ struct ec_creg_mask_parms { - unsigned long orvals[16]; - unsigned long andvals[16]; + unsigned long orval; + unsigned long andval; + int cr; }; /* @@ -313,11 +525,9 @@ static void smp_ctl_bit_callback(void *info) { struct ec_creg_mask_parms *pp = info; unsigned long cregs[16]; - int i; __ctl_store(cregs, 0, 15); - for (i = 0; i <= 15; i++) - cregs[i] = (cregs[i] & pp->andvals[i]) | pp->orvals[i]; + cregs[pp->cr] = (cregs[pp->cr] & pp->andval) | pp->orval; __ctl_load(cregs, 0, 15); } @@ -326,11 +536,8 @@ static void smp_ctl_bit_callback(void *info) */ void smp_ctl_set_bit(int cr, int bit) { - struct ec_creg_mask_parms parms; + struct ec_creg_mask_parms parms = { 1UL << bit, -1UL, cr }; - memset(&parms.orvals, 0, sizeof(parms.orvals)); - memset(&parms.andvals, 0xff, sizeof(parms.andvals)); - parms.orvals[cr] = 1UL << bit; on_each_cpu(smp_ctl_bit_callback, &parms, 1); } EXPORT_SYMBOL(smp_ctl_set_bit); @@ -340,226 +547,178 @@ EXPORT_SYMBOL(smp_ctl_set_bit); */ void smp_ctl_clear_bit(int cr, int bit) { - struct ec_creg_mask_parms parms; + struct ec_creg_mask_parms parms = { 0, ~(1UL << bit), cr }; - memset(&parms.orvals, 0, sizeof(parms.orvals)); - memset(&parms.andvals, 0xff, sizeof(parms.andvals)); - parms.andvals[cr] = ~(1UL << bit); on_each_cpu(smp_ctl_bit_callback, &parms, 1); } EXPORT_SYMBOL(smp_ctl_clear_bit); #if defined(CONFIG_ZFCPDUMP) || defined(CONFIG_CRASH_DUMP) -static void __init smp_get_save_area(unsigned int cpu, unsigned int phy_cpu) +struct save_area *zfcpdump_save_areas[NR_CPUS + 1]; +EXPORT_SYMBOL_GPL(zfcpdump_save_areas); + +static void __init smp_get_save_area(int cpu, u16 address) { - if (ipl_info.type != IPL_TYPE_FCP_DUMP && !OLDMEM_BASE) - return; + void *lc = pcpu_devices[0].lowcore; + struct save_area *save_area; + if (is_kdump_kernel()) return; + if (!OLDMEM_BASE && (address == boot_cpu_address || + ipl_info.type != IPL_TYPE_FCP_DUMP)) + return; if (cpu >= NR_CPUS) { - pr_warning("CPU %i exceeds the maximum %i and is excluded from " - "the dump\n", cpu, NR_CPUS - 1); + pr_warning("CPU %i exceeds the maximum %i and is excluded " + "from the dump\n", cpu, NR_CPUS - 1); return; } - zfcpdump_save_areas[cpu] = kmalloc(sizeof(struct save_area), GFP_KERNEL); - while (raw_sigp(phy_cpu, sigp_stop_and_store_status) == sigp_busy) - cpu_relax(); - memcpy_real(zfcpdump_save_areas[cpu], - (void *)(unsigned long) store_prefix() + SAVE_AREA_BASE, - sizeof(struct save_area)); + save_area = kmalloc(sizeof(struct save_area), GFP_KERNEL); + if (!save_area) + panic("could not allocate memory for save area\n"); + zfcpdump_save_areas[cpu] = save_area; +#ifdef CONFIG_CRASH_DUMP + if (address == boot_cpu_address) { + /* Copy the registers of the boot cpu. */ + copy_oldmem_page(1, (void *) save_area, sizeof(*save_area), + SAVE_AREA_BASE - PAGE_SIZE, 0); + return; + } +#endif + /* Get the registers of a non-boot cpu. */ + __pcpu_sigp_relax(address, sigp_stop_and_store_status, 0, NULL); + memcpy_real(save_area, lc + SAVE_AREA_BASE, sizeof(*save_area)); } -struct save_area *zfcpdump_save_areas[NR_CPUS + 1]; -EXPORT_SYMBOL_GPL(zfcpdump_save_areas); - -#else - -static inline void smp_get_save_area(unsigned int cpu, unsigned int phy_cpu) { } - -#endif /* CONFIG_ZFCPDUMP */ - -static int cpu_known(int cpu_id) +int smp_store_status(int cpu) { - int cpu; + struct pcpu *pcpu; - for_each_present_cpu(cpu) { - if (__cpu_logical_map[cpu] == cpu_id) - return 1; - } + pcpu = pcpu_devices + cpu; + if (__pcpu_sigp_relax(pcpu->address, sigp_stop_and_store_status, + 0, NULL) != sigp_order_code_accepted) + return -EIO; return 0; } -static int smp_rescan_cpus_sigp(cpumask_t avail) -{ - int cpu_id, logical_cpu; +#else /* CONFIG_ZFCPDUMP || CONFIG_CRASH_DUMP */ - logical_cpu = cpumask_first(&avail); - if (logical_cpu >= nr_cpu_ids) - return 0; - for (cpu_id = 0; cpu_id <= MAX_CPU_ADDRESS; cpu_id++) { - if (cpu_known(cpu_id)) - continue; - __cpu_logical_map[logical_cpu] = cpu_id; - cpu_set_polarization(logical_cpu, POLARIZATION_UNKNOWN); - if (!cpu_stopped(logical_cpu)) - continue; - set_cpu_present(logical_cpu, true); - smp_cpu_state[logical_cpu] = CPU_STATE_CONFIGURED; - logical_cpu = cpumask_next(logical_cpu, &avail); - if (logical_cpu >= nr_cpu_ids) - break; - } - return 0; -} +static inline void smp_get_save_area(int cpu, u16 address) { } + +#endif /* CONFIG_ZFCPDUMP || CONFIG_CRASH_DUMP */ -static int smp_rescan_cpus_sclp(cpumask_t avail) +static struct sclp_cpu_info *smp_get_cpu_info(void) { + static int use_sigp_detection; struct sclp_cpu_info *info; - int cpu_id, logical_cpu, cpu; - int rc; - - logical_cpu = cpumask_first(&avail); - if (logical_cpu >= nr_cpu_ids) - return 0; - info = kmalloc(sizeof(*info), GFP_KERNEL); - if (!info) - return -ENOMEM; - rc = sclp_get_cpu_info(info); - if (rc) - goto out; - for (cpu = 0; cpu < info->combined; cpu++) { - if (info->has_cpu_type && info->cpu[cpu].type != smp_cpu_type) - continue; - cpu_id = info->cpu[cpu].address; - if (cpu_known(cpu_id)) - continue; - __cpu_logical_map[logical_cpu] = cpu_id; - cpu_set_polarization(logical_cpu, POLARIZATION_UNKNOWN); - set_cpu_present(logical_cpu, true); - if (cpu >= info->configured) - smp_cpu_state[logical_cpu] = CPU_STATE_STANDBY; - else - smp_cpu_state[logical_cpu] = CPU_STATE_CONFIGURED; - logical_cpu = cpumask_next(logical_cpu, &avail); - if (logical_cpu >= nr_cpu_ids) - break; + int address; + + info = kzalloc(sizeof(*info), GFP_KERNEL); + if (info && (use_sigp_detection || sclp_get_cpu_info(info))) { + use_sigp_detection = 1; + for (address = 0; address <= MAX_CPU_ADDRESS; address++) { + if (__pcpu_sigp_relax(address, sigp_sense, 0, NULL) == + sigp_not_operational) + continue; + info->cpu[info->configured].address = address; + info->configured++; + } + info->combined = info->configured; } -out: - kfree(info); - return rc; + return info; } -static int __smp_rescan_cpus(void) +static int __devinit smp_add_present_cpu(int cpu); + +static int __devinit __smp_rescan_cpus(struct sclp_cpu_info *info, + int sysfs_add) { + struct pcpu *pcpu; cpumask_t avail; + int cpu, nr, i; + nr = 0; cpumask_xor(&avail, cpu_possible_mask, cpu_present_mask); - if (smp_use_sigp_detection) - return smp_rescan_cpus_sigp(avail); - else - return smp_rescan_cpus_sclp(avail); + cpu = cpumask_first(&avail); + for (i = 0; (i < info->combined) && (cpu < nr_cpu_ids); i++) { + if (info->has_cpu_type && info->cpu[i].type != boot_cpu_type) + continue; + if (pcpu_find_address(cpu_present_mask, info->cpu[i].address)) + continue; + pcpu = pcpu_devices + cpu; + pcpu->address = info->cpu[i].address; + pcpu->state = (cpu >= info->configured) ? + CPU_STATE_STANDBY : CPU_STATE_CONFIGURED; + cpu_set_polarization(cpu, POLARIZATION_UNKNOWN); + set_cpu_present(cpu, true); + if (sysfs_add && smp_add_present_cpu(cpu) != 0) + set_cpu_present(cpu, false); + else + nr++; + cpu = cpumask_next(cpu, &avail); + } + return nr; } static void __init smp_detect_cpus(void) { unsigned int cpu, c_cpus, s_cpus; struct sclp_cpu_info *info; - u16 boot_cpu_addr, cpu_addr; - c_cpus = 1; - s_cpus = 0; - boot_cpu_addr = __cpu_logical_map[0]; - info = kmalloc(sizeof(*info), GFP_KERNEL); + info = smp_get_cpu_info(); if (!info) panic("smp_detect_cpus failed to allocate memory\n"); -#ifdef CONFIG_CRASH_DUMP - if (OLDMEM_BASE && !is_kdump_kernel()) { - struct save_area *save_area; - - save_area = kmalloc(sizeof(*save_area), GFP_KERNEL); - if (!save_area) - panic("could not allocate memory for save area\n"); - copy_oldmem_page(1, (void *) save_area, sizeof(*save_area), - 0x200, 0); - zfcpdump_save_areas[0] = save_area; - } -#endif - /* Use sigp detection algorithm if sclp doesn't work. */ - if (sclp_get_cpu_info(info)) { - smp_use_sigp_detection = 1; - for (cpu = 0; cpu <= MAX_CPU_ADDRESS; cpu++) { - if (cpu == boot_cpu_addr) - continue; - if (!raw_cpu_stopped(cpu)) - continue; - smp_get_save_area(c_cpus, cpu); - c_cpus++; - } - goto out; - } - if (info->has_cpu_type) { for (cpu = 0; cpu < info->combined; cpu++) { - if (info->cpu[cpu].address == boot_cpu_addr) { - smp_cpu_type = info->cpu[cpu].type; - break; - } + if (info->cpu[cpu].address != boot_cpu_address) + continue; + /* The boot cpu dictates the cpu type. */ + boot_cpu_type = info->cpu[cpu].type; + break; } } - + c_cpus = s_cpus = 0; for (cpu = 0; cpu < info->combined; cpu++) { - if (info->has_cpu_type && info->cpu[cpu].type != smp_cpu_type) - continue; - cpu_addr = info->cpu[cpu].address; - if (cpu_addr == boot_cpu_addr) + if (info->has_cpu_type && info->cpu[cpu].type != boot_cpu_type) continue; - if (!raw_cpu_stopped(cpu_addr)) { + if (cpu < info->configured) { + smp_get_save_area(c_cpus, info->cpu[cpu].address); + c_cpus++; + } else s_cpus++; - continue; - } - smp_get_save_area(c_cpus, cpu_addr); - c_cpus++; } -out: - kfree(info); pr_info("%d configured CPUs, %d standby CPUs\n", c_cpus, s_cpus); get_online_cpus(); - __smp_rescan_cpus(); + __smp_rescan_cpus(info, 0); put_online_cpus(); + kfree(info); } /* * Activate a secondary processor. */ -int __cpuinit start_secondary(void *cpuvoid) +static void __cpuinit smp_start_secondary(void *cpuvoid) { + S390_lowcore.last_update_clock = get_clock(); + S390_lowcore.restart_stack = (unsigned long) restart_stack; + S390_lowcore.restart_fn = (unsigned long) do_restart; + S390_lowcore.restart_data = 0; + S390_lowcore.restart_source = -1UL; + restore_access_regs(S390_lowcore.access_regs_save_area); + __ctl_load(S390_lowcore.cregs_save_area, 0, 15); + __load_psw_mask(psw_kernel_bits | PSW_MASK_DAT); cpu_init(); preempt_disable(); init_cpu_timer(); init_cpu_vtimer(); pfault_init(); - notify_cpu_starting(smp_processor_id()); ipi_call_lock(); set_cpu_online(smp_processor_id(), true); ipi_call_unlock(); - __ctl_clear_bit(0, 28); /* Disable lowcore protection */ - S390_lowcore.restart_psw.mask = - PSW_DEFAULT_KEY | PSW_MASK_BASE | PSW_MASK_EA | PSW_MASK_BA; - S390_lowcore.restart_psw.addr = - PSW_ADDR_AMODE | (unsigned long) psw_restart_int_handler; - __ctl_set_bit(0, 28); /* Enable lowcore protection */ - /* - * Wait until the cpu which brought this one up marked it - * active before enabling interrupts. - */ - while (!cpumask_test_cpu(smp_processor_id(), cpu_active_mask)) - cpu_relax(); local_irq_enable(); /* cpu_idle will call schedule for us */ cpu_idle(); - return 0; } struct create_idle { @@ -578,82 +737,20 @@ static void __cpuinit smp_fork_idle(struct work_struct *work) complete(&c_idle->done); } -static int __cpuinit smp_alloc_lowcore(int cpu) -{ - unsigned long async_stack, panic_stack; - struct _lowcore *lowcore; - - lowcore = (void *) __get_free_pages(GFP_KERNEL | GFP_DMA, LC_ORDER); - if (!lowcore) - return -ENOMEM; - async_stack = __get_free_pages(GFP_KERNEL, ASYNC_ORDER); - panic_stack = __get_free_page(GFP_KERNEL); - if (!panic_stack || !async_stack) - goto out; - memcpy(lowcore, &S390_lowcore, 512); - memset((char *)lowcore + 512, 0, sizeof(*lowcore) - 512); - lowcore->async_stack = async_stack + ASYNC_SIZE; - lowcore->panic_stack = panic_stack + PAGE_SIZE; - lowcore->restart_psw.mask = - PSW_DEFAULT_KEY | PSW_MASK_BASE | PSW_MASK_EA | PSW_MASK_BA; - lowcore->restart_psw.addr = - PSW_ADDR_AMODE | (unsigned long) restart_int_handler; - if (user_mode != HOME_SPACE_MODE) - lowcore->restart_psw.mask |= PSW_ASC_HOME; -#ifndef CONFIG_64BIT - if (MACHINE_HAS_IEEE) { - unsigned long save_area; - - save_area = get_zeroed_page(GFP_KERNEL); - if (!save_area) - goto out; - lowcore->extended_save_area_addr = (u32) save_area; - } -#else - if (vdso_alloc_per_cpu(cpu, lowcore)) - goto out; -#endif - lowcore_ptr[cpu] = lowcore; - return 0; - -out: - free_page(panic_stack); - free_pages(async_stack, ASYNC_ORDER); - free_pages((unsigned long) lowcore, LC_ORDER); - return -ENOMEM; -} - -static void smp_free_lowcore(int cpu) -{ - struct _lowcore *lowcore; - - lowcore = lowcore_ptr[cpu]; -#ifndef CONFIG_64BIT - if (MACHINE_HAS_IEEE) - free_page((unsigned long) lowcore->extended_save_area_addr); -#else - vdso_free_per_cpu(cpu, lowcore); -#endif - free_page(lowcore->panic_stack - PAGE_SIZE); - free_pages(lowcore->async_stack - ASYNC_SIZE, ASYNC_ORDER); - free_pages((unsigned long) lowcore, LC_ORDER); - lowcore_ptr[cpu] = NULL; -} - /* Upping and downing of CPUs */ int __cpuinit __cpu_up(unsigned int cpu) { - struct _lowcore *cpu_lowcore; struct create_idle c_idle; - struct task_struct *idle; - struct stack_frame *sf; - u32 lowcore; - int ccode; + struct pcpu *pcpu; + int rc; - if (smp_cpu_state[cpu] != CPU_STATE_CONFIGURED) + pcpu = pcpu_devices + cpu; + if (pcpu->state != CPU_STATE_CONFIGURED) + return -EIO; + if (pcpu_sigp_retry(pcpu, sigp_initial_cpu_reset, 0) != + sigp_order_code_accepted) return -EIO; - idle = current_set[cpu]; - if (!idle) { + if (!pcpu->idle) { c_idle.done = COMPLETION_INITIALIZER_ONSTACK(c_idle.done); INIT_WORK_ONSTACK(&c_idle.work, smp_fork_idle); c_idle.cpu = cpu; @@ -661,68 +758,28 @@ int __cpuinit __cpu_up(unsigned int cpu) wait_for_completion(&c_idle.done); if (IS_ERR(c_idle.idle)) return PTR_ERR(c_idle.idle); - idle = c_idle.idle; - current_set[cpu] = c_idle.idle; + pcpu->idle = c_idle.idle; } - init_idle(idle, cpu); - if (smp_alloc_lowcore(cpu)) - return -ENOMEM; - do { - ccode = sigp(cpu, sigp_initial_cpu_reset); - if (ccode == sigp_busy) - udelay(10); - if (ccode == sigp_not_operational) - goto err_out; - } while (ccode == sigp_busy); - - lowcore = (u32)(unsigned long)lowcore_ptr[cpu]; - while (sigp_p(lowcore, cpu, sigp_set_prefix) == sigp_busy) - udelay(10); - - cpu_lowcore = lowcore_ptr[cpu]; - cpu_lowcore->kernel_stack = (unsigned long) - task_stack_page(idle) + THREAD_SIZE; - cpu_lowcore->thread_info = (unsigned long) task_thread_info(idle); - sf = (struct stack_frame *) (cpu_lowcore->kernel_stack - - sizeof(struct pt_regs) - - sizeof(struct stack_frame)); - memset(sf, 0, sizeof(struct stack_frame)); - sf->gprs[9] = (unsigned long) sf; - cpu_lowcore->gpregs_save_area[15] = (unsigned long) sf; - __ctl_store(cpu_lowcore->cregs_save_area, 0, 15); - atomic_inc(&init_mm.context.attach_count); - asm volatile( - " stam 0,15,0(%0)" - : : "a" (&cpu_lowcore->access_regs_save_area) : "memory"); - cpu_lowcore->percpu_offset = __per_cpu_offset[cpu]; - cpu_lowcore->current_task = (unsigned long) idle; - cpu_lowcore->cpu_nr = cpu; - cpu_lowcore->kernel_asce = S390_lowcore.kernel_asce; - cpu_lowcore->machine_flags = S390_lowcore.machine_flags; - cpu_lowcore->ftrace_func = S390_lowcore.ftrace_func; - memcpy(cpu_lowcore->stfle_fac_list, S390_lowcore.stfle_fac_list, - MAX_FACILITY_BIT/8); - eieio(); - - while (sigp(cpu, sigp_restart) == sigp_busy) - udelay(10); - + init_idle(pcpu->idle, cpu); + rc = pcpu_alloc_lowcore(pcpu, cpu); + if (rc) + return rc; + pcpu_prepare_secondary(pcpu, cpu); + pcpu_attach_task(pcpu, pcpu->idle); + pcpu_start_fn(pcpu, smp_start_secondary, NULL); while (!cpu_online(cpu)) cpu_relax(); return 0; - -err_out: - smp_free_lowcore(cpu); - return -EIO; } static int __init setup_possible_cpus(char *s) { - int pcpus, cpu; + int max, cpu; - pcpus = simple_strtoul(s, NULL, 0); + if (kstrtoint(s, 0, &max) < 0) + return 0; init_cpu_possible(cpumask_of(0)); - for (cpu = 1; cpu < pcpus && cpu < nr_cpu_ids; cpu++) + for (cpu = 1; cpu < max && cpu < nr_cpu_ids; cpu++) set_cpu_possible(cpu, true); return 0; } @@ -732,113 +789,79 @@ early_param("possible_cpus", setup_possible_cpus); int __cpu_disable(void) { - struct ec_creg_mask_parms cr_parms; - int cpu = smp_processor_id(); - - set_cpu_online(cpu, false); + unsigned long cregs[16]; - /* Disable pfault pseudo page faults on this cpu. */ + set_cpu_online(smp_processor_id(), false); + /* Disable pseudo page faults on this cpu. */ pfault_fini(); - - memset(&cr_parms.orvals, 0, sizeof(cr_parms.orvals)); - memset(&cr_parms.andvals, 0xff, sizeof(cr_parms.andvals)); - - /* disable all external interrupts */ - cr_parms.orvals[0] = 0; - cr_parms.andvals[0] = ~(1 << 15 | 1 << 14 | 1 << 13 | 1 << 11 | - 1 << 10 | 1 << 9 | 1 << 6 | 1 << 5 | - 1 << 4); - /* disable all I/O interrupts */ - cr_parms.orvals[6] = 0; - cr_parms.andvals[6] = ~(1 << 31 | 1 << 30 | 1 << 29 | 1 << 28 | - 1 << 27 | 1 << 26 | 1 << 25 | 1 << 24); - /* disable most machine checks */ - cr_parms.orvals[14] = 0; - cr_parms.andvals[14] = ~(1 << 28 | 1 << 27 | 1 << 26 | - 1 << 25 | 1 << 24); - - smp_ctl_bit_callback(&cr_parms); - + /* Disable interrupt sources via control register. */ + __ctl_store(cregs, 0, 15); + cregs[0] &= ~0x0000ee70UL; /* disable all external interrupts */ + cregs[6] &= ~0xff000000UL; /* disable all I/O interrupts */ + cregs[14] &= ~0x1f000000UL; /* disable most machine checks */ + __ctl_load(cregs, 0, 15); return 0; } void __cpu_die(unsigned int cpu) { + struct pcpu *pcpu; + /* Wait until target cpu is down */ - while (!cpu_stopped(cpu)) + pcpu = pcpu_devices + cpu; + while (!pcpu_stopped(pcpu)) cpu_relax(); - while (sigp_p(0, cpu, sigp_set_prefix) == sigp_busy) - udelay(10); - smp_free_lowcore(cpu); + pcpu_free_lowcore(pcpu); atomic_dec(&init_mm.context.attach_count); } void __noreturn cpu_die(void) { idle_task_exit(); - while (sigp(smp_processor_id(), sigp_stop) == sigp_busy) - cpu_relax(); - for (;;); + pcpu_sigp_retry(pcpu_devices + smp_processor_id(), sigp_stop, 0); + for (;;) ; } #endif /* CONFIG_HOTPLUG_CPU */ -void __init smp_prepare_cpus(unsigned int max_cpus) +static void smp_call_os_info_init_fn(void) { -#ifndef CONFIG_64BIT - unsigned long save_area = 0; -#endif - unsigned long async_stack, panic_stack; - struct _lowcore *lowcore; + int (*init_fn)(void); + unsigned long size; - smp_detect_cpus(); + init_fn = os_info_old_entry(OS_INFO_INIT_FN, &size); + if (!init_fn) + return; + init_fn(); +} +void __init smp_prepare_cpus(unsigned int max_cpus) +{ /* request the 0x1201 emergency signal external interrupt */ if (register_external_interrupt(0x1201, do_ext_call_interrupt) != 0) panic("Couldn't request external interrupt 0x1201"); /* request the 0x1202 external call external interrupt */ if (register_external_interrupt(0x1202, do_ext_call_interrupt) != 0) panic("Couldn't request external interrupt 0x1202"); - - /* Reallocate current lowcore, but keep its contents. */ - lowcore = (void *) __get_free_pages(GFP_KERNEL | GFP_DMA, LC_ORDER); - panic_stack = __get_free_page(GFP_KERNEL); - async_stack = __get_free_pages(GFP_KERNEL, ASYNC_ORDER); - BUG_ON(!lowcore || !panic_stack || !async_stack); -#ifndef CONFIG_64BIT - if (MACHINE_HAS_IEEE) - save_area = get_zeroed_page(GFP_KERNEL); -#endif - local_irq_disable(); - local_mcck_disable(); - lowcore_ptr[smp_processor_id()] = lowcore; - *lowcore = S390_lowcore; - lowcore->panic_stack = panic_stack + PAGE_SIZE; - lowcore->async_stack = async_stack + ASYNC_SIZE; -#ifndef CONFIG_64BIT - if (MACHINE_HAS_IEEE) - lowcore->extended_save_area_addr = (u32) save_area; -#endif - set_prefix((u32)(unsigned long) lowcore); - local_mcck_enable(); - local_irq_enable(); -#ifdef CONFIG_64BIT - if (vdso_alloc_per_cpu(smp_processor_id(), &S390_lowcore)) - BUG(); -#endif + smp_call_os_info_init_fn(); + smp_detect_cpus(); } void __init smp_prepare_boot_cpu(void) { - BUG_ON(smp_processor_id() != 0); - - current_thread_info()->cpu = 0; - set_cpu_present(0, true); - set_cpu_online(0, true); + struct pcpu *pcpu = pcpu_devices; + + boot_cpu_address = stap(); + pcpu->idle = current; + pcpu->state = CPU_STATE_CONFIGURED; + pcpu->address = boot_cpu_address; + pcpu->lowcore = (struct _lowcore *)(unsigned long) store_prefix(); + pcpu->async_stack = S390_lowcore.async_stack - ASYNC_SIZE; + pcpu->panic_stack = S390_lowcore.panic_stack - PAGE_SIZE; S390_lowcore.percpu_offset = __per_cpu_offset[0]; - current_set[0] = current; - smp_cpu_state[0] = CPU_STATE_CONFIGURED; cpu_set_polarization(0, POLARIZATION_UNKNOWN); + set_cpu_present(0, true); + set_cpu_online(0, true); } void __init smp_cpus_done(unsigned int max_cpus) @@ -848,7 +871,6 @@ void __init smp_cpus_done(unsigned int max_cpus) void __init smp_setup_processor_id(void) { S390_lowcore.cpu_nr = 0; - __cpu_logical_map[0] = stap(); } /* @@ -864,56 +886,57 @@ int setup_profiling_timer(unsigned int multiplier) #ifdef CONFIG_HOTPLUG_CPU static ssize_t cpu_configure_show(struct device *dev, - struct device_attribute *attr, char *buf) + struct device_attribute *attr, char *buf) { ssize_t count; mutex_lock(&smp_cpu_state_mutex); - count = sprintf(buf, "%d\n", smp_cpu_state[dev->id]); + count = sprintf(buf, "%d\n", pcpu_devices[dev->id].state); mutex_unlock(&smp_cpu_state_mutex); return count; } static ssize_t cpu_configure_store(struct device *dev, - struct device_attribute *attr, - const char *buf, size_t count) + struct device_attribute *attr, + const char *buf, size_t count) { - int cpu = dev->id; - int val, rc; + struct pcpu *pcpu; + int cpu, val, rc; char delim; if (sscanf(buf, "%d %c", &val, &delim) != 1) return -EINVAL; if (val != 0 && val != 1) return -EINVAL; - get_online_cpus(); mutex_lock(&smp_cpu_state_mutex); rc = -EBUSY; /* disallow configuration changes of online cpus and cpu 0 */ + cpu = dev->id; if (cpu_online(cpu) || cpu == 0) goto out; + pcpu = pcpu_devices + cpu; rc = 0; switch (val) { case 0: - if (smp_cpu_state[cpu] == CPU_STATE_CONFIGURED) { - rc = sclp_cpu_deconfigure(__cpu_logical_map[cpu]); - if (!rc) { - smp_cpu_state[cpu] = CPU_STATE_STANDBY; - cpu_set_polarization(cpu, POLARIZATION_UNKNOWN); - topology_expect_change(); - } - } + if (pcpu->state != CPU_STATE_CONFIGURED) + break; + rc = sclp_cpu_deconfigure(pcpu->address); + if (rc) + break; + pcpu->state = CPU_STATE_STANDBY; + cpu_set_polarization(cpu, POLARIZATION_UNKNOWN); + topology_expect_change(); break; case 1: - if (smp_cpu_state[cpu] == CPU_STATE_STANDBY) { - rc = sclp_cpu_configure(__cpu_logical_map[cpu]); - if (!rc) { - smp_cpu_state[cpu] = CPU_STATE_CONFIGURED; - cpu_set_polarization(cpu, POLARIZATION_UNKNOWN); - topology_expect_change(); - } - } + if (pcpu->state != CPU_STATE_STANDBY) + break; + rc = sclp_cpu_configure(pcpu->address); + if (rc) + break; + pcpu->state = CPU_STATE_CONFIGURED; + cpu_set_polarization(cpu, POLARIZATION_UNKNOWN); + topology_expect_change(); break; default: break; @@ -929,7 +952,7 @@ static DEVICE_ATTR(configure, 0644, cpu_configure_show, cpu_configure_store); static ssize_t show_cpu_address(struct device *dev, struct device_attribute *attr, char *buf) { - return sprintf(buf, "%d\n", __cpu_logical_map[dev->id]); + return sprintf(buf, "%d\n", pcpu_devices[dev->id].address); } static DEVICE_ATTR(address, 0444, show_cpu_address, NULL); @@ -961,22 +984,16 @@ static DEVICE_ATTR(capability, 0444, show_capability, NULL); static ssize_t show_idle_count(struct device *dev, struct device_attribute *attr, char *buf) { - struct s390_idle_data *idle; + struct s390_idle_data *idle = &per_cpu(s390_idle, dev->id); unsigned long long idle_count; unsigned int sequence; - idle = &per_cpu(s390_idle, dev->id); -repeat: - sequence = idle->sequence; - smp_rmb(); - if (sequence & 1) - goto repeat; - idle_count = idle->idle_count; - if (idle->idle_enter) - idle_count++; - smp_rmb(); - if (idle->sequence != sequence) - goto repeat; + do { + sequence = ACCESS_ONCE(idle->sequence); + idle_count = ACCESS_ONCE(idle->idle_count); + if (ACCESS_ONCE(idle->idle_enter)) + idle_count++; + } while ((sequence & 1) || (idle->sequence != sequence)); return sprintf(buf, "%llu\n", idle_count); } static DEVICE_ATTR(idle_count, 0444, show_idle_count, NULL); @@ -984,24 +1001,18 @@ static DEVICE_ATTR(idle_count, 0444, show_idle_count, NULL); static ssize_t show_idle_time(struct device *dev, struct device_attribute *attr, char *buf) { - struct s390_idle_data *idle; - unsigned long long now, idle_time, idle_enter; + struct s390_idle_data *idle = &per_cpu(s390_idle, dev->id); + unsigned long long now, idle_time, idle_enter, idle_exit; unsigned int sequence; - idle = &per_cpu(s390_idle, dev->id); - now = get_clock(); -repeat: - sequence = idle->sequence; - smp_rmb(); - if (sequence & 1) - goto repeat; - idle_time = idle->idle_time; - idle_enter = idle->idle_enter; - if (idle_enter != 0ULL && idle_enter < now) - idle_time += now - idle_enter; - smp_rmb(); - if (idle->sequence != sequence) - goto repeat; + do { + now = get_clock(); + sequence = ACCESS_ONCE(idle->sequence); + idle_time = ACCESS_ONCE(idle->idle_time); + idle_enter = ACCESS_ONCE(idle->idle_enter); + idle_exit = ACCESS_ONCE(idle->idle_exit); + } while ((sequence & 1) || (idle->sequence != sequence)); + idle_time += idle_enter ? ((idle_exit ? : now) - idle_enter) : 0; return sprintf(buf, "%llu\n", idle_time >> 12); } static DEVICE_ATTR(idle_time_us, 0444, show_idle_time, NULL); @@ -1021,7 +1032,7 @@ static int __cpuinit smp_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) { unsigned int cpu = (unsigned int)(long)hcpu; - struct cpu *c = &per_cpu(cpu_devices, cpu); + struct cpu *c = &pcpu_devices[cpu].cpu; struct device *s = &c->dev; struct s390_idle_data *idle; int err = 0; @@ -1047,7 +1058,7 @@ static struct notifier_block __cpuinitdata smp_cpu_nb = { static int __devinit smp_add_present_cpu(int cpu) { - struct cpu *c = &per_cpu(cpu_devices, cpu); + struct cpu *c = &pcpu_devices[cpu].cpu; struct device *s = &c->dev; int rc; @@ -1085,29 +1096,21 @@ out: int __ref smp_rescan_cpus(void) { - cpumask_t newcpus; - int cpu; - int rc; + struct sclp_cpu_info *info; + int nr; + info = smp_get_cpu_info(); + if (!info) + return -ENOMEM; get_online_cpus(); mutex_lock(&smp_cpu_state_mutex); - cpumask_copy(&newcpus, cpu_present_mask); - rc = __smp_rescan_cpus(); - if (rc) - goto out; - cpumask_andnot(&newcpus, cpu_present_mask, &newcpus); - for_each_cpu(cpu, &newcpus) { - rc = smp_add_present_cpu(cpu); - if (rc) - set_cpu_present(cpu, false); - } - rc = 0; -out: + nr = __smp_rescan_cpus(info, 1); mutex_unlock(&smp_cpu_state_mutex); put_online_cpus(); - if (!cpumask_empty(&newcpus)) + kfree(info); + if (nr) topology_schedule_update(); - return rc; + return 0; } static ssize_t __ref rescan_store(struct device *dev, diff --git a/arch/s390/kernel/suspend.c b/arch/s390/kernel/suspend.c index 47df775c844d..aa1494d0e380 100644 --- a/arch/s390/kernel/suspend.c +++ b/arch/s390/kernel/suspend.c @@ -9,7 +9,7 @@ #include <linux/pfn.h> #include <linux/suspend.h> #include <linux/mm.h> -#include <asm/system.h> +#include <asm/ctl_reg.h> /* * References to section boundaries diff --git a/arch/s390/kernel/switch_cpu.S b/arch/s390/kernel/switch_cpu.S deleted file mode 100644 index bfe070bc7659..000000000000 --- a/arch/s390/kernel/switch_cpu.S +++ /dev/null @@ -1,58 +0,0 @@ -/* - * 31-bit switch cpu code - * - * Copyright IBM Corp. 2009 - * - */ - -#include <linux/linkage.h> -#include <asm/asm-offsets.h> -#include <asm/ptrace.h> - -# smp_switch_to_cpu switches to destination cpu and executes the passed function -# Parameter: %r2 - function to call -# %r3 - function parameter -# %r4 - stack poiner -# %r5 - current cpu -# %r6 - destination cpu - - .section .text -ENTRY(smp_switch_to_cpu) - stm %r6,%r15,__SF_GPRS(%r15) - lr %r1,%r15 - ahi %r15,-STACK_FRAME_OVERHEAD - st %r1,__SF_BACKCHAIN(%r15) - basr %r13,0 -0: la %r1,.gprregs_addr-0b(%r13) - l %r1,0(%r1) - stm %r0,%r15,0(%r1) -1: sigp %r0,%r6,__SIGP_RESTART /* start destination CPU */ - brc 2,1b /* busy, try again */ -2: sigp %r0,%r5,__SIGP_STOP /* stop current CPU */ - brc 2,2b /* busy, try again */ -3: j 3b - -ENTRY(smp_restart_cpu) - basr %r13,0 -0: la %r1,.gprregs_addr-0b(%r13) - l %r1,0(%r1) - lm %r0,%r15,0(%r1) -1: sigp %r0,%r5,__SIGP_SENSE /* Wait for calling CPU */ - brc 10,1b /* busy, accepted (status 0), running */ - tmll %r0,0x40 /* Test if calling CPU is stopped */ - jz 1b - ltr %r4,%r4 /* New stack ? */ - jz 1f - lr %r15,%r4 -1: lr %r14,%r2 /* r14: Function to call */ - lr %r2,%r3 /* r2 : Parameter for function*/ - basr %r14,%r14 /* Call function */ - -.gprregs_addr: - .long .gprregs - - .section .data,"aw",@progbits -.gprregs: - .rept 16 - .long 0 - .endr diff --git a/arch/s390/kernel/switch_cpu64.S b/arch/s390/kernel/switch_cpu64.S deleted file mode 100644 index fcc42d799e41..000000000000 --- a/arch/s390/kernel/switch_cpu64.S +++ /dev/null @@ -1,51 +0,0 @@ -/* - * 64-bit switch cpu code - * - * Copyright IBM Corp. 2009 - * - */ - -#include <linux/linkage.h> -#include <asm/asm-offsets.h> -#include <asm/ptrace.h> - -# smp_switch_to_cpu switches to destination cpu and executes the passed function -# Parameter: %r2 - function to call -# %r3 - function parameter -# %r4 - stack poiner -# %r5 - current cpu -# %r6 - destination cpu - - .section .text -ENTRY(smp_switch_to_cpu) - stmg %r6,%r15,__SF_GPRS(%r15) - lgr %r1,%r15 - aghi %r15,-STACK_FRAME_OVERHEAD - stg %r1,__SF_BACKCHAIN(%r15) - larl %r1,.gprregs - stmg %r0,%r15,0(%r1) -1: sigp %r0,%r6,__SIGP_RESTART /* start destination CPU */ - brc 2,1b /* busy, try again */ -2: sigp %r0,%r5,__SIGP_STOP /* stop current CPU */ - brc 2,2b /* busy, try again */ -3: j 3b - -ENTRY(smp_restart_cpu) - larl %r1,.gprregs - lmg %r0,%r15,0(%r1) -1: sigp %r0,%r5,__SIGP_SENSE /* Wait for calling CPU */ - brc 10,1b /* busy, accepted (status 0), running */ - tmll %r0,0x40 /* Test if calling CPU is stopped */ - jz 1b - ltgr %r4,%r4 /* New stack ? */ - jz 1f - lgr %r15,%r4 -1: lgr %r14,%r2 /* r14: Function to call */ - lgr %r2,%r3 /* r2 : Parameter for function*/ - basr %r14,%r14 /* Call function */ - - .section .data,"aw",@progbits -.gprregs: - .rept 16 - .quad 0 - .endr diff --git a/arch/s390/kernel/swsusp_asm64.S b/arch/s390/kernel/swsusp_asm64.S index acb78cdee896..dd70ef046058 100644 --- a/arch/s390/kernel/swsusp_asm64.S +++ b/arch/s390/kernel/swsusp_asm64.S @@ -42,7 +42,7 @@ ENTRY(swsusp_arch_suspend) lghi %r1,0x1000 /* Save CPU address */ - stap __LC_CPU_ADDRESS(%r0) + stap __LC_EXT_CPU_ADDR(%r0) /* Store registers */ mvc 0x318(4,%r1),__SF_EMPTY(%r15) /* move prefix to lowcore */ @@ -173,15 +173,15 @@ pgm_check_entry: larl %r1,.Lresume_cpu /* Resume CPU address: r2 */ stap 0(%r1) llgh %r2,0(%r1) - llgh %r1,__LC_CPU_ADDRESS(%r0) /* Suspend CPU address: r1 */ + llgh %r1,__LC_EXT_CPU_ADDR(%r0) /* Suspend CPU address: r1 */ cgr %r1,%r2 je restore_registers /* r1 = r2 -> nothing to do */ larl %r4,.Lrestart_suspend_psw /* Set new restart PSW */ mvc __LC_RST_NEW_PSW(16,%r0),0(%r4) 3: - sigp %r9,%r1,__SIGP_INITIAL_CPU_RESET - brc 8,4f /* accepted */ - brc 2,3b /* busy, try again */ + sigp %r9,%r1,11 /* sigp initial cpu reset */ + brc 8,4f /* accepted */ + brc 2,3b /* busy, try again */ /* Suspend CPU not available -> panic */ larl %r15,init_thread_union @@ -196,10 +196,10 @@ pgm_check_entry: lpsw 0(%r3) 4: /* Switch to suspend CPU */ - sigp %r9,%r1,__SIGP_RESTART /* start suspend CPU */ + sigp %r9,%r1,6 /* sigp restart to suspend CPU */ brc 2,4b /* busy, try again */ 5: - sigp %r9,%r2,__SIGP_STOP /* stop resume (current) CPU */ + sigp %r9,%r2,5 /* sigp stop to current resume CPU */ brc 2,5b /* busy, try again */ 6: j 6b @@ -207,7 +207,7 @@ restart_suspend: larl %r1,.Lresume_cpu llgh %r2,0(%r1) 7: - sigp %r9,%r2,__SIGP_SENSE /* Wait for resume CPU */ + sigp %r9,%r2,1 /* sigp sense, wait for resume CPU */ brc 8,7b /* accepted, status 0, still running */ brc 2,7b /* busy, try again */ tmll %r9,0x40 /* Test if resume CPU is stopped */ @@ -257,6 +257,9 @@ restore_registers: lghi %r2,0 brasl %r14,arch_set_page_states + /* Log potential guest relocation */ + brasl %r14,lgr_info_log + /* Reinitialize the channel subsystem */ brasl %r14,channel_subsystem_reinit diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index 14da278febbf..d4e1cb1dbcd1 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -165,7 +165,7 @@ void init_cpu_timer(void) __ctl_set_bit(0, 4); } -static void clock_comparator_interrupt(unsigned int ext_int_code, +static void clock_comparator_interrupt(struct ext_code ext_code, unsigned int param32, unsigned long param64) { @@ -177,7 +177,7 @@ static void clock_comparator_interrupt(unsigned int ext_int_code, static void etr_timing_alert(struct etr_irq_parm *); static void stp_timing_alert(struct stp_irq_parm *); -static void timing_alert_interrupt(unsigned int ext_int_code, +static void timing_alert_interrupt(struct ext_code ext_code, unsigned int param32, unsigned long param64) { kstat_cpu(smp_processor_id()).irqs[EXTINT_TLA]++; diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c index 7370a41948ca..4f8dc942257c 100644 --- a/arch/s390/kernel/topology.c +++ b/arch/s390/kernel/topology.c @@ -79,12 +79,12 @@ static struct mask_info *add_cpus_to_mask(struct topology_cpu *tl_cpu, cpu < TOPOLOGY_CPU_BITS; cpu = find_next_bit(&tl_cpu->mask[0], TOPOLOGY_CPU_BITS, cpu + 1)) { - unsigned int rcpu, lcpu; + unsigned int rcpu; + int lcpu; rcpu = TOPOLOGY_CPU_BITS - 1 - cpu + tl_cpu->origin; - for_each_present_cpu(lcpu) { - if (cpu_logical_map(lcpu) != rcpu) - continue; + lcpu = smp_find_processor_id(rcpu); + if (lcpu >= 0) { cpumask_set_cpu(lcpu, &book->mask); cpu_book_id[lcpu] = book->id; cpumask_set_cpu(lcpu, &core->mask); diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c index 5ce3750b181f..77cdf4234ebc 100644 --- a/arch/s390/kernel/traps.c +++ b/arch/s390/kernel/traps.c @@ -33,7 +33,6 @@ #include <linux/kprobes.h> #include <linux/bug.h> #include <linux/utsname.h> -#include <asm/system.h> #include <asm/uaccess.h> #include <asm/io.h> #include <linux/atomic.h> @@ -41,6 +40,7 @@ #include <asm/cpcmd.h> #include <asm/lowcore.h> #include <asm/debug.h> +#include <asm/ipl.h> #include "entry.h" void (*pgm_check_table[128])(struct pt_regs *regs); @@ -144,8 +144,8 @@ void show_stack(struct task_struct *task, unsigned long *sp) for (i = 0; i < kstack_depth_to_print; i++) { if (((addr_t) stack & (THREAD_SIZE-1)) == 0) break; - if (i && ((i * sizeof (long) % 32) == 0)) - printk("\n "); + if ((i * sizeof(long) % 32) == 0) + printk("%s ", i == 0 ? "" : "\n"); printk(LONG, *stack++); } printk("\n"); @@ -239,6 +239,7 @@ void die(struct pt_regs *regs, const char *str) static int die_counter; oops_enter(); + lgr_info_log(); debug_stop_all(); console_verbose(); spin_lock_irq(&die_lock); diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c index d73630b4fe1d..ea5590fdca3b 100644 --- a/arch/s390/kernel/vdso.c +++ b/arch/s390/kernel/vdso.c @@ -25,12 +25,12 @@ #include <linux/compat.h> #include <asm/asm-offsets.h> #include <asm/pgtable.h> -#include <asm/system.h> #include <asm/processor.h> #include <asm/mmu.h> #include <asm/mmu_context.h> #include <asm/sections.h> #include <asm/vdso.h> +#include <asm/facility.h> #if defined(CONFIG_32BIT) || defined(CONFIG_COMPAT) extern char vdso32_start, vdso32_end; @@ -89,18 +89,11 @@ static void vdso_init_data(struct vdso_data *vd) #ifdef CONFIG_64BIT /* - * Setup per cpu vdso data page. - */ -static void vdso_init_per_cpu_data(int cpu, struct vdso_per_cpu_data *vpcd) -{ -} - -/* * Allocate/free per cpu vdso data. */ #define SEGMENT_ORDER 2 -int vdso_alloc_per_cpu(int cpu, struct _lowcore *lowcore) +int vdso_alloc_per_cpu(struct _lowcore *lowcore) { unsigned long segment_table, page_table, page_frame; u32 *psal, *aste; @@ -139,7 +132,6 @@ int vdso_alloc_per_cpu(int cpu, struct _lowcore *lowcore) aste[4] = (u32)(addr_t) psal; lowcore->vdso_per_cpu_data = page_frame; - vdso_init_per_cpu_data(cpu, (struct vdso_per_cpu_data *) page_frame); return 0; out: @@ -149,7 +141,7 @@ out: return -ENOMEM; } -void vdso_free_per_cpu(int cpu, struct _lowcore *lowcore) +void vdso_free_per_cpu(struct _lowcore *lowcore) { unsigned long segment_table, page_table, page_frame; u32 *psal, *aste; @@ -168,19 +160,15 @@ void vdso_free_per_cpu(int cpu, struct _lowcore *lowcore) free_pages(segment_table, SEGMENT_ORDER); } -static void __vdso_init_cr5(void *dummy) +static void vdso_init_cr5(void) { unsigned long cr5; + if (user_mode == HOME_SPACE_MODE || !vdso_enabled) + return; cr5 = offsetof(struct _lowcore, paste); __ctl_load(cr5, 5, 5); } - -static void vdso_init_cr5(void) -{ - if (user_mode != HOME_SPACE_MODE && vdso_enabled) - on_each_cpu(__vdso_init_cr5, NULL, 1); -} #endif /* CONFIG_64BIT */ /* @@ -253,17 +241,11 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) * on the "data" page of the vDSO or you'll stop getting kernel * updates and your nice userland gettimeofday will be totally dead. * It's fine to use that for setting breakpoints in the vDSO code - * pages though - * - * Make sure the vDSO gets into every core dump. - * Dumping its contents makes post-mortem fully interpretable later - * without matching up the same kernel and hardware config to see - * what PC values meant. + * pages though. */ rc = install_special_mapping(mm, vdso_base, vdso_pages << PAGE_SHIFT, VM_READ|VM_EXEC| - VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC| - VM_ALWAYSDUMP, + VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, vdso_pagelist); if (rc) current->mm->context.vdso_base = 0; @@ -322,10 +304,8 @@ static int __init vdso_init(void) } vdso64_pagelist[vdso64_pages - 1] = virt_to_page(vdso_data); vdso64_pagelist[vdso64_pages] = NULL; -#ifndef CONFIG_SMP - if (vdso_alloc_per_cpu(0, &S390_lowcore)) + if (vdso_alloc_per_cpu(&S390_lowcore)) BUG(); -#endif vdso_init_cr5(); #endif /* CONFIG_64BIT */ @@ -335,7 +315,7 @@ static int __init vdso_init(void) return 0; } -arch_initcall(vdso_init); +early_initcall(vdso_init); int in_gate_area_no_mm(unsigned long addr) { diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c index bb48977f5469..39ebff506946 100644 --- a/arch/s390/kernel/vtime.c +++ b/arch/s390/kernel/vtime.c @@ -26,6 +26,7 @@ #include <asm/irq_regs.h> #include <asm/cputime.h> #include <asm/irq.h> +#include "entry.h" static DEFINE_PER_CPU(struct vtimer_queue, virt_cpu_timer); @@ -123,153 +124,53 @@ void account_system_vtime(struct task_struct *tsk) } EXPORT_SYMBOL_GPL(account_system_vtime); -void __kprobes vtime_start_cpu(__u64 int_clock, __u64 enter_timer) +void __kprobes vtime_stop_cpu(void) { struct s390_idle_data *idle = &__get_cpu_var(s390_idle); struct vtimer_queue *vq = &__get_cpu_var(virt_cpu_timer); - __u64 idle_time, expires; + unsigned long long idle_time; + unsigned long psw_mask; - if (idle->idle_enter == 0ULL) - return; + trace_hardirqs_on(); + /* Don't trace preempt off for idle. */ + stop_critical_timings(); - /* Account time spent with enabled wait psw loaded as idle time. */ - idle_time = int_clock - idle->idle_enter; - account_idle_time(idle_time); - S390_lowcore.steal_timer += - idle->idle_enter - S390_lowcore.last_update_clock; - S390_lowcore.last_update_clock = int_clock; - - /* Account system time spent going idle. */ - S390_lowcore.system_timer += S390_lowcore.last_update_timer - vq->idle; - S390_lowcore.last_update_timer = enter_timer; - - /* Restart vtime CPU timer */ - if (vq->do_spt) { - /* Program old expire value but first save progress. */ - expires = vq->idle - enter_timer; - expires += get_vtimer(); - set_vtimer(expires); - } else { - /* Don't account the CPU timer delta while the cpu was idle. */ - vq->elapsed -= vq->idle - enter_timer; - } + /* Wait for external, I/O or machine check interrupt. */ + psw_mask = psw_kernel_bits | PSW_MASK_WAIT | PSW_MASK_DAT | + PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK; + idle->nohz_delay = 0; + /* Call the assembler magic in entry.S */ + psw_idle(idle, vq, psw_mask, !list_empty(&vq->list)); + + /* Reenable preemption tracer. */ + start_critical_timings(); + + /* Account time spent with enabled wait psw loaded as idle time. */ idle->sequence++; smp_wmb(); + idle_time = idle->idle_exit - idle->idle_enter; idle->idle_time += idle_time; - idle->idle_enter = 0ULL; + idle->idle_enter = idle->idle_exit = 0ULL; idle->idle_count++; + account_idle_time(idle_time); smp_wmb(); idle->sequence++; } -void __kprobes vtime_stop_cpu(void) -{ - struct s390_idle_data *idle = &__get_cpu_var(s390_idle); - struct vtimer_queue *vq = &__get_cpu_var(virt_cpu_timer); - psw_t psw; - - /* Wait for external, I/O or machine check interrupt. */ - psw.mask = psw_kernel_bits | PSW_MASK_WAIT | - PSW_MASK_DAT | PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK; - - idle->nohz_delay = 0; - - /* Check if the CPU timer needs to be reprogrammed. */ - if (vq->do_spt) { - __u64 vmax = VTIMER_MAX_SLICE; - /* - * The inline assembly is equivalent to - * vq->idle = get_cpu_timer(); - * set_cpu_timer(VTIMER_MAX_SLICE); - * idle->idle_enter = get_clock(); - * __load_psw_mask(psw_kernel_bits | PSW_MASK_WAIT | - * PSW_MASK_DAT | PSW_MASK_IO | - * PSW_MASK_EXT | PSW_MASK_MCHECK); - * The difference is that the inline assembly makes sure that - * the last three instruction are stpt, stck and lpsw in that - * order. This is done to increase the precision. - */ - asm volatile( -#ifndef CONFIG_64BIT - " basr 1,0\n" - "0: ahi 1,1f-0b\n" - " st 1,4(%2)\n" -#else /* CONFIG_64BIT */ - " larl 1,1f\n" - " stg 1,8(%2)\n" -#endif /* CONFIG_64BIT */ - " stpt 0(%4)\n" - " spt 0(%5)\n" - " stck 0(%3)\n" -#ifndef CONFIG_64BIT - " lpsw 0(%2)\n" -#else /* CONFIG_64BIT */ - " lpswe 0(%2)\n" -#endif /* CONFIG_64BIT */ - "1:" - : "=m" (idle->idle_enter), "=m" (vq->idle) - : "a" (&psw), "a" (&idle->idle_enter), - "a" (&vq->idle), "a" (&vmax), "m" (vmax), "m" (psw) - : "memory", "cc", "1"); - } else { - /* - * The inline assembly is equivalent to - * vq->idle = get_cpu_timer(); - * idle->idle_enter = get_clock(); - * __load_psw_mask(psw_kernel_bits | PSW_MASK_WAIT | - * PSW_MASK_DAT | PSW_MASK_IO | - * PSW_MASK_EXT | PSW_MASK_MCHECK); - * The difference is that the inline assembly makes sure that - * the last three instruction are stpt, stck and lpsw in that - * order. This is done to increase the precision. - */ - asm volatile( -#ifndef CONFIG_64BIT - " basr 1,0\n" - "0: ahi 1,1f-0b\n" - " st 1,4(%2)\n" -#else /* CONFIG_64BIT */ - " larl 1,1f\n" - " stg 1,8(%2)\n" -#endif /* CONFIG_64BIT */ - " stpt 0(%4)\n" - " stck 0(%3)\n" -#ifndef CONFIG_64BIT - " lpsw 0(%2)\n" -#else /* CONFIG_64BIT */ - " lpswe 0(%2)\n" -#endif /* CONFIG_64BIT */ - "1:" - : "=m" (idle->idle_enter), "=m" (vq->idle) - : "a" (&psw), "a" (&idle->idle_enter), - "a" (&vq->idle), "m" (psw) - : "memory", "cc", "1"); - } -} - cputime64_t s390_get_idle_time(int cpu) { - struct s390_idle_data *idle; - unsigned long long now, idle_time, idle_enter; + struct s390_idle_data *idle = &per_cpu(s390_idle, cpu); + unsigned long long now, idle_enter, idle_exit; unsigned int sequence; - idle = &per_cpu(s390_idle, cpu); - - now = get_clock(); -repeat: - sequence = idle->sequence; - smp_rmb(); - if (sequence & 1) - goto repeat; - idle_time = 0; - idle_enter = idle->idle_enter; - if (idle_enter != 0ULL && idle_enter < now) - idle_time = now - idle_enter; - smp_rmb(); - if (idle->sequence != sequence) - goto repeat; - return idle_time; + do { + now = get_clock(); + sequence = ACCESS_ONCE(idle->sequence); + idle_enter = ACCESS_ONCE(idle->idle_enter); + idle_exit = ACCESS_ONCE(idle->idle_exit); + } while ((sequence & 1) || (idle->sequence != sequence)); + return idle_enter ? ((idle_exit ? : now) - idle_enter) : 0; } /* @@ -319,7 +220,7 @@ static void do_callbacks(struct list_head *cb_list) /* * Handler for the virtual CPU timer. */ -static void do_cpu_timer_interrupt(unsigned int ext_int_code, +static void do_cpu_timer_interrupt(struct ext_code ext_code, unsigned int param32, unsigned long param64) { struct vtimer_queue *vq; @@ -346,7 +247,6 @@ static void do_cpu_timer_interrupt(unsigned int ext_int_code, } spin_unlock(&vq->lock); - vq->do_spt = list_empty(&cb_list); do_callbacks(&cb_list); /* next event is first in list */ @@ -355,8 +255,7 @@ static void do_cpu_timer_interrupt(unsigned int ext_int_code, if (!list_empty(&vq->list)) { event = list_first_entry(&vq->list, struct vtimer_list, entry); next = event->expires; - } else - vq->do_spt = 0; + } spin_unlock(&vq->lock); /* * To improve precision add the time spent by the @@ -570,6 +469,9 @@ void init_cpu_vtimer(void) /* enable cpu timer interrupts */ __ctl_set_bit(0,10); + + /* set initial cpu timer */ + set_vtimer(0x7fffffffffffffffULL); } static int __cpuinit s390_nohz_notify(struct notifier_block *self, diff --git a/arch/s390/kvm/Kconfig b/arch/s390/kvm/Kconfig index a21634173a66..78eb9847008f 100644 --- a/arch/s390/kvm/Kconfig +++ b/arch/s390/kvm/Kconfig @@ -34,6 +34,15 @@ config KVM If unsure, say N. +config KVM_S390_UCONTROL + bool "Userspace controlled virtual machines" + depends on KVM + ---help--- + Allow CAP_SYS_ADMIN users to create KVM virtual machines that are + controlled by userspace. + + If unsure, say N. + # OK, it's a little counter-intuitive to do this, but it puts it neatly under # the virtualization menu. source drivers/vhost/Kconfig diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c index 8943e82cd4d9..a353f0ea45c2 100644 --- a/arch/s390/kvm/diag.c +++ b/arch/s390/kvm/diag.c @@ -20,8 +20,8 @@ static int diag_release_pages(struct kvm_vcpu *vcpu) unsigned long start, end; unsigned long prefix = vcpu->arch.sie_block->prefix; - start = vcpu->arch.guest_gprs[(vcpu->arch.sie_block->ipa & 0xf0) >> 4]; - end = vcpu->arch.guest_gprs[vcpu->arch.sie_block->ipa & 0xf] + 4096; + start = vcpu->run->s.regs.gprs[(vcpu->arch.sie_block->ipa & 0xf0) >> 4]; + end = vcpu->run->s.regs.gprs[vcpu->arch.sie_block->ipa & 0xf] + 4096; if (start & ~PAGE_MASK || end & ~PAGE_MASK || start > end || start < 2 * PAGE_SIZE) @@ -56,7 +56,7 @@ static int __diag_time_slice_end(struct kvm_vcpu *vcpu) static int __diag_ipl_functions(struct kvm_vcpu *vcpu) { unsigned int reg = vcpu->arch.sie_block->ipa & 0xf; - unsigned long subcode = vcpu->arch.guest_gprs[reg] & 0xffff; + unsigned long subcode = vcpu->run->s.regs.gprs[reg] & 0xffff; VCPU_EVENT(vcpu, 5, "diag ipl functions, subcode %lx", subcode); switch (subcode) { diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c index 02434543eabb..361456577c6f 100644 --- a/arch/s390/kvm/intercept.c +++ b/arch/s390/kvm/intercept.c @@ -36,7 +36,7 @@ static int handle_lctlg(struct kvm_vcpu *vcpu) useraddr = disp2; if (base2) - useraddr += vcpu->arch.guest_gprs[base2]; + useraddr += vcpu->run->s.regs.gprs[base2]; if (useraddr & 7) return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); @@ -75,7 +75,7 @@ static int handle_lctl(struct kvm_vcpu *vcpu) useraddr = disp2; if (base2) - useraddr += vcpu->arch.guest_gprs[base2]; + useraddr += vcpu->run->s.regs.gprs[base2]; if (useraddr & 3) return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); @@ -133,13 +133,6 @@ static int handle_stop(struct kvm_vcpu *vcpu) vcpu->stat.exit_stop_request++; spin_lock_bh(&vcpu->arch.local_int.lock); - if (vcpu->arch.local_int.action_bits & ACTION_STORE_ON_STOP) { - vcpu->arch.local_int.action_bits &= ~ACTION_STORE_ON_STOP; - rc = kvm_s390_vcpu_store_status(vcpu, - KVM_S390_STORE_STATUS_NOADDR); - if (rc >= 0) - rc = -EOPNOTSUPP; - } if (vcpu->arch.local_int.action_bits & ACTION_RELOADVCPU_ON_STOP) { vcpu->arch.local_int.action_bits &= ~ACTION_RELOADVCPU_ON_STOP; @@ -155,7 +148,18 @@ static int handle_stop(struct kvm_vcpu *vcpu) rc = -EOPNOTSUPP; } - spin_unlock_bh(&vcpu->arch.local_int.lock); + if (vcpu->arch.local_int.action_bits & ACTION_STORE_ON_STOP) { + vcpu->arch.local_int.action_bits &= ~ACTION_STORE_ON_STOP; + /* store status must be called unlocked. Since local_int.lock + * only protects local_int.* and not guest memory we can give + * up the lock here */ + spin_unlock_bh(&vcpu->arch.local_int.lock); + rc = kvm_s390_vcpu_store_status(vcpu, + KVM_S390_STORE_STATUS_NOADDR); + if (rc >= 0) + rc = -EOPNOTSUPP; + } else + spin_unlock_bh(&vcpu->arch.local_int.lock); return rc; } diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 278ee009ce65..2d9f9a72bb81 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -134,7 +134,7 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu, if (rc == -EFAULT) exception = 1; - rc = put_guest_u16(vcpu, __LC_CPU_ADDRESS, inti->emerg.code); + rc = put_guest_u16(vcpu, __LC_EXT_CPU_ADDR, inti->emerg.code); if (rc == -EFAULT) exception = 1; @@ -156,7 +156,7 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu, if (rc == -EFAULT) exception = 1; - rc = put_guest_u16(vcpu, __LC_CPU_ADDRESS, inti->extcall.code); + rc = put_guest_u16(vcpu, __LC_EXT_CPU_ADDR, inti->extcall.code); if (rc == -EFAULT) exception = 1; @@ -202,7 +202,7 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu, if (rc == -EFAULT) exception = 1; - rc = put_guest_u16(vcpu, __LC_CPU_ADDRESS, 0x0d00); + rc = put_guest_u16(vcpu, __LC_EXT_CPU_ADDR, 0x0d00); if (rc == -EFAULT) exception = 1; @@ -236,8 +236,7 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu, VCPU_EVENT(vcpu, 4, "interrupt: set prefix to %x", inti->prefix.address); vcpu->stat.deliver_prefix_signal++; - vcpu->arch.sie_block->prefix = inti->prefix.address; - vcpu->arch.sie_block->ihcpu = 0xffff; + kvm_s390_set_prefix(vcpu, inti->prefix.address); break; case KVM_S390_RESTART: diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index d1c445732451..217ce44395a4 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -27,7 +27,7 @@ #include <asm/lowcore.h> #include <asm/pgtable.h> #include <asm/nmi.h> -#include <asm/system.h> +#include <asm/switch_to.h> #include "kvm-s390.h" #include "gaccess.h" @@ -129,6 +129,10 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_S390_PSW: case KVM_CAP_S390_GMAP: case KVM_CAP_SYNC_MMU: +#ifdef CONFIG_KVM_S390_UCONTROL + case KVM_CAP_S390_UCONTROL: +#endif + case KVM_CAP_SYNC_REGS: r = 1; break; default: @@ -171,11 +175,22 @@ long kvm_arch_vm_ioctl(struct file *filp, return r; } -int kvm_arch_init_vm(struct kvm *kvm) +int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) { int rc; char debug_name[16]; + rc = -EINVAL; +#ifdef CONFIG_KVM_S390_UCONTROL + if (type & ~KVM_VM_S390_UCONTROL) + goto out_err; + if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN))) + goto out_err; +#else + if (type) + goto out_err; +#endif + rc = s390_enable_sie(); if (rc) goto out_err; @@ -198,10 +213,13 @@ int kvm_arch_init_vm(struct kvm *kvm) debug_register_view(kvm->arch.dbf, &debug_sprintf_view); VM_EVENT(kvm, 3, "%s", "vm created"); - kvm->arch.gmap = gmap_alloc(current->mm); - if (!kvm->arch.gmap) - goto out_nogmap; - + if (type & KVM_VM_S390_UCONTROL) { + kvm->arch.gmap = NULL; + } else { + kvm->arch.gmap = gmap_alloc(current->mm); + if (!kvm->arch.gmap) + goto out_nogmap; + } return 0; out_nogmap: debug_unregister(kvm->arch.dbf); @@ -214,11 +232,18 @@ out_err: void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) { VCPU_EVENT(vcpu, 3, "%s", "free cpu"); - clear_bit(63 - vcpu->vcpu_id, (unsigned long *) &vcpu->kvm->arch.sca->mcn); - if (vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda == - (__u64) vcpu->arch.sie_block) - vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda = 0; + if (!kvm_is_ucontrol(vcpu->kvm)) { + clear_bit(63 - vcpu->vcpu_id, + (unsigned long *) &vcpu->kvm->arch.sca->mcn); + if (vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda == + (__u64) vcpu->arch.sie_block) + vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda = 0; + } smp_mb(); + + if (kvm_is_ucontrol(vcpu->kvm)) + gmap_free(vcpu->arch.gmap); + free_page((unsigned long)(vcpu->arch.sie_block)); kvm_vcpu_uninit(vcpu); kfree(vcpu); @@ -249,13 +274,25 @@ void kvm_arch_destroy_vm(struct kvm *kvm) kvm_free_vcpus(kvm); free_page((unsigned long)(kvm->arch.sca)); debug_unregister(kvm->arch.dbf); - gmap_free(kvm->arch.gmap); + if (!kvm_is_ucontrol(kvm)) + gmap_free(kvm->arch.gmap); } /* Section: vcpu related */ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) { + if (kvm_is_ucontrol(vcpu->kvm)) { + vcpu->arch.gmap = gmap_alloc(current->mm); + if (!vcpu->arch.gmap) + return -ENOMEM; + return 0; + } + vcpu->arch.gmap = vcpu->kvm->arch.gmap; + vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX | + KVM_SYNC_GPRS | + KVM_SYNC_ACRS | + KVM_SYNC_CRS; return 0; } @@ -270,7 +307,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) save_access_regs(vcpu->arch.host_acrs); vcpu->arch.guest_fpregs.fpc &= FPC_VALID_MASK; restore_fp_regs(&vcpu->arch.guest_fpregs); - restore_access_regs(vcpu->arch.guest_acrs); + restore_access_regs(vcpu->run->s.regs.acrs); gmap_enable(vcpu->arch.gmap); atomic_set_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags); } @@ -280,7 +317,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) atomic_clear_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags); gmap_disable(vcpu->arch.gmap); save_fp_regs(&vcpu->arch.guest_fpregs); - save_access_regs(vcpu->arch.guest_acrs); + save_access_regs(vcpu->run->s.regs.acrs); restore_fp_regs(&vcpu->arch.host_fpregs); restore_access_regs(vcpu->arch.host_acrs); } @@ -290,8 +327,7 @@ static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu) /* this equals initial cpu reset in pop, but we don't switch to ESA */ vcpu->arch.sie_block->gpsw.mask = 0UL; vcpu->arch.sie_block->gpsw.addr = 0UL; - vcpu->arch.sie_block->prefix = 0UL; - vcpu->arch.sie_block->ihcpu = 0xffff; + kvm_s390_set_prefix(vcpu, 0); vcpu->arch.sie_block->cputm = 0UL; vcpu->arch.sie_block->ckc = 0UL; vcpu->arch.sie_block->todpr = 0; @@ -342,12 +378,19 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, goto out_free_cpu; vcpu->arch.sie_block->icpua = id; - BUG_ON(!kvm->arch.sca); - if (!kvm->arch.sca->cpu[id].sda) - kvm->arch.sca->cpu[id].sda = (__u64) vcpu->arch.sie_block; - vcpu->arch.sie_block->scaoh = (__u32)(((__u64)kvm->arch.sca) >> 32); - vcpu->arch.sie_block->scaol = (__u32)(__u64)kvm->arch.sca; - set_bit(63 - id, (unsigned long *) &kvm->arch.sca->mcn); + if (!kvm_is_ucontrol(kvm)) { + if (!kvm->arch.sca) { + WARN_ON_ONCE(1); + goto out_free_cpu; + } + if (!kvm->arch.sca->cpu[id].sda) + kvm->arch.sca->cpu[id].sda = + (__u64) vcpu->arch.sie_block; + vcpu->arch.sie_block->scaoh = + (__u32)(((__u64)kvm->arch.sca) >> 32); + vcpu->arch.sie_block->scaol = (__u32)(__u64)kvm->arch.sca; + set_bit(63 - id, (unsigned long *) &kvm->arch.sca->mcn); + } spin_lock_init(&vcpu->arch.local_int.lock); INIT_LIST_HEAD(&vcpu->arch.local_int.list); @@ -388,29 +431,29 @@ static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu) int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) { - memcpy(&vcpu->arch.guest_gprs, ®s->gprs, sizeof(regs->gprs)); + memcpy(&vcpu->run->s.regs.gprs, ®s->gprs, sizeof(regs->gprs)); return 0; } int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) { - memcpy(®s->gprs, &vcpu->arch.guest_gprs, sizeof(regs->gprs)); + memcpy(®s->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs)); return 0; } int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) { - memcpy(&vcpu->arch.guest_acrs, &sregs->acrs, sizeof(sregs->acrs)); + memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs)); memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs)); - restore_access_regs(vcpu->arch.guest_acrs); + restore_access_regs(vcpu->run->s.regs.acrs); return 0; } int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) { - memcpy(&sregs->acrs, &vcpu->arch.guest_acrs, sizeof(sregs->acrs)); + memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs)); memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs)); return 0; } @@ -418,7 +461,7 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) { memcpy(&vcpu->arch.guest_fpregs.fprs, &fpu->fprs, sizeof(fpu->fprs)); - vcpu->arch.guest_fpregs.fpc = fpu->fpc; + vcpu->arch.guest_fpregs.fpc = fpu->fpc & FPC_VALID_MASK; restore_fp_regs(&vcpu->arch.guest_fpregs); return 0; } @@ -467,9 +510,11 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, return -EINVAL; /* not implemented yet */ } -static void __vcpu_run(struct kvm_vcpu *vcpu) +static int __vcpu_run(struct kvm_vcpu *vcpu) { - memcpy(&vcpu->arch.sie_block->gg14, &vcpu->arch.guest_gprs[14], 16); + int rc; + + memcpy(&vcpu->arch.sie_block->gg14, &vcpu->run->s.regs.gprs[14], 16); if (need_resched()) schedule(); @@ -477,7 +522,8 @@ static void __vcpu_run(struct kvm_vcpu *vcpu) if (test_thread_flag(TIF_MCCK_PENDING)) s390_handle_mcck(); - kvm_s390_deliver_pending_interrupts(vcpu); + if (!kvm_is_ucontrol(vcpu->kvm)) + kvm_s390_deliver_pending_interrupts(vcpu); vcpu->arch.sie_block->icptcode = 0; local_irq_disable(); @@ -485,9 +531,15 @@ static void __vcpu_run(struct kvm_vcpu *vcpu) local_irq_enable(); VCPU_EVENT(vcpu, 6, "entering sie flags %x", atomic_read(&vcpu->arch.sie_block->cpuflags)); - if (sie64a(vcpu->arch.sie_block, vcpu->arch.guest_gprs)) { - VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction"); - kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + rc = sie64a(vcpu->arch.sie_block, vcpu->run->s.regs.gprs); + if (rc) { + if (kvm_is_ucontrol(vcpu->kvm)) { + rc = SIE_INTERCEPT_UCONTROL; + } else { + VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction"); + kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + rc = 0; + } } VCPU_EVENT(vcpu, 6, "exit sie icptcode %d", vcpu->arch.sie_block->icptcode); @@ -495,7 +547,8 @@ static void __vcpu_run(struct kvm_vcpu *vcpu) kvm_guest_exit(); local_irq_enable(); - memcpy(&vcpu->arch.guest_gprs[14], &vcpu->arch.sie_block->gg14, 16); + memcpy(&vcpu->run->s.regs.gprs[14], &vcpu->arch.sie_block->gg14, 16); + return rc; } int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) @@ -516,6 +569,7 @@ rerun_vcpu: case KVM_EXIT_UNKNOWN: case KVM_EXIT_INTR: case KVM_EXIT_S390_RESET: + case KVM_EXIT_S390_UCONTROL: break; default: BUG(); @@ -523,12 +577,26 @@ rerun_vcpu: vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask; vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr; + if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX) { + kvm_run->kvm_dirty_regs &= ~KVM_SYNC_PREFIX; + kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix); + } + if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) { + kvm_run->kvm_dirty_regs &= ~KVM_SYNC_CRS; + memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128); + kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix); + } might_fault(); do { - __vcpu_run(vcpu); - rc = kvm_handle_sie_intercept(vcpu); + rc = __vcpu_run(vcpu); + if (rc) + break; + if (kvm_is_ucontrol(vcpu->kvm)) + rc = -EOPNOTSUPP; + else + rc = kvm_handle_sie_intercept(vcpu); } while (!signal_pending(current) && !rc); if (rc == SIE_INTERCEPT_RERUNVCPU) @@ -539,6 +607,16 @@ rerun_vcpu: rc = -EINTR; } +#ifdef CONFIG_KVM_S390_UCONTROL + if (rc == SIE_INTERCEPT_UCONTROL) { + kvm_run->exit_reason = KVM_EXIT_S390_UCONTROL; + kvm_run->s390_ucontrol.trans_exc_code = + current->thread.gmap_addr; + kvm_run->s390_ucontrol.pgm_code = 0x10; + rc = 0; + } +#endif + if (rc == -EOPNOTSUPP) { /* intercept cannot be handled in-kernel, prepare kvm-run */ kvm_run->exit_reason = KVM_EXIT_S390_SIEIC; @@ -556,6 +634,8 @@ rerun_vcpu: kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask; kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr; + kvm_run->s.regs.prefix = vcpu->arch.sie_block->prefix; + memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128); if (vcpu->sigset_active) sigprocmask(SIG_SETMASK, &sigsaved, NULL); @@ -602,7 +682,7 @@ int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr) return -EFAULT; if (__guestcopy(vcpu, addr + offsetof(struct save_area, gp_regs), - vcpu->arch.guest_gprs, 128, prefix)) + vcpu->run->s.regs.gprs, 128, prefix)) return -EFAULT; if (__guestcopy(vcpu, addr + offsetof(struct save_area, psw), @@ -631,7 +711,7 @@ int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr) return -EFAULT; if (__guestcopy(vcpu, addr + offsetof(struct save_area, acc_regs), - &vcpu->arch.guest_acrs, 64, prefix)) + &vcpu->run->s.regs.acrs, 64, prefix)) return -EFAULT; if (__guestcopy(vcpu, @@ -673,12 +753,77 @@ long kvm_arch_vcpu_ioctl(struct file *filp, case KVM_S390_INITIAL_RESET: r = kvm_arch_vcpu_ioctl_initial_reset(vcpu); break; +#ifdef CONFIG_KVM_S390_UCONTROL + case KVM_S390_UCAS_MAP: { + struct kvm_s390_ucas_mapping ucasmap; + + if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) { + r = -EFAULT; + break; + } + + if (!kvm_is_ucontrol(vcpu->kvm)) { + r = -EINVAL; + break; + } + + r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr, + ucasmap.vcpu_addr, ucasmap.length); + break; + } + case KVM_S390_UCAS_UNMAP: { + struct kvm_s390_ucas_mapping ucasmap; + + if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) { + r = -EFAULT; + break; + } + + if (!kvm_is_ucontrol(vcpu->kvm)) { + r = -EINVAL; + break; + } + + r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr, + ucasmap.length); + break; + } +#endif + case KVM_S390_VCPU_FAULT: { + r = gmap_fault(arg, vcpu->arch.gmap); + if (!IS_ERR_VALUE(r)) + r = 0; + break; + } default: - r = -EINVAL; + r = -ENOTTY; } return r; } +int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf) +{ +#ifdef CONFIG_KVM_S390_UCONTROL + if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET) + && (kvm_is_ucontrol(vcpu->kvm))) { + vmf->page = virt_to_page(vcpu->arch.sie_block); + get_page(vmf->page); + return 0; + } +#endif + return VM_FAULT_SIGBUS; +} + +void kvm_arch_free_memslot(struct kvm_memory_slot *free, + struct kvm_memory_slot *dont) +{ +} + +int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) +{ + return 0; +} + /* Section: memory related */ int kvm_arch_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index 99b0b7597115..ff28f9d1c9eb 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -26,6 +26,7 @@ typedef int (*intercept_handler_t)(struct kvm_vcpu *vcpu); /* negativ values are error codes, positive values for internal conditions */ #define SIE_INTERCEPT_RERUNVCPU (1<<0) +#define SIE_INTERCEPT_UCONTROL (1<<1) int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu); #define VM_EVENT(d_kvm, d_loglevel, d_string, d_args...)\ @@ -47,6 +48,23 @@ static inline int __cpu_is_stopped(struct kvm_vcpu *vcpu) return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_STOP_INT; } +static inline int kvm_is_ucontrol(struct kvm *kvm) +{ +#ifdef CONFIG_KVM_S390_UCONTROL + if (kvm->arch.gmap) + return 0; + return 1; +#else + return 0; +#endif +} + +static inline void kvm_s390_set_prefix(struct kvm_vcpu *vcpu, u32 prefix) +{ + vcpu->arch.sie_block->prefix = prefix & 0x7fffe000u; + vcpu->arch.sie_block->ihcpu = 0xffff; +} + int kvm_s390_handle_wait(struct kvm_vcpu *vcpu); enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer); void kvm_s390_tasklet(unsigned long parm); diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index d02638959922..e5a45dbd26ac 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -33,7 +33,7 @@ static int handle_set_prefix(struct kvm_vcpu *vcpu) operand2 = disp2; if (base2) - operand2 += vcpu->arch.guest_gprs[base2]; + operand2 += vcpu->run->s.regs.gprs[base2]; /* must be word boundary */ if (operand2 & 3) { @@ -56,8 +56,7 @@ static int handle_set_prefix(struct kvm_vcpu *vcpu) goto out; } - vcpu->arch.sie_block->prefix = address; - vcpu->arch.sie_block->ihcpu = 0xffff; + kvm_s390_set_prefix(vcpu, address); VCPU_EVENT(vcpu, 5, "setting prefix to %x", address); out: @@ -74,7 +73,7 @@ static int handle_store_prefix(struct kvm_vcpu *vcpu) vcpu->stat.instruction_stpx++; operand2 = disp2; if (base2) - operand2 += vcpu->arch.guest_gprs[base2]; + operand2 += vcpu->run->s.regs.gprs[base2]; /* must be word boundary */ if (operand2 & 3) { @@ -106,7 +105,7 @@ static int handle_store_cpu_address(struct kvm_vcpu *vcpu) vcpu->stat.instruction_stap++; useraddr = disp2; if (base2) - useraddr += vcpu->arch.guest_gprs[base2]; + useraddr += vcpu->run->s.regs.gprs[base2]; if (useraddr & 1) { kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); @@ -181,7 +180,7 @@ static int handle_stidp(struct kvm_vcpu *vcpu) vcpu->stat.instruction_stidp++; operand2 = disp2; if (base2) - operand2 += vcpu->arch.guest_gprs[base2]; + operand2 += vcpu->run->s.regs.gprs[base2]; if (operand2 & 7) { kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); @@ -232,9 +231,9 @@ static void handle_stsi_3_2_2(struct kvm_vcpu *vcpu, struct sysinfo_3_2_2 *mem) static int handle_stsi(struct kvm_vcpu *vcpu) { - int fc = (vcpu->arch.guest_gprs[0] & 0xf0000000) >> 28; - int sel1 = vcpu->arch.guest_gprs[0] & 0xff; - int sel2 = vcpu->arch.guest_gprs[1] & 0xffff; + int fc = (vcpu->run->s.regs.gprs[0] & 0xf0000000) >> 28; + int sel1 = vcpu->run->s.regs.gprs[0] & 0xff; + int sel2 = vcpu->run->s.regs.gprs[1] & 0xffff; int base2 = vcpu->arch.sie_block->ipb >> 28; int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16); u64 operand2; @@ -245,14 +244,14 @@ static int handle_stsi(struct kvm_vcpu *vcpu) operand2 = disp2; if (base2) - operand2 += vcpu->arch.guest_gprs[base2]; + operand2 += vcpu->run->s.regs.gprs[base2]; if (operand2 & 0xfff && fc > 0) return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); switch (fc) { case 0: - vcpu->arch.guest_gprs[0] = 3 << 28; + vcpu->run->s.regs.gprs[0] = 3 << 28; vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44); return 0; case 1: /* same handling for 1 and 2 */ @@ -281,7 +280,7 @@ static int handle_stsi(struct kvm_vcpu *vcpu) } free_page(mem); vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44); - vcpu->arch.guest_gprs[0] = 0; + vcpu->run->s.regs.gprs[0] = 0; return 0; out_mem: free_page(mem); @@ -333,8 +332,8 @@ static int handle_tprot(struct kvm_vcpu *vcpu) int disp1 = (vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16; int base2 = (vcpu->arch.sie_block->ipb & 0xf000) >> 12; int disp2 = vcpu->arch.sie_block->ipb & 0x0fff; - u64 address1 = disp1 + base1 ? vcpu->arch.guest_gprs[base1] : 0; - u64 address2 = disp2 + base2 ? vcpu->arch.guest_gprs[base2] : 0; + u64 address1 = disp1 + base1 ? vcpu->run->s.regs.gprs[base1] : 0; + u64 address2 = disp2 + base2 ? vcpu->run->s.regs.gprs[base2] : 0; struct vm_area_struct *vma; unsigned long user_address; diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c index 0a7941d74bc6..0ad4cf238391 100644 --- a/arch/s390/kvm/sigp.c +++ b/arch/s390/kvm/sigp.c @@ -48,7 +48,7 @@ static int __sigp_sense(struct kvm_vcpu *vcpu, u16 cpu_addr, - unsigned long *reg) + u64 *reg) { struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int; int rc; @@ -160,12 +160,15 @@ static int __inject_sigp_stop(struct kvm_s390_local_interrupt *li, int action) inti->type = KVM_S390_SIGP_STOP; spin_lock_bh(&li->lock); + if ((atomic_read(li->cpuflags) & CPUSTAT_STOPPED)) + goto out; list_add_tail(&inti->list, &li->list); atomic_set(&li->active, 1); atomic_set_mask(CPUSTAT_STOP_INT, li->cpuflags); li->action_bits |= action; if (waitqueue_active(&li->wq)) wake_up_interruptible(&li->wq); +out: spin_unlock_bh(&li->lock); return 0; /* order accepted */ @@ -220,7 +223,7 @@ static int __sigp_set_arch(struct kvm_vcpu *vcpu, u32 parameter) } static int __sigp_set_prefix(struct kvm_vcpu *vcpu, u16 cpu_addr, u32 address, - unsigned long *reg) + u64 *reg) { struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int; struct kvm_s390_local_interrupt *li = NULL; @@ -278,7 +281,7 @@ out_fi: } static int __sigp_sense_running(struct kvm_vcpu *vcpu, u16 cpu_addr, - unsigned long *reg) + u64 *reg) { int rc; struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int; @@ -309,6 +312,34 @@ static int __sigp_sense_running(struct kvm_vcpu *vcpu, u16 cpu_addr, return rc; } +static int __sigp_restart(struct kvm_vcpu *vcpu, u16 cpu_addr) +{ + int rc = 0; + struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int; + struct kvm_s390_local_interrupt *li; + + if (cpu_addr >= KVM_MAX_VCPUS) + return 3; /* not operational */ + + spin_lock(&fi->lock); + li = fi->local_int[cpu_addr]; + if (li == NULL) { + rc = 3; /* not operational */ + goto out; + } + + spin_lock_bh(&li->lock); + if (li->action_bits & ACTION_STOP_ON_STOP) + rc = 2; /* busy */ + else + VCPU_EVENT(vcpu, 4, "sigp restart %x to handle userspace", + cpu_addr); + spin_unlock_bh(&li->lock); +out: + spin_unlock(&fi->lock); + return rc; +} + int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu) { int r1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4; @@ -316,7 +347,7 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu) int base2 = vcpu->arch.sie_block->ipb >> 28; int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16); u32 parameter; - u16 cpu_addr = vcpu->arch.guest_gprs[r3]; + u16 cpu_addr = vcpu->run->s.regs.gprs[r3]; u8 order_code; int rc; @@ -327,18 +358,18 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu) order_code = disp2; if (base2) - order_code += vcpu->arch.guest_gprs[base2]; + order_code += vcpu->run->s.regs.gprs[base2]; if (r1 % 2) - parameter = vcpu->arch.guest_gprs[r1]; + parameter = vcpu->run->s.regs.gprs[r1]; else - parameter = vcpu->arch.guest_gprs[r1 + 1]; + parameter = vcpu->run->s.regs.gprs[r1 + 1]; switch (order_code) { case SIGP_SENSE: vcpu->stat.instruction_sigp_sense++; rc = __sigp_sense(vcpu, cpu_addr, - &vcpu->arch.guest_gprs[r1]); + &vcpu->run->s.regs.gprs[r1]); break; case SIGP_EXTERNAL_CALL: vcpu->stat.instruction_sigp_external_call++; @@ -354,7 +385,8 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu) break; case SIGP_STOP_STORE_STATUS: vcpu->stat.instruction_sigp_stop++; - rc = __sigp_stop(vcpu, cpu_addr, ACTION_STORE_ON_STOP); + rc = __sigp_stop(vcpu, cpu_addr, ACTION_STORE_ON_STOP | + ACTION_STOP_ON_STOP); break; case SIGP_SET_ARCH: vcpu->stat.instruction_sigp_arch++; @@ -363,15 +395,18 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu) case SIGP_SET_PREFIX: vcpu->stat.instruction_sigp_prefix++; rc = __sigp_set_prefix(vcpu, cpu_addr, parameter, - &vcpu->arch.guest_gprs[r1]); + &vcpu->run->s.regs.gprs[r1]); break; case SIGP_SENSE_RUNNING: vcpu->stat.instruction_sigp_sense_running++; rc = __sigp_sense_running(vcpu, cpu_addr, - &vcpu->arch.guest_gprs[r1]); + &vcpu->run->s.regs.gprs[r1]); break; case SIGP_RESTART: vcpu->stat.instruction_sigp_restart++; + rc = __sigp_restart(vcpu, cpu_addr); + if (rc == 2) /* busy */ + break; /* user space must know about restart */ default: return -EOPNOTSUPP; diff --git a/arch/s390/lib/delay.c b/arch/s390/lib/delay.c index db92f044024c..9f1f71e85778 100644 --- a/arch/s390/lib/delay.c +++ b/arch/s390/lib/delay.c @@ -13,6 +13,7 @@ #include <linux/irqflags.h> #include <linux/interrupt.h> #include <asm/div64.h> +#include <asm/timer.h> void __delay(unsigned long loops) { @@ -28,36 +29,33 @@ void __delay(unsigned long loops) static void __udelay_disabled(unsigned long long usecs) { - unsigned long mask, cr0, cr0_saved; - u64 clock_saved; - u64 end; + unsigned long cr0, cr6, new; + u64 clock_saved, end; - mask = psw_kernel_bits | PSW_MASK_DAT | PSW_MASK_WAIT | - PSW_MASK_EXT | PSW_MASK_MCHECK; end = get_clock() + (usecs << 12); clock_saved = local_tick_disable(); - __ctl_store(cr0_saved, 0, 0); - cr0 = (cr0_saved & 0xffff00e0) | 0x00000800; - __ctl_load(cr0 , 0, 0); + __ctl_store(cr0, 0, 0); + __ctl_store(cr6, 6, 6); + new = (cr0 & 0xffff00e0) | 0x00000800; + __ctl_load(new , 0, 0); + new = 0; + __ctl_load(new, 6, 6); lockdep_off(); do { set_clock_comparator(end); - trace_hardirqs_on(); - __load_psw_mask(mask); + vtime_stop_cpu(); local_irq_disable(); } while (get_clock() < end); lockdep_on(); - __ctl_load(cr0_saved, 0, 0); + __ctl_load(cr0, 0, 0); + __ctl_load(cr6, 6, 6); local_tick_enable(clock_saved); } static void __udelay_enabled(unsigned long long usecs) { - unsigned long mask; - u64 clock_saved; - u64 end; + u64 clock_saved, end; - mask = psw_kernel_bits | PSW_MASK_WAIT | PSW_MASK_EXT | PSW_MASK_IO; end = get_clock() + (usecs << 12); do { clock_saved = 0; @@ -65,8 +63,7 @@ static void __udelay_enabled(unsigned long long usecs) clock_saved = local_tick_disable(); set_clock_comparator(end); } - trace_hardirqs_on(); - __load_psw_mask(mask); + vtime_stop_cpu(); local_irq_disable(); if (clock_saved) local_tick_enable(clock_saved); diff --git a/arch/s390/lib/spinlock.c b/arch/s390/lib/spinlock.c index 91754ffb9203..093eb694d9c1 100644 --- a/arch/s390/lib/spinlock.c +++ b/arch/s390/lib/spinlock.c @@ -10,6 +10,7 @@ #include <linux/module.h> #include <linux/spinlock.h> #include <linux/init.h> +#include <linux/smp.h> #include <asm/io.h> int spin_retry = 1000; @@ -24,21 +25,6 @@ static int __init spin_retry_setup(char *str) } __setup("spin_retry=", spin_retry_setup); -static inline void _raw_yield(void) -{ - if (MACHINE_HAS_DIAG44) - asm volatile("diag 0,0,0x44"); -} - -static inline void _raw_yield_cpu(int cpu) -{ - if (MACHINE_HAS_DIAG9C) - asm volatile("diag %0,0,0x9c" - : : "d" (cpu_logical_map(cpu))); - else - _raw_yield(); -} - void arch_spin_lock_wait(arch_spinlock_t *lp) { int count = spin_retry; @@ -60,7 +46,7 @@ void arch_spin_lock_wait(arch_spinlock_t *lp) } owner = lp->owner_cpu; if (owner) - _raw_yield_cpu(~owner); + smp_yield_cpu(~owner); if (_raw_compare_and_swap(&lp->owner_cpu, 0, cpu) == 0) return; } @@ -91,7 +77,7 @@ void arch_spin_lock_wait_flags(arch_spinlock_t *lp, unsigned long flags) } owner = lp->owner_cpu; if (owner) - _raw_yield_cpu(~owner); + smp_yield_cpu(~owner); local_irq_disable(); if (_raw_compare_and_swap(&lp->owner_cpu, 0, cpu) == 0) return; @@ -121,7 +107,7 @@ void arch_spin_relax(arch_spinlock_t *lock) if (cpu != 0) { if (MACHINE_IS_VM || MACHINE_IS_KVM || !smp_vcpu_scheduled(~cpu)) - _raw_yield_cpu(~cpu); + smp_yield_cpu(~cpu); } } EXPORT_SYMBOL(arch_spin_relax); @@ -133,7 +119,7 @@ void _raw_read_lock_wait(arch_rwlock_t *rw) while (1) { if (count-- <= 0) { - _raw_yield(); + smp_yield(); count = spin_retry; } if (!arch_read_can_lock(rw)) @@ -153,7 +139,7 @@ void _raw_read_lock_wait_flags(arch_rwlock_t *rw, unsigned long flags) local_irq_restore(flags); while (1) { if (count-- <= 0) { - _raw_yield(); + smp_yield(); count = spin_retry; } if (!arch_read_can_lock(rw)) @@ -188,7 +174,7 @@ void _raw_write_lock_wait(arch_rwlock_t *rw) while (1) { if (count-- <= 0) { - _raw_yield(); + smp_yield(); count = spin_retry; } if (!arch_write_can_lock(rw)) @@ -206,7 +192,7 @@ void _raw_write_lock_wait_flags(arch_rwlock_t *rw, unsigned long flags) local_irq_restore(flags); while (1) { if (count-- <= 0) { - _raw_yield(); + smp_yield(); count = spin_retry; } if (!arch_write_can_lock(rw)) diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index e8fcd928dc78..46ef3fd0663b 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -32,10 +32,10 @@ #include <linux/uaccess.h> #include <linux/hugetlb.h> #include <asm/asm-offsets.h> -#include <asm/system.h> #include <asm/pgtable.h> #include <asm/irq.h> #include <asm/mmu_context.h> +#include <asm/facility.h> #include "../kernel/entry.h" #ifndef CONFIG_64BIT @@ -532,7 +532,7 @@ void pfault_fini(void) static DEFINE_SPINLOCK(pfault_lock); static LIST_HEAD(pfault_list); -static void pfault_interrupt(unsigned int ext_int_code, +static void pfault_interrupt(struct ext_code ext_code, unsigned int param32, unsigned long param64) { struct task_struct *tsk; @@ -545,7 +545,7 @@ static void pfault_interrupt(unsigned int ext_int_code, * in the 'cpu address' field associated with the * external interrupt. */ - subcode = ext_int_code >> 16; + subcode = ext_code.subcode; if ((subcode & 0xff00) != __SUBCODE_MASK) return; kstat_cpu(smp_processor_id()).irqs[EXTINT_PFL]++; diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index 50236610de83..2bea0605856e 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -29,7 +29,6 @@ #include <linux/export.h> #include <linux/gfp.h> #include <asm/processor.h> -#include <asm/system.h> #include <asm/uaccess.h> #include <asm/pgtable.h> #include <asm/pgalloc.h> @@ -38,6 +37,7 @@ #include <asm/tlb.h> #include <asm/tlbflush.h> #include <asm/sections.h> +#include <asm/ctl_reg.h> pgd_t swapper_pg_dir[PTRS_PER_PGD] __attribute__((__aligned__(PAGE_SIZE))); diff --git a/arch/s390/mm/maccess.c b/arch/s390/mm/maccess.c index 1cb8427bedfb..7bb15fcca75e 100644 --- a/arch/s390/mm/maccess.c +++ b/arch/s390/mm/maccess.c @@ -12,7 +12,7 @@ #include <linux/types.h> #include <linux/errno.h> #include <linux/gfp.h> -#include <asm/system.h> +#include <asm/ctl_reg.h> /* * This function writes to kernel memory bypassing DAT and possible diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c index a0155c02e324..2857c48486ea 100644 --- a/arch/s390/mm/mmap.c +++ b/arch/s390/mm/mmap.c @@ -100,7 +100,6 @@ void arch_pick_mmap_layout(struct mm_struct *mm) mm->unmap_area = arch_unmap_area_topdown; } } -EXPORT_SYMBOL_GPL(arch_pick_mmap_layout); #else @@ -175,6 +174,5 @@ void arch_pick_mmap_layout(struct mm_struct *mm) mm->unmap_area = arch_unmap_area_topdown; } } -EXPORT_SYMBOL_GPL(arch_pick_mmap_layout); #endif diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 51b0738e13d1..373adf69b01c 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -18,7 +18,6 @@ #include <linux/rcupdate.h> #include <linux/slab.h> -#include <asm/system.h> #include <asm/pgtable.h> #include <asm/pgalloc.h> #include <asm/tlb.h> diff --git a/arch/s390/oprofile/hwsampler.c b/arch/s390/oprofile/hwsampler.c index 9daee91e6c3f..c6646de07bf4 100644 --- a/arch/s390/oprofile/hwsampler.c +++ b/arch/s390/oprofile/hwsampler.c @@ -18,7 +18,8 @@ #include <linux/oom.h> #include <linux/oprofile.h> -#include <asm/lowcore.h> +#include <asm/facility.h> +#include <asm/cpu_mf.h> #include <asm/irq.h> #include "hwsampler.h" @@ -30,12 +31,6 @@ #define ALERT_REQ_MASK 0x4000000000000000ul #define BUFFER_FULL_MASK 0x8000000000000000ul -#define EI_IEA (1 << 31) /* invalid entry address */ -#define EI_ISE (1 << 30) /* incorrect SDBT entry */ -#define EI_PRA (1 << 29) /* program request alert */ -#define EI_SACA (1 << 23) /* sampler authorization change alert */ -#define EI_LSDA (1 << 22) /* loss of sample data alert */ - DECLARE_PER_CPU(struct hws_cpu_buffer, sampler_cpu_buffer); struct hws_execute_parms { @@ -232,9 +227,20 @@ static inline unsigned long *trailer_entry_ptr(unsigned long v) return (unsigned long *) ret; } -/* prototypes for external interrupt handler and worker */ -static void hws_ext_handler(unsigned int ext_int_code, - unsigned int param32, unsigned long param64); +static void hws_ext_handler(struct ext_code ext_code, + unsigned int param32, unsigned long param64) +{ + struct hws_cpu_buffer *cb = &__get_cpu_var(sampler_cpu_buffer); + + if (!(param32 & CPU_MF_INT_SF_MASK)) + return; + + kstat_cpu(smp_processor_id()).irqs[EXTINT_CPM]++; + atomic_xchg(&cb->ext_params, atomic_read(&cb->ext_params) | param32); + + if (hws_wq) + queue_work(hws_wq, &cb->worker); +} static void worker(struct work_struct *work); @@ -673,18 +679,6 @@ int hwsampler_activate(unsigned int cpu) return rc; } -static void hws_ext_handler(unsigned int ext_int_code, - unsigned int param32, unsigned long param64) -{ - struct hws_cpu_buffer *cb; - - kstat_cpu(smp_processor_id()).irqs[EXTINT_CPM]++; - cb = &__get_cpu_var(sampler_cpu_buffer); - atomic_xchg(&cb->ext_params, atomic_read(&cb->ext_params) | param32); - if (hws_wq) - queue_work(hws_wq, &cb->worker); -} - static int check_qsi_on_setup(void) { int rc; @@ -760,23 +754,23 @@ static int worker_check_error(unsigned int cpu, int ext_params) if (!sdbt || !*sdbt) return -EINVAL; - if (ext_params & EI_PRA) + if (ext_params & CPU_MF_INT_SF_PRA) cb->req_alert++; - if (ext_params & EI_LSDA) + if (ext_params & CPU_MF_INT_SF_LSDA) cb->loss_of_sample_data++; - if (ext_params & EI_IEA) { + if (ext_params & CPU_MF_INT_SF_IAE) { cb->invalid_entry_address++; rc = -EINVAL; } - if (ext_params & EI_ISE) { + if (ext_params & CPU_MF_INT_SF_ISE) { cb->incorrect_sdbt_entry++; rc = -EINVAL; } - if (ext_params & EI_SACA) { + if (ext_params & CPU_MF_INT_SF_SACA) { cb->sample_auth_change_alert++; rc = -EINVAL; } @@ -1009,7 +1003,7 @@ int hwsampler_deallocate(void) if (hws_state != HWS_STOPPED) goto deallocate_exit; - ctl_clear_bit(0, 5); /* set bit 58 CR0 off */ + measurement_alert_subclass_unregister(); deallocate_sdbt(); hws_state = HWS_DEALLOCATED; @@ -1123,7 +1117,7 @@ int hwsampler_shutdown(void) mutex_lock(&hws_sem); if (hws_state == HWS_STOPPED) { - ctl_clear_bit(0, 5); /* set bit 58 CR0 off */ + measurement_alert_subclass_unregister(); deallocate_sdbt(); } if (hws_wq) { @@ -1198,7 +1192,7 @@ start_all_exit: hws_oom = 1; hws_flush_all = 0; /* now let them in, 1407 CPUMF external interrupts */ - ctl_set_bit(0, 5); /* set CR0 bit 58 */ + measurement_alert_subclass_register(); return 0; } |