diff options
Diffstat (limited to 'arch/s390')
29 files changed, 646 insertions, 173 deletions
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index a9fbd43395f7..d48ede334434 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -92,6 +92,9 @@ config S390 select HAVE_ARCH_JUMP_LABEL if !MARCH_G5 select HAVE_RCU_TABLE_FREE if SMP select ARCH_SAVE_PAGE_KEYS if HIBERNATION + select HAVE_MEMBLOCK + select HAVE_MEMBLOCK_NODE_MAP + select ARCH_DISCARD_MEMBLOCK select ARCH_INLINE_SPIN_TRYLOCK select ARCH_INLINE_SPIN_TRYLOCK_BH select ARCH_INLINE_SPIN_LOCK @@ -345,9 +348,6 @@ config WARN_DYNAMIC_STACK Say N if you are unsure. -config ARCH_POPULATES_NODE_MAP - def_bool y - comment "Kernel preemption" source "kernel/Kconfig.preempt" @@ -572,6 +572,7 @@ config KEXEC config CRASH_DUMP bool "kernel crash dumps" depends on 64BIT + select KEXEC help Generate crash dump after being started by kexec. Crash dump kernels are loaded in the main kernel with kexec-tools diff --git a/arch/s390/appldata/appldata_os.c b/arch/s390/appldata/appldata_os.c index 92f1cb745d69..4de031d6b76c 100644 --- a/arch/s390/appldata/appldata_os.c +++ b/arch/s390/appldata/appldata_os.c @@ -115,21 +115,21 @@ static void appldata_get_os_data(void *data) j = 0; for_each_online_cpu(i) { os_data->os_cpu[j].per_cpu_user = - cputime_to_jiffies(kstat_cpu(i).cpustat.user); + cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_USER]); os_data->os_cpu[j].per_cpu_nice = - cputime_to_jiffies(kstat_cpu(i).cpustat.nice); + cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_NICE]); os_data->os_cpu[j].per_cpu_system = - cputime_to_jiffies(kstat_cpu(i).cpustat.system); + cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_SYSTEM]); os_data->os_cpu[j].per_cpu_idle = - cputime_to_jiffies(kstat_cpu(i).cpustat.idle); + cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_IDLE]); os_data->os_cpu[j].per_cpu_irq = - cputime_to_jiffies(kstat_cpu(i).cpustat.irq); + cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_IRQ]); os_data->os_cpu[j].per_cpu_softirq = - cputime_to_jiffies(kstat_cpu(i).cpustat.softirq); + cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_SOFTIRQ]); os_data->os_cpu[j].per_cpu_iowait = - cputime_to_jiffies(kstat_cpu(i).cpustat.iowait); + cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_IOWAIT]); os_data->os_cpu[j].per_cpu_steal = - cputime_to_jiffies(kstat_cpu(i).cpustat.steal); + cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_STEAL]); os_data->os_cpu[j].cpu_id = i; j++; } diff --git a/arch/s390/crypto/crypt_s390.h b/arch/s390/crypto/crypt_s390.h index 49676771bd66..ffd1ac255f19 100644 --- a/arch/s390/crypto/crypt_s390.h +++ b/arch/s390/crypto/crypt_s390.h @@ -368,9 +368,12 @@ static inline int crypt_s390_func_available(int func, if (facility_mask & CRYPT_S390_MSA && !test_facility(17)) return 0; - if (facility_mask & CRYPT_S390_MSA3 && !test_facility(76)) + + if (facility_mask & CRYPT_S390_MSA3 && + (!test_facility(2) || !test_facility(76))) return 0; - if (facility_mask & CRYPT_S390_MSA4 && !test_facility(77)) + if (facility_mask & CRYPT_S390_MSA4 && + (!test_facility(2) || !test_facility(77))) return 0; switch (func & CRYPT_S390_OP_MASK) { diff --git a/arch/s390/include/asm/cputime.h b/arch/s390/include/asm/cputime.h index 081434878296..c23c3900c304 100644 --- a/arch/s390/include/asm/cputime.h +++ b/arch/s390/include/asm/cputime.h @@ -16,114 +16,100 @@ /* We want to use full resolution of the CPU timer: 2**-12 micro-seconds. */ -typedef unsigned long long cputime_t; -typedef unsigned long long cputime64_t; +typedef unsigned long long __nocast cputime_t; +typedef unsigned long long __nocast cputime64_t; -#ifndef __s390x__ - -static inline unsigned int -__div(unsigned long long n, unsigned int base) +static inline unsigned long __div(unsigned long long n, unsigned long base) { +#ifndef __s390x__ register_pair rp; rp.pair = n >> 1; asm ("dr %0,%1" : "+d" (rp) : "d" (base >> 1)); return rp.subreg.odd; +#else /* __s390x__ */ + return n / base; +#endif /* __s390x__ */ } -#else /* __s390x__ */ +#define cputime_one_jiffy jiffies_to_cputime(1) + +/* + * Convert cputime to jiffies and back. + */ +static inline unsigned long cputime_to_jiffies(const cputime_t cputime) +{ + return __div((__force unsigned long long) cputime, 4096000000ULL / HZ); +} -static inline unsigned int -__div(unsigned long long n, unsigned int base) +static inline cputime_t jiffies_to_cputime(const unsigned int jif) { - return n / base; + return (__force cputime_t)(jif * (4096000000ULL / HZ)); } -#endif /* __s390x__ */ +static inline u64 cputime64_to_jiffies64(cputime64_t cputime) +{ + unsigned long long jif = (__force unsigned long long) cputime; + do_div(jif, 4096000000ULL / HZ); + return jif; +} -#define cputime_zero (0ULL) -#define cputime_one_jiffy jiffies_to_cputime(1) -#define cputime_max ((~0UL >> 1) - 1) -#define cputime_add(__a, __b) ((__a) + (__b)) -#define cputime_sub(__a, __b) ((__a) - (__b)) -#define cputime_div(__a, __n) ({ \ - unsigned long long __div = (__a); \ - do_div(__div,__n); \ - __div; \ -}) -#define cputime_halve(__a) ((__a) >> 1) -#define cputime_eq(__a, __b) ((__a) == (__b)) -#define cputime_gt(__a, __b) ((__a) > (__b)) -#define cputime_ge(__a, __b) ((__a) >= (__b)) -#define cputime_lt(__a, __b) ((__a) < (__b)) -#define cputime_le(__a, __b) ((__a) <= (__b)) -#define cputime_to_jiffies(__ct) (__div((__ct), 4096000000ULL / HZ)) -#define cputime_to_scaled(__ct) (__ct) -#define jiffies_to_cputime(__hz) ((cputime_t)(__hz) * (4096000000ULL / HZ)) - -#define cputime64_zero (0ULL) -#define cputime64_add(__a, __b) ((__a) + (__b)) -#define cputime_to_cputime64(__ct) (__ct) - -static inline u64 -cputime64_to_jiffies64(cputime64_t cputime) -{ - do_div(cputime, 4096000000ULL / HZ); - return cputime; +static inline cputime64_t jiffies64_to_cputime64(const u64 jif) +{ + return (__force cputime64_t)(jif * (4096000000ULL / HZ)); } /* * Convert cputime to microseconds and back. */ -static inline unsigned int -cputime_to_usecs(const cputime_t cputime) +static inline unsigned int cputime_to_usecs(const cputime_t cputime) { - return cputime_div(cputime, 4096); + return (__force unsigned long long) cputime >> 12; } -static inline cputime_t -usecs_to_cputime(const unsigned int m) +static inline cputime_t usecs_to_cputime(const unsigned int m) { - return (cputime_t) m * 4096; + return (__force cputime_t)(m * 4096ULL); } +#define usecs_to_cputime64(m) usecs_to_cputime(m) + /* * Convert cputime to milliseconds and back. */ -static inline unsigned int -cputime_to_secs(const cputime_t cputime) +static inline unsigned int cputime_to_secs(const cputime_t cputime) { - return __div(cputime, 2048000000) >> 1; + return __div((__force unsigned long long) cputime, 2048000000) >> 1; } -static inline cputime_t -secs_to_cputime(const unsigned int s) +static inline cputime_t secs_to_cputime(const unsigned int s) { - return (cputime_t) s * 4096000000ULL; + return (__force cputime_t)(s * 4096000000ULL); } /* * Convert cputime to timespec and back. */ -static inline cputime_t -timespec_to_cputime(const struct timespec *value) +static inline cputime_t timespec_to_cputime(const struct timespec *value) { - return value->tv_nsec * 4096 / 1000 + (u64) value->tv_sec * 4096000000ULL; + unsigned long long ret = value->tv_sec * 4096000000ULL; + return (__force cputime_t)(ret + value->tv_nsec * 4096 / 1000); } -static inline void -cputime_to_timespec(const cputime_t cputime, struct timespec *value) +static inline void cputime_to_timespec(const cputime_t cputime, + struct timespec *value) { + unsigned long long __cputime = (__force unsigned long long) cputime; #ifndef __s390x__ register_pair rp; - rp.pair = cputime >> 1; + rp.pair = __cputime >> 1; asm ("dr %0,%1" : "+d" (rp) : "d" (2048000000UL)); value->tv_nsec = rp.subreg.even * 1000 / 4096; value->tv_sec = rp.subreg.odd; #else - value->tv_nsec = (cputime % 4096000000ULL) * 1000 / 4096; - value->tv_sec = cputime / 4096000000ULL; + value->tv_nsec = (__cputime % 4096000000ULL) * 1000 / 4096; + value->tv_sec = __cputime / 4096000000ULL; #endif } @@ -132,50 +118,52 @@ cputime_to_timespec(const cputime_t cputime, struct timespec *value) * Since cputime and timeval have the same resolution (microseconds) * this is easy. */ -static inline cputime_t -timeval_to_cputime(const struct timeval *value) +static inline cputime_t timeval_to_cputime(const struct timeval *value) { - return value->tv_usec * 4096 + (u64) value->tv_sec * 4096000000ULL; + unsigned long long ret = value->tv_sec * 4096000000ULL; + return (__force cputime_t)(ret + value->tv_usec * 4096ULL); } -static inline void -cputime_to_timeval(const cputime_t cputime, struct timeval *value) +static inline void cputime_to_timeval(const cputime_t cputime, + struct timeval *value) { + unsigned long long __cputime = (__force unsigned long long) cputime; #ifndef __s390x__ register_pair rp; - rp.pair = cputime >> 1; + rp.pair = __cputime >> 1; asm ("dr %0,%1" : "+d" (rp) : "d" (2048000000UL)); value->tv_usec = rp.subreg.even / 4096; value->tv_sec = rp.subreg.odd; #else - value->tv_usec = (cputime % 4096000000ULL) / 4096; - value->tv_sec = cputime / 4096000000ULL; + value->tv_usec = (__cputime % 4096000000ULL) / 4096; + value->tv_sec = __cputime / 4096000000ULL; #endif } /* * Convert cputime to clock and back. */ -static inline clock_t -cputime_to_clock_t(cputime_t cputime) +static inline clock_t cputime_to_clock_t(cputime_t cputime) { - return cputime_div(cputime, 4096000000ULL / USER_HZ); + unsigned long long clock = (__force unsigned long long) cputime; + do_div(clock, 4096000000ULL / USER_HZ); + return clock; } -static inline cputime_t -clock_t_to_cputime(unsigned long x) +static inline cputime_t clock_t_to_cputime(unsigned long x) { - return (cputime_t) x * (4096000000ULL / USER_HZ); + return (__force cputime_t)(x * (4096000000ULL / USER_HZ)); } /* * Convert cputime64 to clock. */ -static inline clock_t -cputime64_to_clock_t(cputime64_t cputime) +static inline clock_t cputime64_to_clock_t(cputime64_t cputime) { - return cputime_div(cputime, 4096000000ULL / USER_HZ); + unsigned long long clock = (__force unsigned long long) cputime; + do_div(clock, 4096000000ULL / USER_HZ); + return clock; } struct s390_idle_data { diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 24e18473d926..b0c235cb6ad5 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -47,7 +47,7 @@ struct sca_block { #define KVM_HPAGE_MASK(x) (~(KVM_HPAGE_SIZE(x) - 1)) #define KVM_PAGES_PER_HPAGE(x) (KVM_HPAGE_SIZE(x) / PAGE_SIZE) -#define CPUSTAT_HOST 0x80000000 +#define CPUSTAT_STOPPED 0x80000000 #define CPUSTAT_WAIT 0x10000000 #define CPUSTAT_ECALL_PEND 0x08000000 #define CPUSTAT_STOP_INT 0x04000000 @@ -139,6 +139,7 @@ struct kvm_vcpu_stat { u32 instruction_stfl; u32 instruction_tprot; u32 instruction_sigp_sense; + u32 instruction_sigp_sense_running; u32 instruction_sigp_external_call; u32 instruction_sigp_emergency; u32 instruction_sigp_stop; diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 34ede0ea85a9..4f289ff0b7fe 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -593,14 +593,16 @@ static inline pgste_t pgste_update_all(pte_t *ptep, pgste_t pgste) unsigned long address, bits; unsigned char skey; + if (!pte_present(*ptep)) + return pgste; address = pte_val(*ptep) & PAGE_MASK; skey = page_get_storage_key(address); bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED); /* Clear page changed & referenced bit in the storage key */ - if (bits) { - skey ^= bits; - page_set_storage_key(address, skey, 1); - } + if (bits & _PAGE_CHANGED) + page_set_storage_key(address, skey ^ bits, 1); + else if (bits) + page_reset_referenced(address); /* Transfer page changed & referenced bit to guest bits in pgste */ pgste_val(pgste) |= bits << 48; /* RCP_GR_BIT & RCP_GC_BIT */ /* Get host changed & referenced bits from pgste */ @@ -625,6 +627,8 @@ static inline pgste_t pgste_update_young(pte_t *ptep, pgste_t pgste) #ifdef CONFIG_PGSTE int young; + if (!pte_present(*ptep)) + return pgste; young = page_reset_referenced(pte_val(*ptep) & PAGE_MASK); /* Transfer page referenced bit to pte software bit (host view) */ if (young || (pgste_val(pgste) & RCP_HR_BIT)) @@ -638,13 +642,15 @@ static inline pgste_t pgste_update_young(pte_t *ptep, pgste_t pgste) } -static inline void pgste_set_pte(pte_t *ptep, pgste_t pgste) +static inline void pgste_set_pte(pte_t *ptep, pgste_t pgste, pte_t entry) { #ifdef CONFIG_PGSTE unsigned long address; unsigned long okey, nkey; - address = pte_val(*ptep) & PAGE_MASK; + if (!pte_present(entry)) + return; + address = pte_val(entry) & PAGE_MASK; okey = nkey = page_get_storage_key(address); nkey &= ~(_PAGE_ACC_BITS | _PAGE_FP_BIT); /* Set page access key and fetch protection bit from pgste */ @@ -712,7 +718,7 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, if (mm_has_pgste(mm)) { pgste = pgste_get_lock(ptep); - pgste_set_pte(ptep, pgste); + pgste_set_pte(ptep, pgste, entry); *ptep = entry; pgste_set_unlock(ptep, pgste); } else diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h index 5a099714df04..097183c70407 100644 --- a/arch/s390/include/asm/setup.h +++ b/arch/s390/include/asm/setup.h @@ -82,6 +82,7 @@ extern unsigned int user_mode; #define MACHINE_FLAG_LPAR (1UL << 12) #define MACHINE_FLAG_SPP (1UL << 13) #define MACHINE_FLAG_TOPOLOGY (1UL << 14) +#define MACHINE_FLAG_STCKF (1UL << 15) #define MACHINE_IS_VM (S390_lowcore.machine_flags & MACHINE_FLAG_VM) #define MACHINE_IS_KVM (S390_lowcore.machine_flags & MACHINE_FLAG_KVM) @@ -100,6 +101,7 @@ extern unsigned int user_mode; #define MACHINE_HAS_PFMF (0) #define MACHINE_HAS_SPP (0) #define MACHINE_HAS_TOPOLOGY (0) +#define MACHINE_HAS_STCKF (0) #else /* __s390x__ */ #define MACHINE_HAS_IEEE (1) #define MACHINE_HAS_CSP (1) @@ -111,6 +113,7 @@ extern unsigned int user_mode; #define MACHINE_HAS_PFMF (S390_lowcore.machine_flags & MACHINE_FLAG_PFMF) #define MACHINE_HAS_SPP (S390_lowcore.machine_flags & MACHINE_FLAG_SPP) #define MACHINE_HAS_TOPOLOGY (S390_lowcore.machine_flags & MACHINE_FLAG_TOPOLOGY) +#define MACHINE_HAS_STCKF (S390_lowcore.machine_flags & MACHINE_FLAG_STCKF) #endif /* __s390x__ */ #define ZFCPDUMP_HSA_SIZE (32UL<<20) diff --git a/arch/s390/include/asm/socket.h b/arch/s390/include/asm/socket.h index fdff1e995c73..67b5c1b14b51 100644 --- a/arch/s390/include/asm/socket.h +++ b/arch/s390/include/asm/socket.h @@ -70,4 +70,7 @@ #define SO_RXQ_OVFL 40 +#define SO_WIFI_STATUS 41 +#define SCM_WIFI_STATUS SO_WIFI_STATUS + #endif /* _ASM_SOCKET_H */ diff --git a/arch/s390/include/asm/timex.h b/arch/s390/include/asm/timex.h index d610bef9c5e9..c447a27a7fdb 100644 --- a/arch/s390/include/asm/timex.h +++ b/arch/s390/include/asm/timex.h @@ -90,7 +90,7 @@ static inline unsigned long long get_clock_fast(void) { unsigned long long clk; - if (test_facility(25)) + if (MACHINE_HAS_STCKF) asm volatile(".insn s,0xb27c0000,%0" : "=Q" (clk) : : "cc"); else clk = get_clock(); diff --git a/arch/s390/include/asm/unistd.h b/arch/s390/include/asm/unistd.h index 404bdb9671b4..58de4c91c333 100644 --- a/arch/s390/include/asm/unistd.h +++ b/arch/s390/include/asm/unistd.h @@ -277,7 +277,9 @@ #define __NR_clock_adjtime 337 #define __NR_syncfs 338 #define __NR_setns 339 -#define NR_syscalls 340 +#define __NR_process_vm_readv 340 +#define __NR_process_vm_writev 341 +#define NR_syscalls 342 /* * There are some system calls that are not present on 64 bit, some diff --git a/arch/s390/kernel/compat_wrapper.S b/arch/s390/kernel/compat_wrapper.S index 5006a1d9f5d0..18c51df9fe06 100644 --- a/arch/s390/kernel/compat_wrapper.S +++ b/arch/s390/kernel/compat_wrapper.S @@ -1627,3 +1627,23 @@ ENTRY(sys_setns_wrapper) lgfr %r2,%r2 # int lgfr %r3,%r3 # int jg sys_setns + +ENTRY(compat_sys_process_vm_readv_wrapper) + lgfr %r2,%r2 # compat_pid_t + llgtr %r3,%r3 # struct compat_iovec __user * + llgfr %r4,%r4 # unsigned long + llgtr %r5,%r5 # struct compat_iovec __user * + llgfr %r6,%r6 # unsigned long + llgf %r0,164(%r15) # unsigned long + stg %r0,160(%r15) + jg sys_process_vm_readv + +ENTRY(compat_sys_process_vm_writev_wrapper) + lgfr %r2,%r2 # compat_pid_t + llgtr %r3,%r3 # struct compat_iovec __user * + llgfr %r4,%r4 # unsigned long + llgtr %r5,%r5 # struct compat_iovec __user * + llgfr %r6,%r6 # unsigned long + llgf %r0,164(%r15) # unsigned long + stg %r0,160(%r15) + jg sys_process_vm_writev diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index 37394b3413e2..c9ffe0025197 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -390,6 +390,8 @@ static __init void detect_machine_facilities(void) S390_lowcore.machine_flags |= MACHINE_FLAG_MVCOS; if (test_facility(40)) S390_lowcore.machine_flags |= MACHINE_FLAG_SPP; + if (test_facility(25)) + S390_lowcore.machine_flags |= MACHINE_FLAG_STCKF; #endif } diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index 9451b210a1b4..3201ae447990 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -91,10 +91,12 @@ static void default_idle(void) void cpu_idle(void) { for (;;) { - tick_nohz_stop_sched_tick(1); + tick_nohz_idle_enter(); + rcu_idle_enter(); while (!need_resched()) default_idle(); - tick_nohz_restart_sched_tick(); + rcu_idle_exit(); + tick_nohz_idle_exit(); preempt_enable_no_resched(); schedule(); preempt_disable(); diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index 450931a45b68..573bc29551ef 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -296,13 +296,6 @@ static int __poke_user(struct task_struct *child, addr_t addr, addr_t data) ((data & PSW_MASK_EA) && !(data & PSW_MASK_BA)))) /* Invalid psw mask. */ return -EINVAL; - if (addr == (addr_t) &dummy->regs.psw.addr) - /* - * The debugger changed the instruction address, - * reset system call restart, see signal.c:do_signal - */ - task_thread_info(child)->system_call = 0; - *(addr_t *)((addr_t) &task_pt_regs(child)->psw + addr) = data; } else if (addr < (addr_t) (&dummy->regs.orig_gpr2)) { @@ -614,11 +607,6 @@ static int __poke_user_compat(struct task_struct *child, /* Transfer 31 bit amode bit to psw mask. */ regs->psw.mask = (regs->psw.mask & ~PSW_MASK_BA) | (__u64)(tmp & PSW32_ADDR_AMODE); - /* - * The debugger changed the instruction address, - * reset system call restart, see signal.c:do_signal - */ - task_thread_info(child)->system_call = 0; } else { /* gpr 0-15 */ *(__u32*)((addr_t) ®s->psw + addr*2 + 4) = tmp; @@ -905,6 +893,14 @@ static int s390_last_break_get(struct task_struct *target, return 0; } +static int s390_last_break_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + return 0; +} + #endif static int s390_system_call_get(struct task_struct *target, @@ -951,6 +947,7 @@ static const struct user_regset s390_regsets[] = { .size = sizeof(long), .align = sizeof(long), .get = s390_last_break_get, + .set = s390_last_break_set, }, #endif [REGSET_SYSTEM_CALL] = { @@ -1116,6 +1113,14 @@ static int s390_compat_last_break_get(struct task_struct *target, return 0; } +static int s390_compat_last_break_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + return 0; +} + static const struct user_regset s390_compat_regsets[] = { [REGSET_GENERAL] = { .core_note_type = NT_PRSTATUS, @@ -1139,6 +1144,7 @@ static const struct user_regset s390_compat_regsets[] = { .size = sizeof(long), .align = sizeof(long), .get = s390_compat_last_break_get, + .set = s390_compat_last_break_set, }, [REGSET_SYSTEM_CALL] = { .core_note_type = NT_S390_SYSTEM_CALL, diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 8ac6bfa2786c..f11d1b037c50 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -21,6 +21,7 @@ #include <linux/module.h> #include <linux/sched.h> #include <linux/kernel.h> +#include <linux/memblock.h> #include <linux/mm.h> #include <linux/stddef.h> #include <linux/unistd.h> @@ -211,6 +212,8 @@ static void __init setup_zfcpdump(unsigned int console_devno) if (ipl_info.type != IPL_TYPE_FCP_DUMP) return; + if (OLDMEM_BASE) + return; if (console_devno != -1) sprintf(str, " cio_ignore=all,!0.0.%04x,!0.0.%04x", ipl_info.data.fcp.dev_id.devno, console_devno); @@ -482,7 +485,7 @@ static void __init setup_memory_end(void) #ifdef CONFIG_ZFCPDUMP - if (ipl_info.type == IPL_TYPE_FCP_DUMP) { + if (ipl_info.type == IPL_TYPE_FCP_DUMP && !OLDMEM_BASE) { memory_end = ZFCPDUMP_HSA_SIZE; memory_end_set = 1; } @@ -577,7 +580,7 @@ static unsigned long __init find_crash_base(unsigned long crash_size, *msg = "first memory chunk must be at least crashkernel size"; return 0; } - if (is_kdump_kernel() && (crash_size == OLDMEM_SIZE)) + if (OLDMEM_BASE && crash_size == OLDMEM_SIZE) return OLDMEM_BASE; for (i = MEMORY_CHUNKS - 1; i >= 0; i--) { @@ -818,7 +821,8 @@ setup_memory(void) end_chunk = min(end_chunk, end_pfn); if (start_chunk >= end_chunk) continue; - add_active_range(0, start_chunk, end_chunk); + memblock_add_node(PFN_PHYS(start_chunk), + PFN_PHYS(end_chunk - start_chunk), 0); pfn = max(start_chunk, start_pfn); for (; pfn < end_chunk; pfn++) page_set_storage_key(PFN_PHYS(pfn), diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c index 05a85bc14c98..7f6f9f354545 100644 --- a/arch/s390/kernel/signal.c +++ b/arch/s390/kernel/signal.c @@ -460,9 +460,9 @@ void do_signal(struct pt_regs *regs) regs->svc_code >> 16); break; } - /* No longer in a system call */ - clear_thread_flag(TIF_SYSCALL); } + /* No longer in a system call */ + clear_thread_flag(TIF_SYSCALL); if ((is_compat_task() ? handle_signal32(signr, &ka, &info, oldset, regs) : @@ -486,6 +486,7 @@ void do_signal(struct pt_regs *regs) } /* No handlers present - check for system call restart */ + clear_thread_flag(TIF_SYSCALL); if (current_thread_info()->system_call) { regs->svc_code = current_thread_info()->system_call; switch (regs->gprs[2]) { @@ -500,9 +501,6 @@ void do_signal(struct pt_regs *regs) regs->gprs[2] = regs->orig_gpr2; set_thread_flag(TIF_SYSCALL); break; - default: - clear_thread_flag(TIF_SYSCALL); - break; } } diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S index 73eb08c874fb..bcab2f04ba58 100644 --- a/arch/s390/kernel/syscalls.S +++ b/arch/s390/kernel/syscalls.S @@ -348,3 +348,5 @@ SYSCALL(sys_open_by_handle_at,sys_open_by_handle_at,compat_sys_open_by_handle_at SYSCALL(sys_clock_adjtime,sys_clock_adjtime,compat_sys_clock_adjtime_wrapper) SYSCALL(sys_syncfs,sys_syncfs,sys_syncfs_wrapper) SYSCALL(sys_setns,sys_setns,sys_setns_wrapper) +SYSCALL(sys_process_vm_readv,sys_process_vm_readv,compat_sys_process_vm_readv_wrapper) /* 340 */ +SYSCALL(sys_process_vm_writev,sys_process_vm_writev,compat_sys_process_vm_writev_wrapper) diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c index 77b8942b9a15..fdb5b8cb260f 100644 --- a/arch/s390/kernel/topology.c +++ b/arch/s390/kernel/topology.c @@ -68,8 +68,10 @@ static cpumask_t cpu_group_map(struct mask_info *info, unsigned int cpu) return mask; } -static void add_cpus_to_mask(struct topology_cpu *tl_cpu, - struct mask_info *book, struct mask_info *core) +static struct mask_info *add_cpus_to_mask(struct topology_cpu *tl_cpu, + struct mask_info *book, + struct mask_info *core, + int z10) { unsigned int cpu; @@ -88,10 +90,16 @@ static void add_cpus_to_mask(struct topology_cpu *tl_cpu, cpu_book_id[lcpu] = book->id; #endif cpumask_set_cpu(lcpu, &core->mask); - cpu_core_id[lcpu] = core->id; + if (z10) { + cpu_core_id[lcpu] = rcpu; + core = core->next; + } else { + cpu_core_id[lcpu] = core->id; + } smp_cpu_polarization[lcpu] = tl_cpu->pp; } } + return core; } static void clear_masks(void) @@ -123,18 +131,41 @@ static void tl_to_cores(struct sysinfo_15_1_x *info) { #ifdef CONFIG_SCHED_BOOK struct mask_info *book = &book_info; + struct cpuid cpu_id; #else struct mask_info *book = NULL; #endif struct mask_info *core = &core_info; union topology_entry *tle, *end; + int z10 = 0; - +#ifdef CONFIG_SCHED_BOOK + get_cpu_id(&cpu_id); + z10 = cpu_id.machine == 0x2097 || cpu_id.machine == 0x2098; +#endif spin_lock_irq(&topology_lock); clear_masks(); tle = info->tle; end = (union topology_entry *)((unsigned long)info + info->length); while (tle < end) { +#ifdef CONFIG_SCHED_BOOK + if (z10) { + switch (tle->nl) { + case 1: + book = book->next; + book->id = tle->container.id; + break; + case 0: + core = add_cpus_to_mask(&tle->cpu, book, core, z10); + break; + default: + clear_masks(); + goto out; + } + tle = next_tle(tle); + continue; + } +#endif switch (tle->nl) { #ifdef CONFIG_SCHED_BOOK case 2: @@ -147,7 +178,7 @@ static void tl_to_cores(struct sysinfo_15_1_x *info) core->id = tle->container.id; break; case 0: - add_cpus_to_mask(&tle->cpu, book, core); + add_cpus_to_mask(&tle->cpu, book, core, z10); break; default: clear_masks(); @@ -328,8 +359,8 @@ void __init s390_init_cpu_topology(void) for (i = 0; i < TOPOLOGY_NR_MAG; i++) printk(" %d", info->mag[i]); printk(" / %d\n", info->mnest); - alloc_masks(info, &core_info, 2); + alloc_masks(info, &core_info, 1); #ifdef CONFIG_SCHED_BOOK - alloc_masks(info, &book_info, 3); + alloc_masks(info, &book_info, 2); #endif } diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S index 56fe6bc81fee..e4c79ebb40e6 100644 --- a/arch/s390/kernel/vmlinux.lds.S +++ b/arch/s390/kernel/vmlinux.lds.S @@ -43,6 +43,8 @@ SECTIONS NOTES :text :note + .dummy : { *(.dummy) } :data + RODATA #ifdef CONFIG_SHARED_KERNEL diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c index 87cedd61be04..8943e82cd4d9 100644 --- a/arch/s390/kvm/diag.c +++ b/arch/s390/kvm/diag.c @@ -70,7 +70,7 @@ static int __diag_ipl_functions(struct kvm_vcpu *vcpu) return -EOPNOTSUPP; } - atomic_clear_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags); + atomic_set_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags); vcpu->run->s390_reset_flags |= KVM_S390_RESET_SUBSYSTEM; vcpu->run->s390_reset_flags |= KVM_S390_RESET_IPL; vcpu->run->s390_reset_flags |= KVM_S390_RESET_CPU_INIT; diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c index c7c51898984e..02434543eabb 100644 --- a/arch/s390/kvm/intercept.c +++ b/arch/s390/kvm/intercept.c @@ -132,7 +132,6 @@ static int handle_stop(struct kvm_vcpu *vcpu) int rc = 0; vcpu->stat.exit_stop_request++; - atomic_clear_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags); spin_lock_bh(&vcpu->arch.local_int.lock); if (vcpu->arch.local_int.action_bits & ACTION_STORE_ON_STOP) { vcpu->arch.local_int.action_bits &= ~ACTION_STORE_ON_STOP; @@ -149,6 +148,8 @@ static int handle_stop(struct kvm_vcpu *vcpu) } if (vcpu->arch.local_int.action_bits & ACTION_STOP_ON_STOP) { + atomic_set_mask(CPUSTAT_STOPPED, + &vcpu->arch.sie_block->cpuflags); vcpu->arch.local_int.action_bits &= ~ACTION_STOP_ON_STOP; VCPU_EVENT(vcpu, 3, "%s", "cpu stopped"); rc = -EOPNOTSUPP; diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 87c16705b381..278ee009ce65 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -252,6 +252,7 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu, offsetof(struct _lowcore, restart_psw), sizeof(psw_t)); if (rc == -EFAULT) exception = 1; + atomic_clear_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags); break; case KVM_S390_PROGRAM_INT: diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 0bd3bea1e4cd..d1c445732451 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -65,6 +65,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { "instruction_stfl", VCPU_STAT(instruction_stfl) }, { "instruction_tprot", VCPU_STAT(instruction_tprot) }, { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) }, + { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) }, { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) }, { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) }, { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) }, @@ -127,6 +128,7 @@ int kvm_dev_ioctl_check_extension(long ext) switch (ext) { case KVM_CAP_S390_PSW: case KVM_CAP_S390_GMAP: + case KVM_CAP_SYNC_MMU: r = 1; break; default: @@ -270,10 +272,12 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) restore_fp_regs(&vcpu->arch.guest_fpregs); restore_access_regs(vcpu->arch.guest_acrs); gmap_enable(vcpu->arch.gmap); + atomic_set_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags); } void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) { + atomic_clear_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags); gmap_disable(vcpu->arch.gmap); save_fp_regs(&vcpu->arch.guest_fpregs); save_access_regs(vcpu->arch.guest_acrs); @@ -301,7 +305,9 @@ static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu) int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) { - atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH | CPUSTAT_SM); + atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH | + CPUSTAT_SM | + CPUSTAT_STOPPED); vcpu->arch.sie_block->ecb = 6; vcpu->arch.sie_block->eca = 0xC1002001U; vcpu->arch.sie_block->fac = (int) (long) facilities; @@ -428,7 +434,7 @@ static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw) { int rc = 0; - if (atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_RUNNING) + if (!(atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_STOPPED)) rc = -EBUSY; else { vcpu->run->psw_mask = psw.mask; @@ -501,7 +507,7 @@ rerun_vcpu: if (vcpu->sigset_active) sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); - atomic_set_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags); + atomic_clear_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags); BUG_ON(vcpu->kvm->arch.float_int.local_int[vcpu->vcpu_id] == NULL); diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index 391626361084..d02638959922 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -336,6 +336,7 @@ static int handle_tprot(struct kvm_vcpu *vcpu) u64 address1 = disp1 + base1 ? vcpu->arch.guest_gprs[base1] : 0; u64 address2 = disp2 + base2 ? vcpu->arch.guest_gprs[base2] : 0; struct vm_area_struct *vma; + unsigned long user_address; vcpu->stat.instruction_tprot++; @@ -349,9 +350,14 @@ static int handle_tprot(struct kvm_vcpu *vcpu) return -EOPNOTSUPP; + /* we must resolve the address without holding the mmap semaphore. + * This is ok since the userspace hypervisor is not supposed to change + * the mapping while the guest queries the memory. Otherwise the guest + * might crash or get wrong info anyway. */ + user_address = (unsigned long) __guestaddr_to_user(vcpu, address1); + down_read(¤t->mm->mmap_sem); - vma = find_vma(current->mm, - (unsigned long) __guestaddr_to_user(vcpu, address1)); + vma = find_vma(current->mm, user_address); if (!vma) { up_read(¤t->mm->mmap_sem); return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c index f815118835f3..0a7941d74bc6 100644 --- a/arch/s390/kvm/sigp.c +++ b/arch/s390/kvm/sigp.c @@ -31,9 +31,11 @@ #define SIGP_SET_PREFIX 0x0d #define SIGP_STORE_STATUS_ADDR 0x0e #define SIGP_SET_ARCH 0x12 +#define SIGP_SENSE_RUNNING 0x15 /* cpu status bits */ #define SIGP_STAT_EQUIPMENT_CHECK 0x80000000UL +#define SIGP_STAT_NOT_RUNNING 0x00000400UL #define SIGP_STAT_INCORRECT_STATE 0x00000200UL #define SIGP_STAT_INVALID_PARAMETER 0x00000100UL #define SIGP_STAT_EXT_CALL_PENDING 0x00000080UL @@ -57,8 +59,8 @@ static int __sigp_sense(struct kvm_vcpu *vcpu, u16 cpu_addr, spin_lock(&fi->lock); if (fi->local_int[cpu_addr] == NULL) rc = 3; /* not operational */ - else if (atomic_read(fi->local_int[cpu_addr]->cpuflags) - & CPUSTAT_RUNNING) { + else if (!(atomic_read(fi->local_int[cpu_addr]->cpuflags) + & CPUSTAT_STOPPED)) { *reg &= 0xffffffff00000000UL; rc = 1; /* status stored */ } else { @@ -251,7 +253,7 @@ static int __sigp_set_prefix(struct kvm_vcpu *vcpu, u16 cpu_addr, u32 address, spin_lock_bh(&li->lock); /* cpu must be in stopped state */ - if (atomic_read(li->cpuflags) & CPUSTAT_RUNNING) { + if (!(atomic_read(li->cpuflags) & CPUSTAT_STOPPED)) { rc = 1; /* incorrect state */ *reg &= SIGP_STAT_INCORRECT_STATE; kfree(inti); @@ -275,6 +277,38 @@ out_fi: return rc; } +static int __sigp_sense_running(struct kvm_vcpu *vcpu, u16 cpu_addr, + unsigned long *reg) +{ + int rc; + struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int; + + if (cpu_addr >= KVM_MAX_VCPUS) + return 3; /* not operational */ + + spin_lock(&fi->lock); + if (fi->local_int[cpu_addr] == NULL) + rc = 3; /* not operational */ + else { + if (atomic_read(fi->local_int[cpu_addr]->cpuflags) + & CPUSTAT_RUNNING) { + /* running */ + rc = 1; + } else { + /* not running */ + *reg &= 0xffffffff00000000UL; + *reg |= SIGP_STAT_NOT_RUNNING; + rc = 0; + } + } + spin_unlock(&fi->lock); + + VCPU_EVENT(vcpu, 4, "sensed running status of cpu %x rc %x", cpu_addr, + rc); + + return rc; +} + int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu) { int r1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4; @@ -331,6 +365,11 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu) rc = __sigp_set_prefix(vcpu, cpu_addr, parameter, &vcpu->arch.guest_gprs[r1]); break; + case SIGP_SENSE_RUNNING: + vcpu->stat.instruction_sigp_sense_running++; + rc = __sigp_sense_running(vcpu, cpu_addr, + &vcpu->arch.guest_gprs[r1]); + break; case SIGP_RESTART: vcpu->stat.instruction_sigp_restart++; /* user space must know about restart */ diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 1766def5bc3f..a9a301866b3c 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -587,8 +587,13 @@ static void pfault_interrupt(unsigned int ext_int_code, } else { /* Completion interrupt was faster than initial * interrupt. Set pfault_wait to -1 so the initial - * interrupt doesn't put the task to sleep. */ - tsk->thread.pfault_wait = -1; + * interrupt doesn't put the task to sleep. + * If the task is not running, ignore the completion + * interrupt since it must be a leftover of a PFAULT + * CANCEL operation which didn't remove all pending + * completion interrupts. */ + if (tsk->state == TASK_RUNNING) + tsk->thread.pfault_wait = -1; } put_task_struct(tsk); } else { diff --git a/arch/s390/oprofile/hwsampler.c b/arch/s390/oprofile/hwsampler.c index f43c0e4282af..9daee91e6c3f 100644 --- a/arch/s390/oprofile/hwsampler.c +++ b/arch/s390/oprofile/hwsampler.c @@ -22,6 +22,7 @@ #include <asm/irq.h> #include "hwsampler.h" +#include "op_counter.h" #define MAX_NUM_SDB 511 #define MIN_NUM_SDB 1 @@ -896,6 +897,8 @@ static void add_samples_to_oprofile(unsigned int cpu, unsigned long *sdbt, if (sample_data_ptr->P == 1) { /* userspace sample */ unsigned int pid = sample_data_ptr->prim_asn; + if (!counter_config.user) + goto skip_sample; rcu_read_lock(); tsk = pid_task(find_vpid(pid), PIDTYPE_PID); if (tsk) @@ -903,6 +906,8 @@ static void add_samples_to_oprofile(unsigned int cpu, unsigned long *sdbt, rcu_read_unlock(); } else { /* kernelspace sample */ + if (!counter_config.kernel) + goto skip_sample; regs = task_pt_regs(current); } @@ -910,7 +915,7 @@ static void add_samples_to_oprofile(unsigned int cpu, unsigned long *sdbt, oprofile_add_ext_hw_sample(sample_data_ptr->ia, regs, 0, !sample_data_ptr->P, tsk); mutex_unlock(&hws_sem); - + skip_sample: sample_data_ptr++; } } diff --git a/arch/s390/oprofile/init.c b/arch/s390/oprofile/init.c index 6efc18b5e60a..2297be406c61 100644 --- a/arch/s390/oprofile/init.c +++ b/arch/s390/oprofile/init.c @@ -2,10 +2,11 @@ * arch/s390/oprofile/init.c * * S390 Version - * Copyright (C) 2003 IBM Deutschland Entwicklung GmbH, IBM Corporation + * Copyright (C) 2002-2011 IBM Deutschland Entwicklung GmbH, IBM Corporation * Author(s): Thomas Spatzier (tspat@de.ibm.com) * Author(s): Mahesh Salgaonkar (mahesh@linux.vnet.ibm.com) * Author(s): Heinz Graalfs (graalfs@linux.vnet.ibm.com) + * Author(s): Andreas Krebbel (krebbel@linux.vnet.ibm.com) * * @remark Copyright 2002-2011 OProfile authors */ @@ -14,6 +15,8 @@ #include <linux/init.h> #include <linux/errno.h> #include <linux/fs.h> +#include <linux/module.h> +#include <asm/processor.h> #include "../../../drivers/oprofile/oprof.h" @@ -22,6 +25,7 @@ extern void s390_backtrace(struct pt_regs * const regs, unsigned int depth); #ifdef CONFIG_64BIT #include "hwsampler.h" +#include "op_counter.h" #define DEFAULT_INTERVAL 4127518 @@ -35,16 +39,41 @@ static unsigned long oprofile_max_interval; static unsigned long oprofile_sdbt_blocks = DEFAULT_SDBT_BLOCKS; static unsigned long oprofile_sdb_blocks = DEFAULT_SDB_BLOCKS; -static int hwsampler_file; +static int hwsampler_enabled; static int hwsampler_running; /* start_mutex must be held to change */ +static int hwsampler_available; static struct oprofile_operations timer_ops; +struct op_counter_config counter_config; + +enum __force_cpu_type { + reserved = 0, /* do not force */ + timer, +}; +static int force_cpu_type; + +static int set_cpu_type(const char *str, struct kernel_param *kp) +{ + if (!strcmp(str, "timer")) { + force_cpu_type = timer; + printk(KERN_INFO "oprofile: forcing timer to be returned " + "as cpu type\n"); + } else { + force_cpu_type = 0; + } + + return 0; +} +module_param_call(cpu_type, set_cpu_type, NULL, NULL, 0); +MODULE_PARM_DESC(cpu_type, "Force legacy basic mode sampling" + "(report cpu_type \"timer\""); + static int oprofile_hwsampler_start(void) { int retval; - hwsampler_running = hwsampler_file; + hwsampler_running = hwsampler_enabled; if (!hwsampler_running) return timer_ops.start(); @@ -72,10 +101,16 @@ static void oprofile_hwsampler_stop(void) return; } +/* + * File ops used for: + * /dev/oprofile/0/enabled + * /dev/oprofile/hwsampling/hwsampler (cpu_type = timer) + */ + static ssize_t hwsampler_read(struct file *file, char __user *buf, size_t count, loff_t *offset) { - return oprofilefs_ulong_to_user(hwsampler_file, buf, count, offset); + return oprofilefs_ulong_to_user(hwsampler_enabled, buf, count, offset); } static ssize_t hwsampler_write(struct file *file, char const __user *buf, @@ -88,9 +123,12 @@ static ssize_t hwsampler_write(struct file *file, char const __user *buf, return -EINVAL; retval = oprofilefs_ulong_from_user(&val, buf, count); - if (retval) + if (retval <= 0) return retval; + if (val != 0 && val != 1) + return -EINVAL; + if (oprofile_started) /* * save to do without locking as we set @@ -99,7 +137,7 @@ static ssize_t hwsampler_write(struct file *file, char const __user *buf, */ return -EBUSY; - hwsampler_file = val; + hwsampler_enabled = val; return count; } @@ -109,38 +147,311 @@ static const struct file_operations hwsampler_fops = { .write = hwsampler_write, }; +/* + * File ops used for: + * /dev/oprofile/0/count + * /dev/oprofile/hwsampling/hw_interval (cpu_type = timer) + * + * Make sure that the value is within the hardware range. + */ + +static ssize_t hw_interval_read(struct file *file, char __user *buf, + size_t count, loff_t *offset) +{ + return oprofilefs_ulong_to_user(oprofile_hw_interval, buf, + count, offset); +} + +static ssize_t hw_interval_write(struct file *file, char const __user *buf, + size_t count, loff_t *offset) +{ + unsigned long val; + int retval; + + if (*offset) + return -EINVAL; + retval = oprofilefs_ulong_from_user(&val, buf, count); + if (retval) + return retval; + if (val < oprofile_min_interval) + oprofile_hw_interval = oprofile_min_interval; + else if (val > oprofile_max_interval) + oprofile_hw_interval = oprofile_max_interval; + else + oprofile_hw_interval = val; + + return count; +} + +static const struct file_operations hw_interval_fops = { + .read = hw_interval_read, + .write = hw_interval_write, +}; + +/* + * File ops used for: + * /dev/oprofile/0/event + * Only a single event with number 0 is supported with this counter. + * + * /dev/oprofile/0/unit_mask + * This is a dummy file needed by the user space tools. + * No value other than 0 is accepted or returned. + */ + +static ssize_t hwsampler_zero_read(struct file *file, char __user *buf, + size_t count, loff_t *offset) +{ + return oprofilefs_ulong_to_user(0, buf, count, offset); +} + +static ssize_t hwsampler_zero_write(struct file *file, char const __user *buf, + size_t count, loff_t *offset) +{ + unsigned long val; + int retval; + + if (*offset) + return -EINVAL; + + retval = oprofilefs_ulong_from_user(&val, buf, count); + if (retval) + return retval; + if (val != 0) + return -EINVAL; + return count; +} + +static const struct file_operations zero_fops = { + .read = hwsampler_zero_read, + .write = hwsampler_zero_write, +}; + +/* /dev/oprofile/0/kernel file ops. */ + +static ssize_t hwsampler_kernel_read(struct file *file, char __user *buf, + size_t count, loff_t *offset) +{ + return oprofilefs_ulong_to_user(counter_config.kernel, + buf, count, offset); +} + +static ssize_t hwsampler_kernel_write(struct file *file, char const __user *buf, + size_t count, loff_t *offset) +{ + unsigned long val; + int retval; + + if (*offset) + return -EINVAL; + + retval = oprofilefs_ulong_from_user(&val, buf, count); + if (retval) + return retval; + + if (val != 0 && val != 1) + return -EINVAL; + + counter_config.kernel = val; + + return count; +} + +static const struct file_operations kernel_fops = { + .read = hwsampler_kernel_read, + .write = hwsampler_kernel_write, +}; + +/* /dev/oprofile/0/user file ops. */ + +static ssize_t hwsampler_user_read(struct file *file, char __user *buf, + size_t count, loff_t *offset) +{ + return oprofilefs_ulong_to_user(counter_config.user, + buf, count, offset); +} + +static ssize_t hwsampler_user_write(struct file *file, char const __user *buf, + size_t count, loff_t *offset) +{ + unsigned long val; + int retval; + + if (*offset) + return -EINVAL; + + retval = oprofilefs_ulong_from_user(&val, buf, count); + if (retval) + return retval; + + if (val != 0 && val != 1) + return -EINVAL; + + counter_config.user = val; + + return count; +} + +static const struct file_operations user_fops = { + .read = hwsampler_user_read, + .write = hwsampler_user_write, +}; + + +/* + * File ops used for: /dev/oprofile/timer/enabled + * The value always has to be the inverted value of hwsampler_enabled. So + * no separate variable is created. That way we do not need locking. + */ + +static ssize_t timer_enabled_read(struct file *file, char __user *buf, + size_t count, loff_t *offset) +{ + return oprofilefs_ulong_to_user(!hwsampler_enabled, buf, count, offset); +} + +static ssize_t timer_enabled_write(struct file *file, char const __user *buf, + size_t count, loff_t *offset) +{ + unsigned long val; + int retval; + + if (*offset) + return -EINVAL; + + retval = oprofilefs_ulong_from_user(&val, buf, count); + if (retval) + return retval; + + if (val != 0 && val != 1) + return -EINVAL; + + /* Timer cannot be disabled without having hardware sampling. */ + if (val == 0 && !hwsampler_available) + return -EINVAL; + + if (oprofile_started) + /* + * save to do without locking as we set + * hwsampler_running in start() when start_mutex is + * held + */ + return -EBUSY; + + hwsampler_enabled = !val; + + return count; +} + +static const struct file_operations timer_enabled_fops = { + .read = timer_enabled_read, + .write = timer_enabled_write, +}; + + static int oprofile_create_hwsampling_files(struct super_block *sb, - struct dentry *root) + struct dentry *root) { - struct dentry *hw_dir; + struct dentry *dir; + + dir = oprofilefs_mkdir(sb, root, "timer"); + if (!dir) + return -EINVAL; + + oprofilefs_create_file(sb, dir, "enabled", &timer_enabled_fops); + + if (!hwsampler_available) + return 0; /* reinitialize default values */ - hwsampler_file = 1; + hwsampler_enabled = 1; + counter_config.kernel = 1; + counter_config.user = 1; - hw_dir = oprofilefs_mkdir(sb, root, "hwsampling"); - if (!hw_dir) - return -EINVAL; + if (!force_cpu_type) { + /* + * Create the counter file system. A single virtual + * counter is created which can be used to + * enable/disable hardware sampling dynamically from + * user space. The user space will configure a single + * counter with a single event. The value of 'event' + * and 'unit_mask' are not evaluated by the kernel code + * and can only be set to 0. + */ + + dir = oprofilefs_mkdir(sb, root, "0"); + if (!dir) + return -EINVAL; - oprofilefs_create_file(sb, hw_dir, "hwsampler", &hwsampler_fops); - oprofilefs_create_ulong(sb, hw_dir, "hw_interval", - &oprofile_hw_interval); - oprofilefs_create_ro_ulong(sb, hw_dir, "hw_min_interval", - &oprofile_min_interval); - oprofilefs_create_ro_ulong(sb, hw_dir, "hw_max_interval", - &oprofile_max_interval); - oprofilefs_create_ulong(sb, hw_dir, "hw_sdbt_blocks", - &oprofile_sdbt_blocks); + oprofilefs_create_file(sb, dir, "enabled", &hwsampler_fops); + oprofilefs_create_file(sb, dir, "event", &zero_fops); + oprofilefs_create_file(sb, dir, "count", &hw_interval_fops); + oprofilefs_create_file(sb, dir, "unit_mask", &zero_fops); + oprofilefs_create_file(sb, dir, "kernel", &kernel_fops); + oprofilefs_create_file(sb, dir, "user", &user_fops); + oprofilefs_create_ulong(sb, dir, "hw_sdbt_blocks", + &oprofile_sdbt_blocks); + } else { + /* + * Hardware sampling can be used but the cpu_type is + * forced to timer in order to deal with legacy user + * space tools. The /dev/oprofile/hwsampling fs is + * provided in that case. + */ + dir = oprofilefs_mkdir(sb, root, "hwsampling"); + if (!dir) + return -EINVAL; + + oprofilefs_create_file(sb, dir, "hwsampler", + &hwsampler_fops); + oprofilefs_create_file(sb, dir, "hw_interval", + &hw_interval_fops); + oprofilefs_create_ro_ulong(sb, dir, "hw_min_interval", + &oprofile_min_interval); + oprofilefs_create_ro_ulong(sb, dir, "hw_max_interval", + &oprofile_max_interval); + oprofilefs_create_ulong(sb, dir, "hw_sdbt_blocks", + &oprofile_sdbt_blocks); + } return 0; } static int oprofile_hwsampler_init(struct oprofile_operations *ops) { + /* + * Initialize the timer mode infrastructure as well in order + * to be able to switch back dynamically. oprofile_timer_init + * is not supposed to fail. + */ + if (oprofile_timer_init(ops)) + BUG(); + + memcpy(&timer_ops, ops, sizeof(timer_ops)); + ops->create_files = oprofile_create_hwsampling_files; + + /* + * If the user space tools do not support newer cpu types, + * the force_cpu_type module parameter + * can be used to always return \"timer\" as cpu type. + */ + if (force_cpu_type != timer) { + struct cpuid id; + + get_cpu_id (&id); + + switch (id.machine) { + case 0x2097: case 0x2098: ops->cpu_type = "s390/z10"; break; + case 0x2817: case 0x2818: ops->cpu_type = "s390/z196"; break; + default: return -ENODEV; + } + } + if (hwsampler_setup()) return -ENODEV; /* - * create hwsampler files only if hwsampler_setup() succeeds. + * Query the range for the sampling interval from the + * hardware. */ oprofile_min_interval = hwsampler_query_min_interval(); if (oprofile_min_interval == 0) @@ -155,23 +466,17 @@ static int oprofile_hwsampler_init(struct oprofile_operations *ops) if (oprofile_hw_interval > oprofile_max_interval) oprofile_hw_interval = oprofile_max_interval; - if (oprofile_timer_init(ops)) - return -ENODEV; - - printk(KERN_INFO "oprofile: using hardware sampling\n"); - - memcpy(&timer_ops, ops, sizeof(timer_ops)); + printk(KERN_INFO "oprofile: System z hardware sampling " + "facility found.\n"); ops->start = oprofile_hwsampler_start; ops->stop = oprofile_hwsampler_stop; - ops->create_files = oprofile_create_hwsampling_files; return 0; } static void oprofile_hwsampler_exit(void) { - oprofile_timer_exit(); hwsampler_shutdown(); } @@ -182,7 +487,15 @@ int __init oprofile_arch_init(struct oprofile_operations *ops) ops->backtrace = s390_backtrace; #ifdef CONFIG_64BIT - return oprofile_hwsampler_init(ops); + + /* + * -ENODEV is not reported to the caller. The module itself + * will use the timer mode sampling as fallback and this is + * always available. + */ + hwsampler_available = oprofile_hwsampler_init(ops) == 0; + + return 0; #else return -ENODEV; #endif diff --git a/arch/s390/oprofile/op_counter.h b/arch/s390/oprofile/op_counter.h new file mode 100644 index 000000000000..1a8d3ca09014 --- /dev/null +++ b/arch/s390/oprofile/op_counter.h @@ -0,0 +1,23 @@ +/** + * arch/s390/oprofile/op_counter.h + * + * Copyright (C) 2011 IBM Deutschland Entwicklung GmbH, IBM Corporation + * Author(s): Andreas Krebbel (krebbel@linux.vnet.ibm.com) + * + * @remark Copyright 2011 OProfile authors + */ + +#ifndef OP_COUNTER_H +#define OP_COUNTER_H + +struct op_counter_config { + /* `enabled' maps to the hwsampler_file variable. */ + /* `count' maps to the oprofile_hw_interval variable. */ + /* `event' and `unit_mask' are unused. */ + unsigned long kernel; + unsigned long user; +}; + +extern struct op_counter_config counter_config; + +#endif /* OP_COUNTER_H */ |
