summaryrefslogtreecommitdiff
path: root/arch/s390
diff options
context:
space:
mode:
Diffstat (limited to 'arch/s390')
-rw-r--r--arch/s390/Kconfig7
-rw-r--r--arch/s390/appldata/appldata_os.c16
-rw-r--r--arch/s390/crypto/crypt_s390.h7
-rw-r--r--arch/s390/include/asm/cputime.h142
-rw-r--r--arch/s390/include/asm/kvm_host.h3
-rw-r--r--arch/s390/include/asm/pgtable.h20
-rw-r--r--arch/s390/include/asm/setup.h3
-rw-r--r--arch/s390/include/asm/socket.h3
-rw-r--r--arch/s390/include/asm/timex.h2
-rw-r--r--arch/s390/include/asm/unistd.h4
-rw-r--r--arch/s390/kernel/compat_wrapper.S20
-rw-r--r--arch/s390/kernel/early.c2
-rw-r--r--arch/s390/kernel/process.c6
-rw-r--r--arch/s390/kernel/ptrace.c30
-rw-r--r--arch/s390/kernel/setup.c10
-rw-r--r--arch/s390/kernel/signal.c8
-rw-r--r--arch/s390/kernel/syscalls.S2
-rw-r--r--arch/s390/kernel/topology.c45
-rw-r--r--arch/s390/kernel/vmlinux.lds.S2
-rw-r--r--arch/s390/kvm/diag.c2
-rw-r--r--arch/s390/kvm/intercept.c3
-rw-r--r--arch/s390/kvm/interrupt.c1
-rw-r--r--arch/s390/kvm/kvm-s390.c12
-rw-r--r--arch/s390/kvm/priv.c10
-rw-r--r--arch/s390/kvm/sigp.c45
-rw-r--r--arch/s390/mm/fault.c9
-rw-r--r--arch/s390/oprofile/hwsampler.c7
-rw-r--r--arch/s390/oprofile/init.c375
-rw-r--r--arch/s390/oprofile/op_counter.h23
29 files changed, 646 insertions, 173 deletions
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index a9fbd43395f7..d48ede334434 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -92,6 +92,9 @@ config S390
select HAVE_ARCH_JUMP_LABEL if !MARCH_G5
select HAVE_RCU_TABLE_FREE if SMP
select ARCH_SAVE_PAGE_KEYS if HIBERNATION
+ select HAVE_MEMBLOCK
+ select HAVE_MEMBLOCK_NODE_MAP
+ select ARCH_DISCARD_MEMBLOCK
select ARCH_INLINE_SPIN_TRYLOCK
select ARCH_INLINE_SPIN_TRYLOCK_BH
select ARCH_INLINE_SPIN_LOCK
@@ -345,9 +348,6 @@ config WARN_DYNAMIC_STACK
Say N if you are unsure.
-config ARCH_POPULATES_NODE_MAP
- def_bool y
-
comment "Kernel preemption"
source "kernel/Kconfig.preempt"
@@ -572,6 +572,7 @@ config KEXEC
config CRASH_DUMP
bool "kernel crash dumps"
depends on 64BIT
+ select KEXEC
help
Generate crash dump after being started by kexec.
Crash dump kernels are loaded in the main kernel with kexec-tools
diff --git a/arch/s390/appldata/appldata_os.c b/arch/s390/appldata/appldata_os.c
index 92f1cb745d69..4de031d6b76c 100644
--- a/arch/s390/appldata/appldata_os.c
+++ b/arch/s390/appldata/appldata_os.c
@@ -115,21 +115,21 @@ static void appldata_get_os_data(void *data)
j = 0;
for_each_online_cpu(i) {
os_data->os_cpu[j].per_cpu_user =
- cputime_to_jiffies(kstat_cpu(i).cpustat.user);
+ cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_USER]);
os_data->os_cpu[j].per_cpu_nice =
- cputime_to_jiffies(kstat_cpu(i).cpustat.nice);
+ cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_NICE]);
os_data->os_cpu[j].per_cpu_system =
- cputime_to_jiffies(kstat_cpu(i).cpustat.system);
+ cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_SYSTEM]);
os_data->os_cpu[j].per_cpu_idle =
- cputime_to_jiffies(kstat_cpu(i).cpustat.idle);
+ cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_IDLE]);
os_data->os_cpu[j].per_cpu_irq =
- cputime_to_jiffies(kstat_cpu(i).cpustat.irq);
+ cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_IRQ]);
os_data->os_cpu[j].per_cpu_softirq =
- cputime_to_jiffies(kstat_cpu(i).cpustat.softirq);
+ cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_SOFTIRQ]);
os_data->os_cpu[j].per_cpu_iowait =
- cputime_to_jiffies(kstat_cpu(i).cpustat.iowait);
+ cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_IOWAIT]);
os_data->os_cpu[j].per_cpu_steal =
- cputime_to_jiffies(kstat_cpu(i).cpustat.steal);
+ cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_STEAL]);
os_data->os_cpu[j].cpu_id = i;
j++;
}
diff --git a/arch/s390/crypto/crypt_s390.h b/arch/s390/crypto/crypt_s390.h
index 49676771bd66..ffd1ac255f19 100644
--- a/arch/s390/crypto/crypt_s390.h
+++ b/arch/s390/crypto/crypt_s390.h
@@ -368,9 +368,12 @@ static inline int crypt_s390_func_available(int func,
if (facility_mask & CRYPT_S390_MSA && !test_facility(17))
return 0;
- if (facility_mask & CRYPT_S390_MSA3 && !test_facility(76))
+
+ if (facility_mask & CRYPT_S390_MSA3 &&
+ (!test_facility(2) || !test_facility(76)))
return 0;
- if (facility_mask & CRYPT_S390_MSA4 && !test_facility(77))
+ if (facility_mask & CRYPT_S390_MSA4 &&
+ (!test_facility(2) || !test_facility(77)))
return 0;
switch (func & CRYPT_S390_OP_MASK) {
diff --git a/arch/s390/include/asm/cputime.h b/arch/s390/include/asm/cputime.h
index 081434878296..c23c3900c304 100644
--- a/arch/s390/include/asm/cputime.h
+++ b/arch/s390/include/asm/cputime.h
@@ -16,114 +16,100 @@
/* We want to use full resolution of the CPU timer: 2**-12 micro-seconds. */
-typedef unsigned long long cputime_t;
-typedef unsigned long long cputime64_t;
+typedef unsigned long long __nocast cputime_t;
+typedef unsigned long long __nocast cputime64_t;
-#ifndef __s390x__
-
-static inline unsigned int
-__div(unsigned long long n, unsigned int base)
+static inline unsigned long __div(unsigned long long n, unsigned long base)
{
+#ifndef __s390x__
register_pair rp;
rp.pair = n >> 1;
asm ("dr %0,%1" : "+d" (rp) : "d" (base >> 1));
return rp.subreg.odd;
+#else /* __s390x__ */
+ return n / base;
+#endif /* __s390x__ */
}
-#else /* __s390x__ */
+#define cputime_one_jiffy jiffies_to_cputime(1)
+
+/*
+ * Convert cputime to jiffies and back.
+ */
+static inline unsigned long cputime_to_jiffies(const cputime_t cputime)
+{
+ return __div((__force unsigned long long) cputime, 4096000000ULL / HZ);
+}
-static inline unsigned int
-__div(unsigned long long n, unsigned int base)
+static inline cputime_t jiffies_to_cputime(const unsigned int jif)
{
- return n / base;
+ return (__force cputime_t)(jif * (4096000000ULL / HZ));
}
-#endif /* __s390x__ */
+static inline u64 cputime64_to_jiffies64(cputime64_t cputime)
+{
+ unsigned long long jif = (__force unsigned long long) cputime;
+ do_div(jif, 4096000000ULL / HZ);
+ return jif;
+}
-#define cputime_zero (0ULL)
-#define cputime_one_jiffy jiffies_to_cputime(1)
-#define cputime_max ((~0UL >> 1) - 1)
-#define cputime_add(__a, __b) ((__a) + (__b))
-#define cputime_sub(__a, __b) ((__a) - (__b))
-#define cputime_div(__a, __n) ({ \
- unsigned long long __div = (__a); \
- do_div(__div,__n); \
- __div; \
-})
-#define cputime_halve(__a) ((__a) >> 1)
-#define cputime_eq(__a, __b) ((__a) == (__b))
-#define cputime_gt(__a, __b) ((__a) > (__b))
-#define cputime_ge(__a, __b) ((__a) >= (__b))
-#define cputime_lt(__a, __b) ((__a) < (__b))
-#define cputime_le(__a, __b) ((__a) <= (__b))
-#define cputime_to_jiffies(__ct) (__div((__ct), 4096000000ULL / HZ))
-#define cputime_to_scaled(__ct) (__ct)
-#define jiffies_to_cputime(__hz) ((cputime_t)(__hz) * (4096000000ULL / HZ))
-
-#define cputime64_zero (0ULL)
-#define cputime64_add(__a, __b) ((__a) + (__b))
-#define cputime_to_cputime64(__ct) (__ct)
-
-static inline u64
-cputime64_to_jiffies64(cputime64_t cputime)
-{
- do_div(cputime, 4096000000ULL / HZ);
- return cputime;
+static inline cputime64_t jiffies64_to_cputime64(const u64 jif)
+{
+ return (__force cputime64_t)(jif * (4096000000ULL / HZ));
}
/*
* Convert cputime to microseconds and back.
*/
-static inline unsigned int
-cputime_to_usecs(const cputime_t cputime)
+static inline unsigned int cputime_to_usecs(const cputime_t cputime)
{
- return cputime_div(cputime, 4096);
+ return (__force unsigned long long) cputime >> 12;
}
-static inline cputime_t
-usecs_to_cputime(const unsigned int m)
+static inline cputime_t usecs_to_cputime(const unsigned int m)
{
- return (cputime_t) m * 4096;
+ return (__force cputime_t)(m * 4096ULL);
}
+#define usecs_to_cputime64(m) usecs_to_cputime(m)
+
/*
* Convert cputime to milliseconds and back.
*/
-static inline unsigned int
-cputime_to_secs(const cputime_t cputime)
+static inline unsigned int cputime_to_secs(const cputime_t cputime)
{
- return __div(cputime, 2048000000) >> 1;
+ return __div((__force unsigned long long) cputime, 2048000000) >> 1;
}
-static inline cputime_t
-secs_to_cputime(const unsigned int s)
+static inline cputime_t secs_to_cputime(const unsigned int s)
{
- return (cputime_t) s * 4096000000ULL;
+ return (__force cputime_t)(s * 4096000000ULL);
}
/*
* Convert cputime to timespec and back.
*/
-static inline cputime_t
-timespec_to_cputime(const struct timespec *value)
+static inline cputime_t timespec_to_cputime(const struct timespec *value)
{
- return value->tv_nsec * 4096 / 1000 + (u64) value->tv_sec * 4096000000ULL;
+ unsigned long long ret = value->tv_sec * 4096000000ULL;
+ return (__force cputime_t)(ret + value->tv_nsec * 4096 / 1000);
}
-static inline void
-cputime_to_timespec(const cputime_t cputime, struct timespec *value)
+static inline void cputime_to_timespec(const cputime_t cputime,
+ struct timespec *value)
{
+ unsigned long long __cputime = (__force unsigned long long) cputime;
#ifndef __s390x__
register_pair rp;
- rp.pair = cputime >> 1;
+ rp.pair = __cputime >> 1;
asm ("dr %0,%1" : "+d" (rp) : "d" (2048000000UL));
value->tv_nsec = rp.subreg.even * 1000 / 4096;
value->tv_sec = rp.subreg.odd;
#else
- value->tv_nsec = (cputime % 4096000000ULL) * 1000 / 4096;
- value->tv_sec = cputime / 4096000000ULL;
+ value->tv_nsec = (__cputime % 4096000000ULL) * 1000 / 4096;
+ value->tv_sec = __cputime / 4096000000ULL;
#endif
}
@@ -132,50 +118,52 @@ cputime_to_timespec(const cputime_t cputime, struct timespec *value)
* Since cputime and timeval have the same resolution (microseconds)
* this is easy.
*/
-static inline cputime_t
-timeval_to_cputime(const struct timeval *value)
+static inline cputime_t timeval_to_cputime(const struct timeval *value)
{
- return value->tv_usec * 4096 + (u64) value->tv_sec * 4096000000ULL;
+ unsigned long long ret = value->tv_sec * 4096000000ULL;
+ return (__force cputime_t)(ret + value->tv_usec * 4096ULL);
}
-static inline void
-cputime_to_timeval(const cputime_t cputime, struct timeval *value)
+static inline void cputime_to_timeval(const cputime_t cputime,
+ struct timeval *value)
{
+ unsigned long long __cputime = (__force unsigned long long) cputime;
#ifndef __s390x__
register_pair rp;
- rp.pair = cputime >> 1;
+ rp.pair = __cputime >> 1;
asm ("dr %0,%1" : "+d" (rp) : "d" (2048000000UL));
value->tv_usec = rp.subreg.even / 4096;
value->tv_sec = rp.subreg.odd;
#else
- value->tv_usec = (cputime % 4096000000ULL) / 4096;
- value->tv_sec = cputime / 4096000000ULL;
+ value->tv_usec = (__cputime % 4096000000ULL) / 4096;
+ value->tv_sec = __cputime / 4096000000ULL;
#endif
}
/*
* Convert cputime to clock and back.
*/
-static inline clock_t
-cputime_to_clock_t(cputime_t cputime)
+static inline clock_t cputime_to_clock_t(cputime_t cputime)
{
- return cputime_div(cputime, 4096000000ULL / USER_HZ);
+ unsigned long long clock = (__force unsigned long long) cputime;
+ do_div(clock, 4096000000ULL / USER_HZ);
+ return clock;
}
-static inline cputime_t
-clock_t_to_cputime(unsigned long x)
+static inline cputime_t clock_t_to_cputime(unsigned long x)
{
- return (cputime_t) x * (4096000000ULL / USER_HZ);
+ return (__force cputime_t)(x * (4096000000ULL / USER_HZ));
}
/*
* Convert cputime64 to clock.
*/
-static inline clock_t
-cputime64_to_clock_t(cputime64_t cputime)
+static inline clock_t cputime64_to_clock_t(cputime64_t cputime)
{
- return cputime_div(cputime, 4096000000ULL / USER_HZ);
+ unsigned long long clock = (__force unsigned long long) cputime;
+ do_div(clock, 4096000000ULL / USER_HZ);
+ return clock;
}
struct s390_idle_data {
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 24e18473d926..b0c235cb6ad5 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -47,7 +47,7 @@ struct sca_block {
#define KVM_HPAGE_MASK(x) (~(KVM_HPAGE_SIZE(x) - 1))
#define KVM_PAGES_PER_HPAGE(x) (KVM_HPAGE_SIZE(x) / PAGE_SIZE)
-#define CPUSTAT_HOST 0x80000000
+#define CPUSTAT_STOPPED 0x80000000
#define CPUSTAT_WAIT 0x10000000
#define CPUSTAT_ECALL_PEND 0x08000000
#define CPUSTAT_STOP_INT 0x04000000
@@ -139,6 +139,7 @@ struct kvm_vcpu_stat {
u32 instruction_stfl;
u32 instruction_tprot;
u32 instruction_sigp_sense;
+ u32 instruction_sigp_sense_running;
u32 instruction_sigp_external_call;
u32 instruction_sigp_emergency;
u32 instruction_sigp_stop;
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index 34ede0ea85a9..4f289ff0b7fe 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -593,14 +593,16 @@ static inline pgste_t pgste_update_all(pte_t *ptep, pgste_t pgste)
unsigned long address, bits;
unsigned char skey;
+ if (!pte_present(*ptep))
+ return pgste;
address = pte_val(*ptep) & PAGE_MASK;
skey = page_get_storage_key(address);
bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED);
/* Clear page changed & referenced bit in the storage key */
- if (bits) {
- skey ^= bits;
- page_set_storage_key(address, skey, 1);
- }
+ if (bits & _PAGE_CHANGED)
+ page_set_storage_key(address, skey ^ bits, 1);
+ else if (bits)
+ page_reset_referenced(address);
/* Transfer page changed & referenced bit to guest bits in pgste */
pgste_val(pgste) |= bits << 48; /* RCP_GR_BIT & RCP_GC_BIT */
/* Get host changed & referenced bits from pgste */
@@ -625,6 +627,8 @@ static inline pgste_t pgste_update_young(pte_t *ptep, pgste_t pgste)
#ifdef CONFIG_PGSTE
int young;
+ if (!pte_present(*ptep))
+ return pgste;
young = page_reset_referenced(pte_val(*ptep) & PAGE_MASK);
/* Transfer page referenced bit to pte software bit (host view) */
if (young || (pgste_val(pgste) & RCP_HR_BIT))
@@ -638,13 +642,15 @@ static inline pgste_t pgste_update_young(pte_t *ptep, pgste_t pgste)
}
-static inline void pgste_set_pte(pte_t *ptep, pgste_t pgste)
+static inline void pgste_set_pte(pte_t *ptep, pgste_t pgste, pte_t entry)
{
#ifdef CONFIG_PGSTE
unsigned long address;
unsigned long okey, nkey;
- address = pte_val(*ptep) & PAGE_MASK;
+ if (!pte_present(entry))
+ return;
+ address = pte_val(entry) & PAGE_MASK;
okey = nkey = page_get_storage_key(address);
nkey &= ~(_PAGE_ACC_BITS | _PAGE_FP_BIT);
/* Set page access key and fetch protection bit from pgste */
@@ -712,7 +718,7 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
if (mm_has_pgste(mm)) {
pgste = pgste_get_lock(ptep);
- pgste_set_pte(ptep, pgste);
+ pgste_set_pte(ptep, pgste, entry);
*ptep = entry;
pgste_set_unlock(ptep, pgste);
} else
diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h
index 5a099714df04..097183c70407 100644
--- a/arch/s390/include/asm/setup.h
+++ b/arch/s390/include/asm/setup.h
@@ -82,6 +82,7 @@ extern unsigned int user_mode;
#define MACHINE_FLAG_LPAR (1UL << 12)
#define MACHINE_FLAG_SPP (1UL << 13)
#define MACHINE_FLAG_TOPOLOGY (1UL << 14)
+#define MACHINE_FLAG_STCKF (1UL << 15)
#define MACHINE_IS_VM (S390_lowcore.machine_flags & MACHINE_FLAG_VM)
#define MACHINE_IS_KVM (S390_lowcore.machine_flags & MACHINE_FLAG_KVM)
@@ -100,6 +101,7 @@ extern unsigned int user_mode;
#define MACHINE_HAS_PFMF (0)
#define MACHINE_HAS_SPP (0)
#define MACHINE_HAS_TOPOLOGY (0)
+#define MACHINE_HAS_STCKF (0)
#else /* __s390x__ */
#define MACHINE_HAS_IEEE (1)
#define MACHINE_HAS_CSP (1)
@@ -111,6 +113,7 @@ extern unsigned int user_mode;
#define MACHINE_HAS_PFMF (S390_lowcore.machine_flags & MACHINE_FLAG_PFMF)
#define MACHINE_HAS_SPP (S390_lowcore.machine_flags & MACHINE_FLAG_SPP)
#define MACHINE_HAS_TOPOLOGY (S390_lowcore.machine_flags & MACHINE_FLAG_TOPOLOGY)
+#define MACHINE_HAS_STCKF (S390_lowcore.machine_flags & MACHINE_FLAG_STCKF)
#endif /* __s390x__ */
#define ZFCPDUMP_HSA_SIZE (32UL<<20)
diff --git a/arch/s390/include/asm/socket.h b/arch/s390/include/asm/socket.h
index fdff1e995c73..67b5c1b14b51 100644
--- a/arch/s390/include/asm/socket.h
+++ b/arch/s390/include/asm/socket.h
@@ -70,4 +70,7 @@
#define SO_RXQ_OVFL 40
+#define SO_WIFI_STATUS 41
+#define SCM_WIFI_STATUS SO_WIFI_STATUS
+
#endif /* _ASM_SOCKET_H */
diff --git a/arch/s390/include/asm/timex.h b/arch/s390/include/asm/timex.h
index d610bef9c5e9..c447a27a7fdb 100644
--- a/arch/s390/include/asm/timex.h
+++ b/arch/s390/include/asm/timex.h
@@ -90,7 +90,7 @@ static inline unsigned long long get_clock_fast(void)
{
unsigned long long clk;
- if (test_facility(25))
+ if (MACHINE_HAS_STCKF)
asm volatile(".insn s,0xb27c0000,%0" : "=Q" (clk) : : "cc");
else
clk = get_clock();
diff --git a/arch/s390/include/asm/unistd.h b/arch/s390/include/asm/unistd.h
index 404bdb9671b4..58de4c91c333 100644
--- a/arch/s390/include/asm/unistd.h
+++ b/arch/s390/include/asm/unistd.h
@@ -277,7 +277,9 @@
#define __NR_clock_adjtime 337
#define __NR_syncfs 338
#define __NR_setns 339
-#define NR_syscalls 340
+#define __NR_process_vm_readv 340
+#define __NR_process_vm_writev 341
+#define NR_syscalls 342
/*
* There are some system calls that are not present on 64 bit, some
diff --git a/arch/s390/kernel/compat_wrapper.S b/arch/s390/kernel/compat_wrapper.S
index 5006a1d9f5d0..18c51df9fe06 100644
--- a/arch/s390/kernel/compat_wrapper.S
+++ b/arch/s390/kernel/compat_wrapper.S
@@ -1627,3 +1627,23 @@ ENTRY(sys_setns_wrapper)
lgfr %r2,%r2 # int
lgfr %r3,%r3 # int
jg sys_setns
+
+ENTRY(compat_sys_process_vm_readv_wrapper)
+ lgfr %r2,%r2 # compat_pid_t
+ llgtr %r3,%r3 # struct compat_iovec __user *
+ llgfr %r4,%r4 # unsigned long
+ llgtr %r5,%r5 # struct compat_iovec __user *
+ llgfr %r6,%r6 # unsigned long
+ llgf %r0,164(%r15) # unsigned long
+ stg %r0,160(%r15)
+ jg sys_process_vm_readv
+
+ENTRY(compat_sys_process_vm_writev_wrapper)
+ lgfr %r2,%r2 # compat_pid_t
+ llgtr %r3,%r3 # struct compat_iovec __user *
+ llgfr %r4,%r4 # unsigned long
+ llgtr %r5,%r5 # struct compat_iovec __user *
+ llgfr %r6,%r6 # unsigned long
+ llgf %r0,164(%r15) # unsigned long
+ stg %r0,160(%r15)
+ jg sys_process_vm_writev
diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c
index 37394b3413e2..c9ffe0025197 100644
--- a/arch/s390/kernel/early.c
+++ b/arch/s390/kernel/early.c
@@ -390,6 +390,8 @@ static __init void detect_machine_facilities(void)
S390_lowcore.machine_flags |= MACHINE_FLAG_MVCOS;
if (test_facility(40))
S390_lowcore.machine_flags |= MACHINE_FLAG_SPP;
+ if (test_facility(25))
+ S390_lowcore.machine_flags |= MACHINE_FLAG_STCKF;
#endif
}
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index 9451b210a1b4..3201ae447990 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -91,10 +91,12 @@ static void default_idle(void)
void cpu_idle(void)
{
for (;;) {
- tick_nohz_stop_sched_tick(1);
+ tick_nohz_idle_enter();
+ rcu_idle_enter();
while (!need_resched())
default_idle();
- tick_nohz_restart_sched_tick();
+ rcu_idle_exit();
+ tick_nohz_idle_exit();
preempt_enable_no_resched();
schedule();
preempt_disable();
diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c
index 450931a45b68..573bc29551ef 100644
--- a/arch/s390/kernel/ptrace.c
+++ b/arch/s390/kernel/ptrace.c
@@ -296,13 +296,6 @@ static int __poke_user(struct task_struct *child, addr_t addr, addr_t data)
((data & PSW_MASK_EA) && !(data & PSW_MASK_BA))))
/* Invalid psw mask. */
return -EINVAL;
- if (addr == (addr_t) &dummy->regs.psw.addr)
- /*
- * The debugger changed the instruction address,
- * reset system call restart, see signal.c:do_signal
- */
- task_thread_info(child)->system_call = 0;
-
*(addr_t *)((addr_t) &task_pt_regs(child)->psw + addr) = data;
} else if (addr < (addr_t) (&dummy->regs.orig_gpr2)) {
@@ -614,11 +607,6 @@ static int __poke_user_compat(struct task_struct *child,
/* Transfer 31 bit amode bit to psw mask. */
regs->psw.mask = (regs->psw.mask & ~PSW_MASK_BA) |
(__u64)(tmp & PSW32_ADDR_AMODE);
- /*
- * The debugger changed the instruction address,
- * reset system call restart, see signal.c:do_signal
- */
- task_thread_info(child)->system_call = 0;
} else {
/* gpr 0-15 */
*(__u32*)((addr_t) &regs->psw + addr*2 + 4) = tmp;
@@ -905,6 +893,14 @@ static int s390_last_break_get(struct task_struct *target,
return 0;
}
+static int s390_last_break_set(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ return 0;
+}
+
#endif
static int s390_system_call_get(struct task_struct *target,
@@ -951,6 +947,7 @@ static const struct user_regset s390_regsets[] = {
.size = sizeof(long),
.align = sizeof(long),
.get = s390_last_break_get,
+ .set = s390_last_break_set,
},
#endif
[REGSET_SYSTEM_CALL] = {
@@ -1116,6 +1113,14 @@ static int s390_compat_last_break_get(struct task_struct *target,
return 0;
}
+static int s390_compat_last_break_set(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ return 0;
+}
+
static const struct user_regset s390_compat_regsets[] = {
[REGSET_GENERAL] = {
.core_note_type = NT_PRSTATUS,
@@ -1139,6 +1144,7 @@ static const struct user_regset s390_compat_regsets[] = {
.size = sizeof(long),
.align = sizeof(long),
.get = s390_compat_last_break_get,
+ .set = s390_compat_last_break_set,
},
[REGSET_SYSTEM_CALL] = {
.core_note_type = NT_S390_SYSTEM_CALL,
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index 8ac6bfa2786c..f11d1b037c50 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -21,6 +21,7 @@
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/kernel.h>
+#include <linux/memblock.h>
#include <linux/mm.h>
#include <linux/stddef.h>
#include <linux/unistd.h>
@@ -211,6 +212,8 @@ static void __init setup_zfcpdump(unsigned int console_devno)
if (ipl_info.type != IPL_TYPE_FCP_DUMP)
return;
+ if (OLDMEM_BASE)
+ return;
if (console_devno != -1)
sprintf(str, " cio_ignore=all,!0.0.%04x,!0.0.%04x",
ipl_info.data.fcp.dev_id.devno, console_devno);
@@ -482,7 +485,7 @@ static void __init setup_memory_end(void)
#ifdef CONFIG_ZFCPDUMP
- if (ipl_info.type == IPL_TYPE_FCP_DUMP) {
+ if (ipl_info.type == IPL_TYPE_FCP_DUMP && !OLDMEM_BASE) {
memory_end = ZFCPDUMP_HSA_SIZE;
memory_end_set = 1;
}
@@ -577,7 +580,7 @@ static unsigned long __init find_crash_base(unsigned long crash_size,
*msg = "first memory chunk must be at least crashkernel size";
return 0;
}
- if (is_kdump_kernel() && (crash_size == OLDMEM_SIZE))
+ if (OLDMEM_BASE && crash_size == OLDMEM_SIZE)
return OLDMEM_BASE;
for (i = MEMORY_CHUNKS - 1; i >= 0; i--) {
@@ -818,7 +821,8 @@ setup_memory(void)
end_chunk = min(end_chunk, end_pfn);
if (start_chunk >= end_chunk)
continue;
- add_active_range(0, start_chunk, end_chunk);
+ memblock_add_node(PFN_PHYS(start_chunk),
+ PFN_PHYS(end_chunk - start_chunk), 0);
pfn = max(start_chunk, start_pfn);
for (; pfn < end_chunk; pfn++)
page_set_storage_key(PFN_PHYS(pfn),
diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c
index 05a85bc14c98..7f6f9f354545 100644
--- a/arch/s390/kernel/signal.c
+++ b/arch/s390/kernel/signal.c
@@ -460,9 +460,9 @@ void do_signal(struct pt_regs *regs)
regs->svc_code >> 16);
break;
}
- /* No longer in a system call */
- clear_thread_flag(TIF_SYSCALL);
}
+ /* No longer in a system call */
+ clear_thread_flag(TIF_SYSCALL);
if ((is_compat_task() ?
handle_signal32(signr, &ka, &info, oldset, regs) :
@@ -486,6 +486,7 @@ void do_signal(struct pt_regs *regs)
}
/* No handlers present - check for system call restart */
+ clear_thread_flag(TIF_SYSCALL);
if (current_thread_info()->system_call) {
regs->svc_code = current_thread_info()->system_call;
switch (regs->gprs[2]) {
@@ -500,9 +501,6 @@ void do_signal(struct pt_regs *regs)
regs->gprs[2] = regs->orig_gpr2;
set_thread_flag(TIF_SYSCALL);
break;
- default:
- clear_thread_flag(TIF_SYSCALL);
- break;
}
}
diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S
index 73eb08c874fb..bcab2f04ba58 100644
--- a/arch/s390/kernel/syscalls.S
+++ b/arch/s390/kernel/syscalls.S
@@ -348,3 +348,5 @@ SYSCALL(sys_open_by_handle_at,sys_open_by_handle_at,compat_sys_open_by_handle_at
SYSCALL(sys_clock_adjtime,sys_clock_adjtime,compat_sys_clock_adjtime_wrapper)
SYSCALL(sys_syncfs,sys_syncfs,sys_syncfs_wrapper)
SYSCALL(sys_setns,sys_setns,sys_setns_wrapper)
+SYSCALL(sys_process_vm_readv,sys_process_vm_readv,compat_sys_process_vm_readv_wrapper) /* 340 */
+SYSCALL(sys_process_vm_writev,sys_process_vm_writev,compat_sys_process_vm_writev_wrapper)
diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c
index 77b8942b9a15..fdb5b8cb260f 100644
--- a/arch/s390/kernel/topology.c
+++ b/arch/s390/kernel/topology.c
@@ -68,8 +68,10 @@ static cpumask_t cpu_group_map(struct mask_info *info, unsigned int cpu)
return mask;
}
-static void add_cpus_to_mask(struct topology_cpu *tl_cpu,
- struct mask_info *book, struct mask_info *core)
+static struct mask_info *add_cpus_to_mask(struct topology_cpu *tl_cpu,
+ struct mask_info *book,
+ struct mask_info *core,
+ int z10)
{
unsigned int cpu;
@@ -88,10 +90,16 @@ static void add_cpus_to_mask(struct topology_cpu *tl_cpu,
cpu_book_id[lcpu] = book->id;
#endif
cpumask_set_cpu(lcpu, &core->mask);
- cpu_core_id[lcpu] = core->id;
+ if (z10) {
+ cpu_core_id[lcpu] = rcpu;
+ core = core->next;
+ } else {
+ cpu_core_id[lcpu] = core->id;
+ }
smp_cpu_polarization[lcpu] = tl_cpu->pp;
}
}
+ return core;
}
static void clear_masks(void)
@@ -123,18 +131,41 @@ static void tl_to_cores(struct sysinfo_15_1_x *info)
{
#ifdef CONFIG_SCHED_BOOK
struct mask_info *book = &book_info;
+ struct cpuid cpu_id;
#else
struct mask_info *book = NULL;
#endif
struct mask_info *core = &core_info;
union topology_entry *tle, *end;
+ int z10 = 0;
-
+#ifdef CONFIG_SCHED_BOOK
+ get_cpu_id(&cpu_id);
+ z10 = cpu_id.machine == 0x2097 || cpu_id.machine == 0x2098;
+#endif
spin_lock_irq(&topology_lock);
clear_masks();
tle = info->tle;
end = (union topology_entry *)((unsigned long)info + info->length);
while (tle < end) {
+#ifdef CONFIG_SCHED_BOOK
+ if (z10) {
+ switch (tle->nl) {
+ case 1:
+ book = book->next;
+ book->id = tle->container.id;
+ break;
+ case 0:
+ core = add_cpus_to_mask(&tle->cpu, book, core, z10);
+ break;
+ default:
+ clear_masks();
+ goto out;
+ }
+ tle = next_tle(tle);
+ continue;
+ }
+#endif
switch (tle->nl) {
#ifdef CONFIG_SCHED_BOOK
case 2:
@@ -147,7 +178,7 @@ static void tl_to_cores(struct sysinfo_15_1_x *info)
core->id = tle->container.id;
break;
case 0:
- add_cpus_to_mask(&tle->cpu, book, core);
+ add_cpus_to_mask(&tle->cpu, book, core, z10);
break;
default:
clear_masks();
@@ -328,8 +359,8 @@ void __init s390_init_cpu_topology(void)
for (i = 0; i < TOPOLOGY_NR_MAG; i++)
printk(" %d", info->mag[i]);
printk(" / %d\n", info->mnest);
- alloc_masks(info, &core_info, 2);
+ alloc_masks(info, &core_info, 1);
#ifdef CONFIG_SCHED_BOOK
- alloc_masks(info, &book_info, 3);
+ alloc_masks(info, &book_info, 2);
#endif
}
diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S
index 56fe6bc81fee..e4c79ebb40e6 100644
--- a/arch/s390/kernel/vmlinux.lds.S
+++ b/arch/s390/kernel/vmlinux.lds.S
@@ -43,6 +43,8 @@ SECTIONS
NOTES :text :note
+ .dummy : { *(.dummy) } :data
+
RODATA
#ifdef CONFIG_SHARED_KERNEL
diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c
index 87cedd61be04..8943e82cd4d9 100644
--- a/arch/s390/kvm/diag.c
+++ b/arch/s390/kvm/diag.c
@@ -70,7 +70,7 @@ static int __diag_ipl_functions(struct kvm_vcpu *vcpu)
return -EOPNOTSUPP;
}
- atomic_clear_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
+ atomic_set_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
vcpu->run->s390_reset_flags |= KVM_S390_RESET_SUBSYSTEM;
vcpu->run->s390_reset_flags |= KVM_S390_RESET_IPL;
vcpu->run->s390_reset_flags |= KVM_S390_RESET_CPU_INIT;
diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c
index c7c51898984e..02434543eabb 100644
--- a/arch/s390/kvm/intercept.c
+++ b/arch/s390/kvm/intercept.c
@@ -132,7 +132,6 @@ static int handle_stop(struct kvm_vcpu *vcpu)
int rc = 0;
vcpu->stat.exit_stop_request++;
- atomic_clear_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
spin_lock_bh(&vcpu->arch.local_int.lock);
if (vcpu->arch.local_int.action_bits & ACTION_STORE_ON_STOP) {
vcpu->arch.local_int.action_bits &= ~ACTION_STORE_ON_STOP;
@@ -149,6 +148,8 @@ static int handle_stop(struct kvm_vcpu *vcpu)
}
if (vcpu->arch.local_int.action_bits & ACTION_STOP_ON_STOP) {
+ atomic_set_mask(CPUSTAT_STOPPED,
+ &vcpu->arch.sie_block->cpuflags);
vcpu->arch.local_int.action_bits &= ~ACTION_STOP_ON_STOP;
VCPU_EVENT(vcpu, 3, "%s", "cpu stopped");
rc = -EOPNOTSUPP;
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index 87c16705b381..278ee009ce65 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -252,6 +252,7 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu,
offsetof(struct _lowcore, restart_psw), sizeof(psw_t));
if (rc == -EFAULT)
exception = 1;
+ atomic_clear_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
break;
case KVM_S390_PROGRAM_INT:
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 0bd3bea1e4cd..d1c445732451 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -65,6 +65,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
{ "instruction_stfl", VCPU_STAT(instruction_stfl) },
{ "instruction_tprot", VCPU_STAT(instruction_tprot) },
{ "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
+ { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
{ "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
{ "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
{ "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
@@ -127,6 +128,7 @@ int kvm_dev_ioctl_check_extension(long ext)
switch (ext) {
case KVM_CAP_S390_PSW:
case KVM_CAP_S390_GMAP:
+ case KVM_CAP_SYNC_MMU:
r = 1;
break;
default:
@@ -270,10 +272,12 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
restore_fp_regs(&vcpu->arch.guest_fpregs);
restore_access_regs(vcpu->arch.guest_acrs);
gmap_enable(vcpu->arch.gmap);
+ atomic_set_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
}
void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
{
+ atomic_clear_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
gmap_disable(vcpu->arch.gmap);
save_fp_regs(&vcpu->arch.guest_fpregs);
save_access_regs(vcpu->arch.guest_acrs);
@@ -301,7 +305,9 @@ static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
{
- atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH | CPUSTAT_SM);
+ atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
+ CPUSTAT_SM |
+ CPUSTAT_STOPPED);
vcpu->arch.sie_block->ecb = 6;
vcpu->arch.sie_block->eca = 0xC1002001U;
vcpu->arch.sie_block->fac = (int) (long) facilities;
@@ -428,7 +434,7 @@ static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
{
int rc = 0;
- if (atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_RUNNING)
+ if (!(atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_STOPPED))
rc = -EBUSY;
else {
vcpu->run->psw_mask = psw.mask;
@@ -501,7 +507,7 @@ rerun_vcpu:
if (vcpu->sigset_active)
sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
- atomic_set_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
+ atomic_clear_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
BUG_ON(vcpu->kvm->arch.float_int.local_int[vcpu->vcpu_id] == NULL);
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index 391626361084..d02638959922 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -336,6 +336,7 @@ static int handle_tprot(struct kvm_vcpu *vcpu)
u64 address1 = disp1 + base1 ? vcpu->arch.guest_gprs[base1] : 0;
u64 address2 = disp2 + base2 ? vcpu->arch.guest_gprs[base2] : 0;
struct vm_area_struct *vma;
+ unsigned long user_address;
vcpu->stat.instruction_tprot++;
@@ -349,9 +350,14 @@ static int handle_tprot(struct kvm_vcpu *vcpu)
return -EOPNOTSUPP;
+ /* we must resolve the address without holding the mmap semaphore.
+ * This is ok since the userspace hypervisor is not supposed to change
+ * the mapping while the guest queries the memory. Otherwise the guest
+ * might crash or get wrong info anyway. */
+ user_address = (unsigned long) __guestaddr_to_user(vcpu, address1);
+
down_read(&current->mm->mmap_sem);
- vma = find_vma(current->mm,
- (unsigned long) __guestaddr_to_user(vcpu, address1));
+ vma = find_vma(current->mm, user_address);
if (!vma) {
up_read(&current->mm->mmap_sem);
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c
index f815118835f3..0a7941d74bc6 100644
--- a/arch/s390/kvm/sigp.c
+++ b/arch/s390/kvm/sigp.c
@@ -31,9 +31,11 @@
#define SIGP_SET_PREFIX 0x0d
#define SIGP_STORE_STATUS_ADDR 0x0e
#define SIGP_SET_ARCH 0x12
+#define SIGP_SENSE_RUNNING 0x15
/* cpu status bits */
#define SIGP_STAT_EQUIPMENT_CHECK 0x80000000UL
+#define SIGP_STAT_NOT_RUNNING 0x00000400UL
#define SIGP_STAT_INCORRECT_STATE 0x00000200UL
#define SIGP_STAT_INVALID_PARAMETER 0x00000100UL
#define SIGP_STAT_EXT_CALL_PENDING 0x00000080UL
@@ -57,8 +59,8 @@ static int __sigp_sense(struct kvm_vcpu *vcpu, u16 cpu_addr,
spin_lock(&fi->lock);
if (fi->local_int[cpu_addr] == NULL)
rc = 3; /* not operational */
- else if (atomic_read(fi->local_int[cpu_addr]->cpuflags)
- & CPUSTAT_RUNNING) {
+ else if (!(atomic_read(fi->local_int[cpu_addr]->cpuflags)
+ & CPUSTAT_STOPPED)) {
*reg &= 0xffffffff00000000UL;
rc = 1; /* status stored */
} else {
@@ -251,7 +253,7 @@ static int __sigp_set_prefix(struct kvm_vcpu *vcpu, u16 cpu_addr, u32 address,
spin_lock_bh(&li->lock);
/* cpu must be in stopped state */
- if (atomic_read(li->cpuflags) & CPUSTAT_RUNNING) {
+ if (!(atomic_read(li->cpuflags) & CPUSTAT_STOPPED)) {
rc = 1; /* incorrect state */
*reg &= SIGP_STAT_INCORRECT_STATE;
kfree(inti);
@@ -275,6 +277,38 @@ out_fi:
return rc;
}
+static int __sigp_sense_running(struct kvm_vcpu *vcpu, u16 cpu_addr,
+ unsigned long *reg)
+{
+ int rc;
+ struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int;
+
+ if (cpu_addr >= KVM_MAX_VCPUS)
+ return 3; /* not operational */
+
+ spin_lock(&fi->lock);
+ if (fi->local_int[cpu_addr] == NULL)
+ rc = 3; /* not operational */
+ else {
+ if (atomic_read(fi->local_int[cpu_addr]->cpuflags)
+ & CPUSTAT_RUNNING) {
+ /* running */
+ rc = 1;
+ } else {
+ /* not running */
+ *reg &= 0xffffffff00000000UL;
+ *reg |= SIGP_STAT_NOT_RUNNING;
+ rc = 0;
+ }
+ }
+ spin_unlock(&fi->lock);
+
+ VCPU_EVENT(vcpu, 4, "sensed running status of cpu %x rc %x", cpu_addr,
+ rc);
+
+ return rc;
+}
+
int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu)
{
int r1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4;
@@ -331,6 +365,11 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu)
rc = __sigp_set_prefix(vcpu, cpu_addr, parameter,
&vcpu->arch.guest_gprs[r1]);
break;
+ case SIGP_SENSE_RUNNING:
+ vcpu->stat.instruction_sigp_sense_running++;
+ rc = __sigp_sense_running(vcpu, cpu_addr,
+ &vcpu->arch.guest_gprs[r1]);
+ break;
case SIGP_RESTART:
vcpu->stat.instruction_sigp_restart++;
/* user space must know about restart */
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index 1766def5bc3f..a9a301866b3c 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -587,8 +587,13 @@ static void pfault_interrupt(unsigned int ext_int_code,
} else {
/* Completion interrupt was faster than initial
* interrupt. Set pfault_wait to -1 so the initial
- * interrupt doesn't put the task to sleep. */
- tsk->thread.pfault_wait = -1;
+ * interrupt doesn't put the task to sleep.
+ * If the task is not running, ignore the completion
+ * interrupt since it must be a leftover of a PFAULT
+ * CANCEL operation which didn't remove all pending
+ * completion interrupts. */
+ if (tsk->state == TASK_RUNNING)
+ tsk->thread.pfault_wait = -1;
}
put_task_struct(tsk);
} else {
diff --git a/arch/s390/oprofile/hwsampler.c b/arch/s390/oprofile/hwsampler.c
index f43c0e4282af..9daee91e6c3f 100644
--- a/arch/s390/oprofile/hwsampler.c
+++ b/arch/s390/oprofile/hwsampler.c
@@ -22,6 +22,7 @@
#include <asm/irq.h>
#include "hwsampler.h"
+#include "op_counter.h"
#define MAX_NUM_SDB 511
#define MIN_NUM_SDB 1
@@ -896,6 +897,8 @@ static void add_samples_to_oprofile(unsigned int cpu, unsigned long *sdbt,
if (sample_data_ptr->P == 1) {
/* userspace sample */
unsigned int pid = sample_data_ptr->prim_asn;
+ if (!counter_config.user)
+ goto skip_sample;
rcu_read_lock();
tsk = pid_task(find_vpid(pid), PIDTYPE_PID);
if (tsk)
@@ -903,6 +906,8 @@ static void add_samples_to_oprofile(unsigned int cpu, unsigned long *sdbt,
rcu_read_unlock();
} else {
/* kernelspace sample */
+ if (!counter_config.kernel)
+ goto skip_sample;
regs = task_pt_regs(current);
}
@@ -910,7 +915,7 @@ static void add_samples_to_oprofile(unsigned int cpu, unsigned long *sdbt,
oprofile_add_ext_hw_sample(sample_data_ptr->ia, regs, 0,
!sample_data_ptr->P, tsk);
mutex_unlock(&hws_sem);
-
+ skip_sample:
sample_data_ptr++;
}
}
diff --git a/arch/s390/oprofile/init.c b/arch/s390/oprofile/init.c
index 6efc18b5e60a..2297be406c61 100644
--- a/arch/s390/oprofile/init.c
+++ b/arch/s390/oprofile/init.c
@@ -2,10 +2,11 @@
* arch/s390/oprofile/init.c
*
* S390 Version
- * Copyright (C) 2003 IBM Deutschland Entwicklung GmbH, IBM Corporation
+ * Copyright (C) 2002-2011 IBM Deutschland Entwicklung GmbH, IBM Corporation
* Author(s): Thomas Spatzier (tspat@de.ibm.com)
* Author(s): Mahesh Salgaonkar (mahesh@linux.vnet.ibm.com)
* Author(s): Heinz Graalfs (graalfs@linux.vnet.ibm.com)
+ * Author(s): Andreas Krebbel (krebbel@linux.vnet.ibm.com)
*
* @remark Copyright 2002-2011 OProfile authors
*/
@@ -14,6 +15,8 @@
#include <linux/init.h>
#include <linux/errno.h>
#include <linux/fs.h>
+#include <linux/module.h>
+#include <asm/processor.h>
#include "../../../drivers/oprofile/oprof.h"
@@ -22,6 +25,7 @@ extern void s390_backtrace(struct pt_regs * const regs, unsigned int depth);
#ifdef CONFIG_64BIT
#include "hwsampler.h"
+#include "op_counter.h"
#define DEFAULT_INTERVAL 4127518
@@ -35,16 +39,41 @@ static unsigned long oprofile_max_interval;
static unsigned long oprofile_sdbt_blocks = DEFAULT_SDBT_BLOCKS;
static unsigned long oprofile_sdb_blocks = DEFAULT_SDB_BLOCKS;
-static int hwsampler_file;
+static int hwsampler_enabled;
static int hwsampler_running; /* start_mutex must be held to change */
+static int hwsampler_available;
static struct oprofile_operations timer_ops;
+struct op_counter_config counter_config;
+
+enum __force_cpu_type {
+ reserved = 0, /* do not force */
+ timer,
+};
+static int force_cpu_type;
+
+static int set_cpu_type(const char *str, struct kernel_param *kp)
+{
+ if (!strcmp(str, "timer")) {
+ force_cpu_type = timer;
+ printk(KERN_INFO "oprofile: forcing timer to be returned "
+ "as cpu type\n");
+ } else {
+ force_cpu_type = 0;
+ }
+
+ return 0;
+}
+module_param_call(cpu_type, set_cpu_type, NULL, NULL, 0);
+MODULE_PARM_DESC(cpu_type, "Force legacy basic mode sampling"
+ "(report cpu_type \"timer\"");
+
static int oprofile_hwsampler_start(void)
{
int retval;
- hwsampler_running = hwsampler_file;
+ hwsampler_running = hwsampler_enabled;
if (!hwsampler_running)
return timer_ops.start();
@@ -72,10 +101,16 @@ static void oprofile_hwsampler_stop(void)
return;
}
+/*
+ * File ops used for:
+ * /dev/oprofile/0/enabled
+ * /dev/oprofile/hwsampling/hwsampler (cpu_type = timer)
+ */
+
static ssize_t hwsampler_read(struct file *file, char __user *buf,
size_t count, loff_t *offset)
{
- return oprofilefs_ulong_to_user(hwsampler_file, buf, count, offset);
+ return oprofilefs_ulong_to_user(hwsampler_enabled, buf, count, offset);
}
static ssize_t hwsampler_write(struct file *file, char const __user *buf,
@@ -88,9 +123,12 @@ static ssize_t hwsampler_write(struct file *file, char const __user *buf,
return -EINVAL;
retval = oprofilefs_ulong_from_user(&val, buf, count);
- if (retval)
+ if (retval <= 0)
return retval;
+ if (val != 0 && val != 1)
+ return -EINVAL;
+
if (oprofile_started)
/*
* save to do without locking as we set
@@ -99,7 +137,7 @@ static ssize_t hwsampler_write(struct file *file, char const __user *buf,
*/
return -EBUSY;
- hwsampler_file = val;
+ hwsampler_enabled = val;
return count;
}
@@ -109,38 +147,311 @@ static const struct file_operations hwsampler_fops = {
.write = hwsampler_write,
};
+/*
+ * File ops used for:
+ * /dev/oprofile/0/count
+ * /dev/oprofile/hwsampling/hw_interval (cpu_type = timer)
+ *
+ * Make sure that the value is within the hardware range.
+ */
+
+static ssize_t hw_interval_read(struct file *file, char __user *buf,
+ size_t count, loff_t *offset)
+{
+ return oprofilefs_ulong_to_user(oprofile_hw_interval, buf,
+ count, offset);
+}
+
+static ssize_t hw_interval_write(struct file *file, char const __user *buf,
+ size_t count, loff_t *offset)
+{
+ unsigned long val;
+ int retval;
+
+ if (*offset)
+ return -EINVAL;
+ retval = oprofilefs_ulong_from_user(&val, buf, count);
+ if (retval)
+ return retval;
+ if (val < oprofile_min_interval)
+ oprofile_hw_interval = oprofile_min_interval;
+ else if (val > oprofile_max_interval)
+ oprofile_hw_interval = oprofile_max_interval;
+ else
+ oprofile_hw_interval = val;
+
+ return count;
+}
+
+static const struct file_operations hw_interval_fops = {
+ .read = hw_interval_read,
+ .write = hw_interval_write,
+};
+
+/*
+ * File ops used for:
+ * /dev/oprofile/0/event
+ * Only a single event with number 0 is supported with this counter.
+ *
+ * /dev/oprofile/0/unit_mask
+ * This is a dummy file needed by the user space tools.
+ * No value other than 0 is accepted or returned.
+ */
+
+static ssize_t hwsampler_zero_read(struct file *file, char __user *buf,
+ size_t count, loff_t *offset)
+{
+ return oprofilefs_ulong_to_user(0, buf, count, offset);
+}
+
+static ssize_t hwsampler_zero_write(struct file *file, char const __user *buf,
+ size_t count, loff_t *offset)
+{
+ unsigned long val;
+ int retval;
+
+ if (*offset)
+ return -EINVAL;
+
+ retval = oprofilefs_ulong_from_user(&val, buf, count);
+ if (retval)
+ return retval;
+ if (val != 0)
+ return -EINVAL;
+ return count;
+}
+
+static const struct file_operations zero_fops = {
+ .read = hwsampler_zero_read,
+ .write = hwsampler_zero_write,
+};
+
+/* /dev/oprofile/0/kernel file ops. */
+
+static ssize_t hwsampler_kernel_read(struct file *file, char __user *buf,
+ size_t count, loff_t *offset)
+{
+ return oprofilefs_ulong_to_user(counter_config.kernel,
+ buf, count, offset);
+}
+
+static ssize_t hwsampler_kernel_write(struct file *file, char const __user *buf,
+ size_t count, loff_t *offset)
+{
+ unsigned long val;
+ int retval;
+
+ if (*offset)
+ return -EINVAL;
+
+ retval = oprofilefs_ulong_from_user(&val, buf, count);
+ if (retval)
+ return retval;
+
+ if (val != 0 && val != 1)
+ return -EINVAL;
+
+ counter_config.kernel = val;
+
+ return count;
+}
+
+static const struct file_operations kernel_fops = {
+ .read = hwsampler_kernel_read,
+ .write = hwsampler_kernel_write,
+};
+
+/* /dev/oprofile/0/user file ops. */
+
+static ssize_t hwsampler_user_read(struct file *file, char __user *buf,
+ size_t count, loff_t *offset)
+{
+ return oprofilefs_ulong_to_user(counter_config.user,
+ buf, count, offset);
+}
+
+static ssize_t hwsampler_user_write(struct file *file, char const __user *buf,
+ size_t count, loff_t *offset)
+{
+ unsigned long val;
+ int retval;
+
+ if (*offset)
+ return -EINVAL;
+
+ retval = oprofilefs_ulong_from_user(&val, buf, count);
+ if (retval)
+ return retval;
+
+ if (val != 0 && val != 1)
+ return -EINVAL;
+
+ counter_config.user = val;
+
+ return count;
+}
+
+static const struct file_operations user_fops = {
+ .read = hwsampler_user_read,
+ .write = hwsampler_user_write,
+};
+
+
+/*
+ * File ops used for: /dev/oprofile/timer/enabled
+ * The value always has to be the inverted value of hwsampler_enabled. So
+ * no separate variable is created. That way we do not need locking.
+ */
+
+static ssize_t timer_enabled_read(struct file *file, char __user *buf,
+ size_t count, loff_t *offset)
+{
+ return oprofilefs_ulong_to_user(!hwsampler_enabled, buf, count, offset);
+}
+
+static ssize_t timer_enabled_write(struct file *file, char const __user *buf,
+ size_t count, loff_t *offset)
+{
+ unsigned long val;
+ int retval;
+
+ if (*offset)
+ return -EINVAL;
+
+ retval = oprofilefs_ulong_from_user(&val, buf, count);
+ if (retval)
+ return retval;
+
+ if (val != 0 && val != 1)
+ return -EINVAL;
+
+ /* Timer cannot be disabled without having hardware sampling. */
+ if (val == 0 && !hwsampler_available)
+ return -EINVAL;
+
+ if (oprofile_started)
+ /*
+ * save to do without locking as we set
+ * hwsampler_running in start() when start_mutex is
+ * held
+ */
+ return -EBUSY;
+
+ hwsampler_enabled = !val;
+
+ return count;
+}
+
+static const struct file_operations timer_enabled_fops = {
+ .read = timer_enabled_read,
+ .write = timer_enabled_write,
+};
+
+
static int oprofile_create_hwsampling_files(struct super_block *sb,
- struct dentry *root)
+ struct dentry *root)
{
- struct dentry *hw_dir;
+ struct dentry *dir;
+
+ dir = oprofilefs_mkdir(sb, root, "timer");
+ if (!dir)
+ return -EINVAL;
+
+ oprofilefs_create_file(sb, dir, "enabled", &timer_enabled_fops);
+
+ if (!hwsampler_available)
+ return 0;
/* reinitialize default values */
- hwsampler_file = 1;
+ hwsampler_enabled = 1;
+ counter_config.kernel = 1;
+ counter_config.user = 1;
- hw_dir = oprofilefs_mkdir(sb, root, "hwsampling");
- if (!hw_dir)
- return -EINVAL;
+ if (!force_cpu_type) {
+ /*
+ * Create the counter file system. A single virtual
+ * counter is created which can be used to
+ * enable/disable hardware sampling dynamically from
+ * user space. The user space will configure a single
+ * counter with a single event. The value of 'event'
+ * and 'unit_mask' are not evaluated by the kernel code
+ * and can only be set to 0.
+ */
+
+ dir = oprofilefs_mkdir(sb, root, "0");
+ if (!dir)
+ return -EINVAL;
- oprofilefs_create_file(sb, hw_dir, "hwsampler", &hwsampler_fops);
- oprofilefs_create_ulong(sb, hw_dir, "hw_interval",
- &oprofile_hw_interval);
- oprofilefs_create_ro_ulong(sb, hw_dir, "hw_min_interval",
- &oprofile_min_interval);
- oprofilefs_create_ro_ulong(sb, hw_dir, "hw_max_interval",
- &oprofile_max_interval);
- oprofilefs_create_ulong(sb, hw_dir, "hw_sdbt_blocks",
- &oprofile_sdbt_blocks);
+ oprofilefs_create_file(sb, dir, "enabled", &hwsampler_fops);
+ oprofilefs_create_file(sb, dir, "event", &zero_fops);
+ oprofilefs_create_file(sb, dir, "count", &hw_interval_fops);
+ oprofilefs_create_file(sb, dir, "unit_mask", &zero_fops);
+ oprofilefs_create_file(sb, dir, "kernel", &kernel_fops);
+ oprofilefs_create_file(sb, dir, "user", &user_fops);
+ oprofilefs_create_ulong(sb, dir, "hw_sdbt_blocks",
+ &oprofile_sdbt_blocks);
+ } else {
+ /*
+ * Hardware sampling can be used but the cpu_type is
+ * forced to timer in order to deal with legacy user
+ * space tools. The /dev/oprofile/hwsampling fs is
+ * provided in that case.
+ */
+ dir = oprofilefs_mkdir(sb, root, "hwsampling");
+ if (!dir)
+ return -EINVAL;
+
+ oprofilefs_create_file(sb, dir, "hwsampler",
+ &hwsampler_fops);
+ oprofilefs_create_file(sb, dir, "hw_interval",
+ &hw_interval_fops);
+ oprofilefs_create_ro_ulong(sb, dir, "hw_min_interval",
+ &oprofile_min_interval);
+ oprofilefs_create_ro_ulong(sb, dir, "hw_max_interval",
+ &oprofile_max_interval);
+ oprofilefs_create_ulong(sb, dir, "hw_sdbt_blocks",
+ &oprofile_sdbt_blocks);
+ }
return 0;
}
static int oprofile_hwsampler_init(struct oprofile_operations *ops)
{
+ /*
+ * Initialize the timer mode infrastructure as well in order
+ * to be able to switch back dynamically. oprofile_timer_init
+ * is not supposed to fail.
+ */
+ if (oprofile_timer_init(ops))
+ BUG();
+
+ memcpy(&timer_ops, ops, sizeof(timer_ops));
+ ops->create_files = oprofile_create_hwsampling_files;
+
+ /*
+ * If the user space tools do not support newer cpu types,
+ * the force_cpu_type module parameter
+ * can be used to always return \"timer\" as cpu type.
+ */
+ if (force_cpu_type != timer) {
+ struct cpuid id;
+
+ get_cpu_id (&id);
+
+ switch (id.machine) {
+ case 0x2097: case 0x2098: ops->cpu_type = "s390/z10"; break;
+ case 0x2817: case 0x2818: ops->cpu_type = "s390/z196"; break;
+ default: return -ENODEV;
+ }
+ }
+
if (hwsampler_setup())
return -ENODEV;
/*
- * create hwsampler files only if hwsampler_setup() succeeds.
+ * Query the range for the sampling interval from the
+ * hardware.
*/
oprofile_min_interval = hwsampler_query_min_interval();
if (oprofile_min_interval == 0)
@@ -155,23 +466,17 @@ static int oprofile_hwsampler_init(struct oprofile_operations *ops)
if (oprofile_hw_interval > oprofile_max_interval)
oprofile_hw_interval = oprofile_max_interval;
- if (oprofile_timer_init(ops))
- return -ENODEV;
-
- printk(KERN_INFO "oprofile: using hardware sampling\n");
-
- memcpy(&timer_ops, ops, sizeof(timer_ops));
+ printk(KERN_INFO "oprofile: System z hardware sampling "
+ "facility found.\n");
ops->start = oprofile_hwsampler_start;
ops->stop = oprofile_hwsampler_stop;
- ops->create_files = oprofile_create_hwsampling_files;
return 0;
}
static void oprofile_hwsampler_exit(void)
{
- oprofile_timer_exit();
hwsampler_shutdown();
}
@@ -182,7 +487,15 @@ int __init oprofile_arch_init(struct oprofile_operations *ops)
ops->backtrace = s390_backtrace;
#ifdef CONFIG_64BIT
- return oprofile_hwsampler_init(ops);
+
+ /*
+ * -ENODEV is not reported to the caller. The module itself
+ * will use the timer mode sampling as fallback and this is
+ * always available.
+ */
+ hwsampler_available = oprofile_hwsampler_init(ops) == 0;
+
+ return 0;
#else
return -ENODEV;
#endif
diff --git a/arch/s390/oprofile/op_counter.h b/arch/s390/oprofile/op_counter.h
new file mode 100644
index 000000000000..1a8d3ca09014
--- /dev/null
+++ b/arch/s390/oprofile/op_counter.h
@@ -0,0 +1,23 @@
+/**
+ * arch/s390/oprofile/op_counter.h
+ *
+ * Copyright (C) 2011 IBM Deutschland Entwicklung GmbH, IBM Corporation
+ * Author(s): Andreas Krebbel (krebbel@linux.vnet.ibm.com)
+ *
+ * @remark Copyright 2011 OProfile authors
+ */
+
+#ifndef OP_COUNTER_H
+#define OP_COUNTER_H
+
+struct op_counter_config {
+ /* `enabled' maps to the hwsampler_file variable. */
+ /* `count' maps to the oprofile_hw_interval variable. */
+ /* `event' and `unit_mask' are unused. */
+ unsigned long kernel;
+ unsigned long user;
+};
+
+extern struct op_counter_config counter_config;
+
+#endif /* OP_COUNTER_H */