diff options
Diffstat (limited to 'arch/s390')
85 files changed, 1912 insertions, 2232 deletions
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index eb8fb629f00b..2c9789da0e24 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -91,6 +91,7 @@ config S390 select ARCH_INLINE_WRITE_UNLOCK_BH select ARCH_INLINE_WRITE_UNLOCK_IRQ select ARCH_INLINE_WRITE_UNLOCK_IRQRESTORE + select ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS select ARCH_SAVE_PAGE_KEYS if HIBERNATION select ARCH_WANT_IPC_PARSE_VERSION select BUILDTIME_EXTABLE_SORT @@ -131,7 +132,6 @@ config S390 select HAVE_PERF_EVENTS select HAVE_REGS_AND_STACK_ACCESS_API select HAVE_SYSCALL_TRACEPOINTS - select HAVE_SYSCALL_WRAPPERS select HAVE_UID16 if 32BIT select HAVE_VIRT_CPU_ACCOUNTING select VIRT_TO_BUS @@ -375,19 +375,6 @@ config PACK_STACK Say Y if you are unsure. -config SMALL_STACK - def_bool n - prompt "Use 8kb for kernel stack instead of 16kb" - depends on PACK_STACK && 64BIT && !LOCKDEP - help - If you say Y here and the compiler supports the -mkernel-backchain - option the kernel will use a smaller kernel stack size. The reduced - size is 8kb instead of 16kb. This allows to run more threads on a - system and reduces the pressure on the memory management for higher - order page allocations. - - Say N if you are unsure. - config CHECK_STACK def_bool y prompt "Detect kernel stack overflow" diff --git a/arch/s390/Kconfig.debug b/arch/s390/Kconfig.debug index fc32a2df4974..c56878e1245f 100644 --- a/arch/s390/Kconfig.debug +++ b/arch/s390/Kconfig.debug @@ -17,20 +17,6 @@ config STRICT_DEVMEM If you are unsure, say Y. -config DEBUG_STRICT_USER_COPY_CHECKS - def_bool n - prompt "Strict user copy size checks" - ---help--- - Enabling this option turns a certain set of sanity checks for user - copy operations into compile time warnings. - - The copy_from_user() etc checks are there to help test if there - are sufficient security checks on the length argument of - the copy operation, by having gcc prove that the argument is - within bounds. - - If unsure, or if you run an older (pre 4.4) gcc, say N. - config S390_PTDUMP bool "Export kernel pagetable layout to userspace via debugfs" depends on DEBUG_KERNEL diff --git a/arch/s390/Makefile b/arch/s390/Makefile index 7e3ce78d4290..a7d68a467ce8 100644 --- a/arch/s390/Makefile +++ b/arch/s390/Makefile @@ -55,22 +55,12 @@ cflags-$(CONFIG_FRAME_POINTER) += -fno-optimize-sibling-calls ifeq ($(call cc-option-yn,-mkernel-backchain),y) cflags-$(CONFIG_PACK_STACK) += -mkernel-backchain -D__PACK_STACK aflags-$(CONFIG_PACK_STACK) += -D__PACK_STACK -cflags-$(CONFIG_SMALL_STACK) += -D__SMALL_STACK -aflags-$(CONFIG_SMALL_STACK) += -D__SMALL_STACK -ifdef CONFIG_SMALL_STACK -STACK_SIZE := $(shell echo $$(($(STACK_SIZE)/2)) ) -endif endif # new style option for packed stacks ifeq ($(call cc-option-yn,-mpacked-stack),y) cflags-$(CONFIG_PACK_STACK) += -mpacked-stack -D__PACK_STACK aflags-$(CONFIG_PACK_STACK) += -D__PACK_STACK -cflags-$(CONFIG_SMALL_STACK) += -D__SMALL_STACK -aflags-$(CONFIG_SMALL_STACK) += -D__SMALL_STACK -ifdef CONFIG_SMALL_STACK -STACK_SIZE := $(shell echo $$(($(STACK_SIZE)/2)) ) -endif endif ifeq ($(call cc-option-yn,-mstack-size=8192 -mstack-guard=128),y) diff --git a/arch/s390/hypfs/hypfs_dbfs.c b/arch/s390/hypfs/hypfs_dbfs.c index 9fd4a40c6752..bb5dd496614f 100644 --- a/arch/s390/hypfs/hypfs_dbfs.c +++ b/arch/s390/hypfs/hypfs_dbfs.c @@ -105,9 +105,7 @@ void hypfs_dbfs_remove_file(struct hypfs_dbfs_file *df) int hypfs_dbfs_init(void) { dbfs_dir = debugfs_create_dir("s390_hypfs", NULL); - if (IS_ERR(dbfs_dir)) - return PTR_ERR(dbfs_dir); - return 0; + return PTR_RET(dbfs_dir); } void hypfs_dbfs_exit(void) diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c index 5f7d7ba2874c..7a539f4f5e30 100644 --- a/arch/s390/hypfs/inode.c +++ b/arch/s390/hypfs/inode.c @@ -21,6 +21,7 @@ #include <linux/module.h> #include <linux/seq_file.h> #include <linux/mount.h> +#include <linux/aio.h> #include <asm/ebcdic.h> #include "hypfs.h" diff --git a/arch/s390/include/asm/bitops.h b/arch/s390/include/asm/bitops.h index 15422933c60b..4d8604e311f3 100644 --- a/arch/s390/include/asm/bitops.h +++ b/arch/s390/include/asm/bitops.h @@ -61,8 +61,6 @@ extern const char _sb_findmap[]; #ifndef CONFIG_64BIT -#define __BITOPS_ALIGN 3 -#define __BITOPS_WORDSIZE 32 #define __BITOPS_OR "or" #define __BITOPS_AND "nr" #define __BITOPS_XOR "xr" @@ -81,8 +79,6 @@ extern const char _sb_findmap[]; #else /* CONFIG_64BIT */ -#define __BITOPS_ALIGN 7 -#define __BITOPS_WORDSIZE 64 #define __BITOPS_OR "ogr" #define __BITOPS_AND "ngr" #define __BITOPS_XOR "xgr" @@ -101,8 +97,7 @@ extern const char _sb_findmap[]; #endif /* CONFIG_64BIT */ -#define __BITOPS_WORDS(bits) (((bits)+__BITOPS_WORDSIZE-1)/__BITOPS_WORDSIZE) -#define __BITOPS_BARRIER() asm volatile("" : : : "memory") +#define __BITOPS_WORDS(bits) (((bits) + BITS_PER_LONG - 1) / BITS_PER_LONG) #ifdef CONFIG_SMP /* @@ -114,9 +109,9 @@ static inline void set_bit_cs(unsigned long nr, volatile unsigned long *ptr) addr = (unsigned long) ptr; /* calculate address for CS */ - addr += (nr ^ (nr & (__BITOPS_WORDSIZE - 1))) >> 3; + addr += (nr ^ (nr & (BITS_PER_LONG - 1))) >> 3; /* make OR mask */ - mask = 1UL << (nr & (__BITOPS_WORDSIZE - 1)); + mask = 1UL << (nr & (BITS_PER_LONG - 1)); /* Do the atomic update. */ __BITOPS_LOOP(old, new, addr, mask, __BITOPS_OR); } @@ -130,9 +125,9 @@ static inline void clear_bit_cs(unsigned long nr, volatile unsigned long *ptr) addr = (unsigned long) ptr; /* calculate address for CS */ - addr += (nr ^ (nr & (__BITOPS_WORDSIZE - 1))) >> 3; + addr += (nr ^ (nr & (BITS_PER_LONG - 1))) >> 3; /* make AND mask */ - mask = ~(1UL << (nr & (__BITOPS_WORDSIZE - 1))); + mask = ~(1UL << (nr & (BITS_PER_LONG - 1))); /* Do the atomic update. */ __BITOPS_LOOP(old, new, addr, mask, __BITOPS_AND); } @@ -146,9 +141,9 @@ static inline void change_bit_cs(unsigned long nr, volatile unsigned long *ptr) addr = (unsigned long) ptr; /* calculate address for CS */ - addr += (nr ^ (nr & (__BITOPS_WORDSIZE - 1))) >> 3; + addr += (nr ^ (nr & (BITS_PER_LONG - 1))) >> 3; /* make XOR mask */ - mask = 1UL << (nr & (__BITOPS_WORDSIZE - 1)); + mask = 1UL << (nr & (BITS_PER_LONG - 1)); /* Do the atomic update. */ __BITOPS_LOOP(old, new, addr, mask, __BITOPS_XOR); } @@ -163,12 +158,12 @@ test_and_set_bit_cs(unsigned long nr, volatile unsigned long *ptr) addr = (unsigned long) ptr; /* calculate address for CS */ - addr += (nr ^ (nr & (__BITOPS_WORDSIZE - 1))) >> 3; + addr += (nr ^ (nr & (BITS_PER_LONG - 1))) >> 3; /* make OR/test mask */ - mask = 1UL << (nr & (__BITOPS_WORDSIZE - 1)); + mask = 1UL << (nr & (BITS_PER_LONG - 1)); /* Do the atomic update. */ __BITOPS_LOOP(old, new, addr, mask, __BITOPS_OR); - __BITOPS_BARRIER(); + barrier(); return (old & mask) != 0; } @@ -182,12 +177,12 @@ test_and_clear_bit_cs(unsigned long nr, volatile unsigned long *ptr) addr = (unsigned long) ptr; /* calculate address for CS */ - addr += (nr ^ (nr & (__BITOPS_WORDSIZE - 1))) >> 3; + addr += (nr ^ (nr & (BITS_PER_LONG - 1))) >> 3; /* make AND/test mask */ - mask = ~(1UL << (nr & (__BITOPS_WORDSIZE - 1))); + mask = ~(1UL << (nr & (BITS_PER_LONG - 1))); /* Do the atomic update. */ __BITOPS_LOOP(old, new, addr, mask, __BITOPS_AND); - __BITOPS_BARRIER(); + barrier(); return (old ^ new) != 0; } @@ -201,12 +196,12 @@ test_and_change_bit_cs(unsigned long nr, volatile unsigned long *ptr) addr = (unsigned long) ptr; /* calculate address for CS */ - addr += (nr ^ (nr & (__BITOPS_WORDSIZE - 1))) >> 3; + addr += (nr ^ (nr & (BITS_PER_LONG - 1))) >> 3; /* make XOR/test mask */ - mask = 1UL << (nr & (__BITOPS_WORDSIZE - 1)); + mask = 1UL << (nr & (BITS_PER_LONG - 1)); /* Do the atomic update. */ __BITOPS_LOOP(old, new, addr, mask, __BITOPS_XOR); - __BITOPS_BARRIER(); + barrier(); return (old & mask) != 0; } #endif /* CONFIG_SMP */ @@ -218,7 +213,7 @@ static inline void __set_bit(unsigned long nr, volatile unsigned long *ptr) { unsigned long addr; - addr = (unsigned long) ptr + ((nr ^ (__BITOPS_WORDSIZE - 8)) >> 3); + addr = (unsigned long) ptr + ((nr ^ (BITS_PER_LONG - 8)) >> 3); asm volatile( " oc %O0(1,%R0),%1" : "=Q" (*(char *) addr) : "Q" (_oi_bitmap[nr & 7]) : "cc" ); @@ -229,7 +224,7 @@ __constant_set_bit(const unsigned long nr, volatile unsigned long *ptr) { unsigned long addr; - addr = ((unsigned long) ptr) + ((nr ^ (__BITOPS_WORDSIZE - 8)) >> 3); + addr = ((unsigned long) ptr) + ((nr ^ (BITS_PER_LONG - 8)) >> 3); *(unsigned char *) addr |= 1 << (nr & 7); } @@ -246,7 +241,7 @@ __clear_bit(unsigned long nr, volatile unsigned long *ptr) { unsigned long addr; - addr = (unsigned long) ptr + ((nr ^ (__BITOPS_WORDSIZE - 8)) >> 3); + addr = (unsigned long) ptr + ((nr ^ (BITS_PER_LONG - 8)) >> 3); asm volatile( " nc %O0(1,%R0),%1" : "=Q" (*(char *) addr) : "Q" (_ni_bitmap[nr & 7]) : "cc" ); @@ -257,7 +252,7 @@ __constant_clear_bit(const unsigned long nr, volatile unsigned long *ptr) { unsigned long addr; - addr = ((unsigned long) ptr) + ((nr ^ (__BITOPS_WORDSIZE - 8)) >> 3); + addr = ((unsigned long) ptr) + ((nr ^ (BITS_PER_LONG - 8)) >> 3); *(unsigned char *) addr &= ~(1 << (nr & 7)); } @@ -273,7 +268,7 @@ static inline void __change_bit(unsigned long nr, volatile unsigned long *ptr) { unsigned long addr; - addr = (unsigned long) ptr + ((nr ^ (__BITOPS_WORDSIZE - 8)) >> 3); + addr = (unsigned long) ptr + ((nr ^ (BITS_PER_LONG - 8)) >> 3); asm volatile( " xc %O0(1,%R0),%1" : "=Q" (*(char *) addr) : "Q" (_oi_bitmap[nr & 7]) : "cc" ); @@ -284,7 +279,7 @@ __constant_change_bit(const unsigned long nr, volatile unsigned long *ptr) { unsigned long addr; - addr = ((unsigned long) ptr) + ((nr ^ (__BITOPS_WORDSIZE - 8)) >> 3); + addr = ((unsigned long) ptr) + ((nr ^ (BITS_PER_LONG - 8)) >> 3); *(unsigned char *) addr ^= 1 << (nr & 7); } @@ -302,7 +297,7 @@ test_and_set_bit_simple(unsigned long nr, volatile unsigned long *ptr) unsigned long addr; unsigned char ch; - addr = (unsigned long) ptr + ((nr ^ (__BITOPS_WORDSIZE - 8)) >> 3); + addr = (unsigned long) ptr + ((nr ^ (BITS_PER_LONG - 8)) >> 3); ch = *(unsigned char *) addr; asm volatile( " oc %O0(1,%R0),%1" @@ -321,7 +316,7 @@ test_and_clear_bit_simple(unsigned long nr, volatile unsigned long *ptr) unsigned long addr; unsigned char ch; - addr = (unsigned long) ptr + ((nr ^ (__BITOPS_WORDSIZE - 8)) >> 3); + addr = (unsigned long) ptr + ((nr ^ (BITS_PER_LONG - 8)) >> 3); ch = *(unsigned char *) addr; asm volatile( " nc %O0(1,%R0),%1" @@ -340,7 +335,7 @@ test_and_change_bit_simple(unsigned long nr, volatile unsigned long *ptr) unsigned long addr; unsigned char ch; - addr = (unsigned long) ptr + ((nr ^ (__BITOPS_WORDSIZE - 8)) >> 3); + addr = (unsigned long) ptr + ((nr ^ (BITS_PER_LONG - 8)) >> 3); ch = *(unsigned char *) addr; asm volatile( " xc %O0(1,%R0),%1" @@ -376,7 +371,7 @@ static inline int __test_bit(unsigned long nr, const volatile unsigned long *ptr unsigned long addr; unsigned char ch; - addr = (unsigned long) ptr + ((nr ^ (__BITOPS_WORDSIZE - 8)) >> 3); + addr = (unsigned long) ptr + ((nr ^ (BITS_PER_LONG - 8)) >> 3); ch = *(volatile unsigned char *) addr; return (ch >> (nr & 7)) & 1; } @@ -384,7 +379,7 @@ static inline int __test_bit(unsigned long nr, const volatile unsigned long *ptr static inline int __constant_test_bit(unsigned long nr, const volatile unsigned long *addr) { return (((volatile char *) addr) - [(nr^(__BITOPS_WORDSIZE-8))>>3] & (1<<(nr&7))) != 0; + [(nr^(BITS_PER_LONG-8))>>3] & (1<<(nr&7))) != 0; } #define test_bit(nr,addr) \ @@ -693,18 +688,18 @@ static inline int find_next_bit_left(const unsigned long *addr, if (offset >= size) return size; - bit = offset & (__BITOPS_WORDSIZE - 1); + bit = offset & (BITS_PER_LONG - 1); offset -= bit; size -= offset; - p = addr + offset / __BITOPS_WORDSIZE; + p = addr + offset / BITS_PER_LONG; if (bit) { set = __flo_word(0, *p & (~0UL << bit)); if (set >= size) return size + offset; - if (set < __BITOPS_WORDSIZE) + if (set < BITS_PER_LONG) return set + offset; - offset += __BITOPS_WORDSIZE; - size -= __BITOPS_WORDSIZE; + offset += BITS_PER_LONG; + size -= BITS_PER_LONG; p++; } return offset + find_first_bit_left(p, size); @@ -736,22 +731,22 @@ static inline int find_next_zero_bit (const unsigned long * addr, if (offset >= size) return size; - bit = offset & (__BITOPS_WORDSIZE - 1); + bit = offset & (BITS_PER_LONG - 1); offset -= bit; size -= offset; - p = addr + offset / __BITOPS_WORDSIZE; + p = addr + offset / BITS_PER_LONG; if (bit) { /* - * __ffz_word returns __BITOPS_WORDSIZE + * __ffz_word returns BITS_PER_LONG * if no zero bit is present in the word. */ set = __ffz_word(bit, *p >> bit); if (set >= size) return size + offset; - if (set < __BITOPS_WORDSIZE) + if (set < BITS_PER_LONG) return set + offset; - offset += __BITOPS_WORDSIZE; - size -= __BITOPS_WORDSIZE; + offset += BITS_PER_LONG; + size -= BITS_PER_LONG; p++; } return offset + find_first_zero_bit(p, size); @@ -773,22 +768,22 @@ static inline int find_next_bit (const unsigned long * addr, if (offset >= size) return size; - bit = offset & (__BITOPS_WORDSIZE - 1); + bit = offset & (BITS_PER_LONG - 1); offset -= bit; size -= offset; - p = addr + offset / __BITOPS_WORDSIZE; + p = addr + offset / BITS_PER_LONG; if (bit) { /* - * __ffs_word returns __BITOPS_WORDSIZE + * __ffs_word returns BITS_PER_LONG * if no one bit is present in the word. */ set = __ffs_word(0, *p & (~0UL << bit)); if (set >= size) return size + offset; - if (set < __BITOPS_WORDSIZE) + if (set < BITS_PER_LONG) return set + offset; - offset += __BITOPS_WORDSIZE; - size -= __BITOPS_WORDSIZE; + offset += BITS_PER_LONG; + size -= BITS_PER_LONG; p++; } return offset + find_first_bit(p, size); @@ -843,22 +838,22 @@ static inline int find_next_zero_bit_le(void *vaddr, unsigned long size, if (offset >= size) return size; - bit = offset & (__BITOPS_WORDSIZE - 1); + bit = offset & (BITS_PER_LONG - 1); offset -= bit; size -= offset; - p = addr + offset / __BITOPS_WORDSIZE; + p = addr + offset / BITS_PER_LONG; if (bit) { /* - * s390 version of ffz returns __BITOPS_WORDSIZE + * s390 version of ffz returns BITS_PER_LONG * if no zero bit is present in the word. */ set = __ffz_word(bit, __load_ulong_le(p, 0) >> bit); if (set >= size) return size + offset; - if (set < __BITOPS_WORDSIZE) + if (set < BITS_PER_LONG) return set + offset; - offset += __BITOPS_WORDSIZE; - size -= __BITOPS_WORDSIZE; + offset += BITS_PER_LONG; + size -= BITS_PER_LONG; p++; } return offset + find_first_zero_bit_le(p, size); @@ -885,22 +880,22 @@ static inline int find_next_bit_le(void *vaddr, unsigned long size, if (offset >= size) return size; - bit = offset & (__BITOPS_WORDSIZE - 1); + bit = offset & (BITS_PER_LONG - 1); offset -= bit; size -= offset; - p = addr + offset / __BITOPS_WORDSIZE; + p = addr + offset / BITS_PER_LONG; if (bit) { /* - * s390 version of ffz returns __BITOPS_WORDSIZE + * s390 version of ffz returns BITS_PER_LONG * if no zero bit is present in the word. */ set = __ffs_word(0, __load_ulong_le(p, 0) & (~0UL << bit)); if (set >= size) return size + offset; - if (set < __BITOPS_WORDSIZE) + if (set < BITS_PER_LONG) return set + offset; - offset += __BITOPS_WORDSIZE; - size -= __BITOPS_WORDSIZE; + offset += BITS_PER_LONG; + size -= BITS_PER_LONG; p++; } return offset + find_first_bit_le(p, size); diff --git a/arch/s390/include/asm/ccwdev.h b/arch/s390/include/asm/ccwdev.h index e6061617a50b..f201af8be580 100644 --- a/arch/s390/include/asm/ccwdev.h +++ b/arch/s390/include/asm/ccwdev.h @@ -220,7 +220,8 @@ extern void ccw_device_get_id(struct ccw_device *, struct ccw_dev_id *); #define to_ccwdrv(n) container_of(n, struct ccw_driver, driver) extern struct ccw_device *ccw_device_probe_console(void); -extern int ccw_device_force_console(void); +extern void ccw_device_wait_idle(struct ccw_device *); +extern int ccw_device_force_console(struct ccw_device *); int ccw_device_siosl(struct ccw_device *); diff --git a/arch/s390/include/asm/cio.h b/arch/s390/include/asm/cio.h index ad2b924167d7..ffb898961c8d 100644 --- a/arch/s390/include/asm/cio.h +++ b/arch/s390/include/asm/cio.h @@ -296,8 +296,6 @@ static inline int ccw_dev_id_is_equal(struct ccw_dev_id *dev_id1, return 0; } -extern void wait_cons_dev(void); - extern void css_schedule_reprobe(void); extern void reipl_ccw_dev(struct ccw_dev_id *id); diff --git a/arch/s390/include/asm/compat.h b/arch/s390/include/asm/compat.h index f8c6df6cd1f0..c1e7c646727c 100644 --- a/arch/s390/include/asm/compat.h +++ b/arch/s390/include/asm/compat.h @@ -70,6 +70,22 @@ typedef u32 compat_ulong_t; typedef u64 compat_u64; typedef u32 compat_uptr_t; +typedef struct { + u32 mask; + u32 addr; +} __aligned(8) psw_compat_t; + +typedef struct { + psw_compat_t psw; + u32 gprs[NUM_GPRS]; + u32 acrs[NUM_ACRS]; + u32 orig_gpr2; +} s390_compat_regs; + +typedef struct { + u32 gprs_high[NUM_GPRS]; +} s390_compat_regs_high; + struct compat_timespec { compat_time_t tv_sec; s32 tv_nsec; @@ -124,18 +140,33 @@ struct compat_flock64 { }; struct compat_statfs { - s32 f_type; - s32 f_bsize; - s32 f_blocks; - s32 f_bfree; - s32 f_bavail; - s32 f_files; - s32 f_ffree; + u32 f_type; + u32 f_bsize; + u32 f_blocks; + u32 f_bfree; + u32 f_bavail; + u32 f_files; + u32 f_ffree; + compat_fsid_t f_fsid; + u32 f_namelen; + u32 f_frsize; + u32 f_flags; + u32 f_spare[4]; +}; + +struct compat_statfs64 { + u32 f_type; + u32 f_bsize; + u64 f_blocks; + u64 f_bfree; + u64 f_bavail; + u64 f_files; + u64 f_ffree; compat_fsid_t f_fsid; - s32 f_namelen; - s32 f_frsize; - s32 f_flags; - s32 f_spare[5]; + u32 f_namelen; + u32 f_frsize; + u32 f_flags; + u32 f_spare[4]; }; #define COMPAT_RLIM_OLD_INFINITY 0x7fffffff @@ -248,8 +279,6 @@ static inline int is_compat_task(void) return is_32bit_task(); } -#endif - static inline void __user *arch_compat_alloc_user_space(long len) { unsigned long stack; @@ -260,6 +289,8 @@ static inline void __user *arch_compat_alloc_user_space(long len) return (void __user *) (stack - len); } +#endif + struct compat_ipc64_perm { compat_key_t key; __compat_uid32_t uid; diff --git a/arch/s390/include/asm/elf.h b/arch/s390/include/asm/elf.h index 1bfdf24b85a2..78f4f8711d58 100644 --- a/arch/s390/include/asm/elf.h +++ b/arch/s390/include/asm/elf.h @@ -119,6 +119,8 @@ */ #include <asm/ptrace.h> +#include <asm/compat.h> +#include <asm/syscall.h> #include <asm/user.h> typedef s390_fp_regs elf_fpregset_t; @@ -180,18 +182,31 @@ extern unsigned long elf_hwcap; extern char elf_platform[]; #define ELF_PLATFORM (elf_platform) -#ifdef CONFIG_64BIT +#ifndef CONFIG_COMPAT +#define SET_PERSONALITY(ex) \ +do { \ + set_personality(PER_LINUX | \ + (current->personality & (~PER_MASK))); \ + current_thread_info()->sys_call_table = \ + (unsigned long) &sys_call_table; \ +} while (0) +#else /* CONFIG_COMPAT */ #define SET_PERSONALITY(ex) \ do { \ if (personality(current->personality) != PER_LINUX32) \ set_personality(PER_LINUX | \ (current->personality & ~PER_MASK)); \ - if ((ex).e_ident[EI_CLASS] == ELFCLASS32) \ + if ((ex).e_ident[EI_CLASS] == ELFCLASS32) { \ set_thread_flag(TIF_31BIT); \ - else \ + current_thread_info()->sys_call_table = \ + (unsigned long) &sys_call_table_emu; \ + } else { \ clear_thread_flag(TIF_31BIT); \ + current_thread_info()->sys_call_table = \ + (unsigned long) &sys_call_table; \ + } \ } while (0) -#endif /* CONFIG_64BIT */ +#endif /* CONFIG_COMPAT */ #define STACK_RND_MASK 0x7ffUL diff --git a/arch/s390/include/asm/hugetlb.h b/arch/s390/include/asm/hugetlb.h index 593753ee07f3..bd90359d6d22 100644 --- a/arch/s390/include/asm/hugetlb.h +++ b/arch/s390/include/asm/hugetlb.h @@ -114,7 +114,7 @@ static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm, #define huge_ptep_set_wrprotect(__mm, __addr, __ptep) \ ({ \ pte_t __pte = huge_ptep_get(__ptep); \ - if (pte_write(__pte)) { \ + if (huge_pte_write(__pte)) { \ huge_ptep_invalidate(__mm, __addr, __ptep); \ set_huge_pte_at(__mm, __addr, __ptep, \ huge_pte_wrprotect(__pte)); \ @@ -127,4 +127,58 @@ static inline void huge_ptep_clear_flush(struct vm_area_struct *vma, huge_ptep_invalidate(vma->vm_mm, address, ptep); } +static inline pte_t mk_huge_pte(struct page *page, pgprot_t pgprot) +{ + pte_t pte; + pmd_t pmd; + + pmd = mk_pmd_phys(page_to_phys(page), pgprot); + pte_val(pte) = pmd_val(pmd); + return pte; +} + +static inline int huge_pte_write(pte_t pte) +{ + pmd_t pmd; + + pmd_val(pmd) = pte_val(pte); + return pmd_write(pmd); +} + +static inline int huge_pte_dirty(pte_t pte) +{ + /* No dirty bit in the segment table entry. */ + return 0; +} + +static inline pte_t huge_pte_mkwrite(pte_t pte) +{ + pmd_t pmd; + + pmd_val(pmd) = pte_val(pte); + pte_val(pte) = pmd_val(pmd_mkwrite(pmd)); + return pte; +} + +static inline pte_t huge_pte_mkdirty(pte_t pte) +{ + /* No dirty bit in the segment table entry. */ + return pte; +} + +static inline pte_t huge_pte_modify(pte_t pte, pgprot_t newprot) +{ + pmd_t pmd; + + pmd_val(pmd) = pte_val(pte); + pte_val(pte) = pmd_val(pmd_modify(pmd, newprot)); + return pte; +} + +static inline void huge_pte_clear(struct mm_struct *mm, unsigned long addr, + pte_t *ptep) +{ + pmd_clear((pmd_t *) ptep); +} + #endif /* _ASM_S390_HUGETLB_H */ diff --git a/arch/s390/include/asm/io.h b/arch/s390/include/asm/io.h index 27cb32185ce1..379d96e2105e 100644 --- a/arch/s390/include/asm/io.h +++ b/arch/s390/include/asm/io.h @@ -50,10 +50,6 @@ void unxlate_dev_mem_ptr(unsigned long phys, void *addr); #define ioremap_nocache(addr, size) ioremap(addr, size) #define ioremap_wc ioremap_nocache -/* TODO: s390 cannot support io_remap_pfn_range... */ -#define io_remap_pfn_range(vma, vaddr, pfn, size, prot) \ - remap_pfn_range(vma, vaddr, pfn, size, prot) - static inline void __iomem *ioremap(unsigned long offset, unsigned long size) { return (void __iomem *) offset; diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h index 05333b7f0469..6c1801235db9 100644 --- a/arch/s390/include/asm/pci.h +++ b/arch/s390/include/asm/pci.h @@ -140,6 +140,7 @@ static inline bool zdev_enabled(struct zpci_dev *zdev) struct zpci_dev *zpci_alloc_device(void); int zpci_create_device(struct zpci_dev *); int zpci_enable_device(struct zpci_dev *); +int zpci_disable_device(struct zpci_dev *); void zpci_stop_device(struct zpci_dev *); void zpci_free_device(struct zpci_dev *); int zpci_scan_device(struct zpci_dev *); diff --git a/arch/s390/include/asm/pci_debug.h b/arch/s390/include/asm/pci_debug.h index 6bbec4265b6e..1ca5d1047c71 100644 --- a/arch/s390/include/asm/pci_debug.h +++ b/arch/s390/include/asm/pci_debug.h @@ -7,14 +7,11 @@ extern debug_info_t *pci_debug_msg_id; extern debug_info_t *pci_debug_err_id; #ifdef CONFIG_PCI_DEBUG -#define zpci_dbg(fmt, args...) \ - do { \ - if (pci_debug_msg_id->level >= 2) \ - debug_sprintf_event(pci_debug_msg_id, 2, fmt , ## args);\ - } while (0) +#define zpci_dbg(imp, fmt, args...) \ + debug_sprintf_event(pci_debug_msg_id, imp, fmt, ##args) #else /* !CONFIG_PCI_DEBUG */ -#define zpci_dbg(fmt, args...) do { } while (0) +#define zpci_dbg(imp, fmt, args...) do { } while (0) #endif #define zpci_err(text...) \ diff --git a/arch/s390/include/asm/pci_insn.h b/arch/s390/include/asm/pci_insn.h index 1486a98d5dad..e6a2bdd4d705 100644 --- a/arch/s390/include/asm/pci_insn.h +++ b/arch/s390/include/asm/pci_insn.h @@ -1,10 +1,6 @@ #ifndef _ASM_S390_PCI_INSN_H #define _ASM_S390_PCI_INSN_H -#include <linux/delay.h> - -#define ZPCI_INSN_BUSY_DELAY 1 /* 1 microsecond */ - /* Load/Store status codes */ #define ZPCI_PCI_ST_FUNC_NOT_ENABLED 4 #define ZPCI_PCI_ST_FUNC_IN_ERR 8 @@ -82,199 +78,12 @@ struct zpci_fib { u64 reserved7; } __packed; -/* Modify PCI Function Controls */ -static inline u8 __mpcifc(u64 req, struct zpci_fib *fib, u8 *status) -{ - u8 cc; - - asm volatile ( - " .insn rxy,0xe300000000d0,%[req],%[fib]\n" - " ipm %[cc]\n" - " srl %[cc],28\n" - : [cc] "=d" (cc), [req] "+d" (req), [fib] "+Q" (*fib) - : : "cc"); - *status = req >> 24 & 0xff; - return cc; -} - -static inline int mpcifc_instr(u64 req, struct zpci_fib *fib) -{ - u8 cc, status; - - do { - cc = __mpcifc(req, fib, &status); - if (cc == 2) - msleep(ZPCI_INSN_BUSY_DELAY); - } while (cc == 2); - - if (cc) - printk_once(KERN_ERR "%s: error cc: %d status: %d\n", - __func__, cc, status); - return (cc) ? -EIO : 0; -} - -/* Refresh PCI Translations */ -static inline u8 __rpcit(u64 fn, u64 addr, u64 range, u8 *status) -{ - register u64 __addr asm("2") = addr; - register u64 __range asm("3") = range; - u8 cc; - - asm volatile ( - " .insn rre,0xb9d30000,%[fn],%[addr]\n" - " ipm %[cc]\n" - " srl %[cc],28\n" - : [cc] "=d" (cc), [fn] "+d" (fn) - : [addr] "d" (__addr), "d" (__range) - : "cc"); - *status = fn >> 24 & 0xff; - return cc; -} - -static inline int rpcit_instr(u64 fn, u64 addr, u64 range) -{ - u8 cc, status; - - do { - cc = __rpcit(fn, addr, range, &status); - if (cc == 2) - udelay(ZPCI_INSN_BUSY_DELAY); - } while (cc == 2); - - if (cc) - printk_once(KERN_ERR "%s: error cc: %d status: %d dma_addr: %Lx size: %Lx\n", - __func__, cc, status, addr, range); - return (cc) ? -EIO : 0; -} - -/* Store PCI function controls */ -static inline u8 __stpcifc(u32 handle, u8 space, struct zpci_fib *fib, u8 *status) -{ - u64 fn = (u64) handle << 32 | space << 16; - u8 cc; - - asm volatile ( - " .insn rxy,0xe300000000d4,%[fn],%[fib]\n" - " ipm %[cc]\n" - " srl %[cc],28\n" - : [cc] "=d" (cc), [fn] "+d" (fn), [fib] "=m" (*fib) - : : "cc"); - *status = fn >> 24 & 0xff; - return cc; -} - -/* Set Interruption Controls */ -static inline void sic_instr(u16 ctl, char *unused, u8 isc) -{ - asm volatile ( - " .insn rsy,0xeb00000000d1,%[ctl],%[isc],%[u]\n" - : : [ctl] "d" (ctl), [isc] "d" (isc << 27), [u] "Q" (*unused)); -} - -/* PCI Load */ -static inline u8 __pcilg(u64 *data, u64 req, u64 offset, u8 *status) -{ - register u64 __req asm("2") = req; - register u64 __offset asm("3") = offset; - u64 __data; - u8 cc; - - asm volatile ( - " .insn rre,0xb9d20000,%[data],%[req]\n" - " ipm %[cc]\n" - " srl %[cc],28\n" - : [cc] "=d" (cc), [data] "=d" (__data), [req] "+d" (__req) - : "d" (__offset) - : "cc"); - *status = __req >> 24 & 0xff; - *data = __data; - return cc; -} - -static inline int pcilg_instr(u64 *data, u64 req, u64 offset) -{ - u8 cc, status; - - do { - cc = __pcilg(data, req, offset, &status); - if (cc == 2) - udelay(ZPCI_INSN_BUSY_DELAY); - } while (cc == 2); - - if (cc) { - printk_once(KERN_ERR "%s: error cc: %d status: %d req: %Lx offset: %Lx\n", - __func__, cc, status, req, offset); - /* TODO: on IO errors set data to 0xff... - * here or in users of pcilg (le conversion)? - */ - } - return (cc) ? -EIO : 0; -} - -/* PCI Store */ -static inline u8 __pcistg(u64 data, u64 req, u64 offset, u8 *status) -{ - register u64 __req asm("2") = req; - register u64 __offset asm("3") = offset; - u8 cc; - - asm volatile ( - " .insn rre,0xb9d00000,%[data],%[req]\n" - " ipm %[cc]\n" - " srl %[cc],28\n" - : [cc] "=d" (cc), [req] "+d" (__req) - : "d" (__offset), [data] "d" (data) - : "cc"); - *status = __req >> 24 & 0xff; - return cc; -} - -static inline int pcistg_instr(u64 data, u64 req, u64 offset) -{ - u8 cc, status; - - do { - cc = __pcistg(data, req, offset, &status); - if (cc == 2) - udelay(ZPCI_INSN_BUSY_DELAY); - } while (cc == 2); - - if (cc) - printk_once(KERN_ERR "%s: error cc: %d status: %d req: %Lx offset: %Lx\n", - __func__, cc, status, req, offset); - return (cc) ? -EIO : 0; -} - -/* PCI Store Block */ -static inline u8 __pcistb(const u64 *data, u64 req, u64 offset, u8 *status) -{ - u8 cc; - - asm volatile ( - " .insn rsy,0xeb00000000d0,%[req],%[offset],%[data]\n" - " ipm %[cc]\n" - " srl %[cc],28\n" - : [cc] "=d" (cc), [req] "+d" (req) - : [offset] "d" (offset), [data] "Q" (*data) - : "cc"); - *status = req >> 24 & 0xff; - return cc; -} - -static inline int pcistb_instr(const u64 *data, u64 req, u64 offset) -{ - u8 cc, status; - - do { - cc = __pcistb(data, req, offset, &status); - if (cc == 2) - udelay(ZPCI_INSN_BUSY_DELAY); - } while (cc == 2); - if (cc) - printk_once(KERN_ERR "%s: error cc: %d status: %d req: %Lx offset: %Lx\n", - __func__, cc, status, req, offset); - return (cc) ? -EIO : 0; -} +int s390pci_mod_fc(u64 req, struct zpci_fib *fib); +int s390pci_refresh_trans(u64 fn, u64 addr, u64 range); +int s390pci_load(u64 *data, u64 req, u64 offset); +int s390pci_store(u64 data, u64 req, u64 offset); +int s390pci_store_block(const u64 *data, u64 req, u64 offset); +void set_irq_ctrl(u16 ctl, char *unused, u8 isc); #endif diff --git a/arch/s390/include/asm/pci_io.h b/arch/s390/include/asm/pci_io.h index 5fd81f31d6c7..83a9caa6ae53 100644 --- a/arch/s390/include/asm/pci_io.h +++ b/arch/s390/include/asm/pci_io.h @@ -36,7 +36,7 @@ static inline RETTYPE zpci_read_##RETTYPE(const volatile void __iomem *addr) \ u64 data; \ int rc; \ \ - rc = pcilg_instr(&data, req, ZPCI_OFFSET(addr)); \ + rc = s390pci_load(&data, req, ZPCI_OFFSET(addr)); \ if (rc) \ data = -1ULL; \ return (RETTYPE) data; \ @@ -50,7 +50,7 @@ static inline void zpci_write_##VALTYPE(VALTYPE val, \ u64 req = ZPCI_CREATE_REQ(entry->fh, entry->bar, LENGTH); \ u64 data = (VALTYPE) val; \ \ - pcistg_instr(data, req, ZPCI_OFFSET(addr)); \ + s390pci_store(data, req, ZPCI_OFFSET(addr)); \ } zpci_read(8, u64) @@ -83,15 +83,18 @@ static inline int zpci_write_single(u64 req, const u64 *data, u64 offset, u8 len val = 0; /* let FW report error */ break; } - return pcistg_instr(val, req, offset); + return s390pci_store(val, req, offset); } static inline int zpci_read_single(u64 req, u64 *dst, u64 offset, u8 len) { u64 data; - u8 cc; + int cc; + + cc = s390pci_load(&data, req, offset); + if (cc) + goto out; - cc = pcilg_instr(&data, req, offset); switch (len) { case 1: *((u8 *) dst) = (u8) data; @@ -106,12 +109,13 @@ static inline int zpci_read_single(u64 req, u64 *dst, u64 offset, u8 len) *((u64 *) dst) = (u64) data; break; } +out: return cc; } static inline int zpci_write_block(u64 req, const u64 *data, u64 offset) { - return pcistb_instr(data, req, offset); + return s390pci_store_block(data, req, offset); } static inline u8 zpci_get_max_write_size(u64 src, u64 dst, int len, int max) diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 4a2930844d43..4105b8221fdd 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -57,6 +57,10 @@ extern unsigned long zero_page_mask; (((unsigned long)(vaddr)) &zero_page_mask)))) #define __HAVE_COLOR_ZERO_PAGE +/* TODO: s390 cannot support io_remap_pfn_range... */ +#define io_remap_pfn_range(vma, vaddr, pfn, size, prot) \ + remap_pfn_range(vma, vaddr, pfn, size, prot) + #endif /* !__ASSEMBLY__ */ /* @@ -302,6 +306,7 @@ extern unsigned long MODULES_END; #define RCP_HC_BIT 0x00200000UL #define RCP_GR_BIT 0x00040000UL #define RCP_GC_BIT 0x00020000UL +#define RCP_IN_BIT 0x00008000UL /* IPTE notify bit */ /* User dirty / referenced bit for KVM's migration feature */ #define KVM_UR_BIT 0x00008000UL @@ -344,6 +349,7 @@ extern unsigned long MODULES_END; #define _REGION3_ENTRY_CO 0x100 /* change-recording override */ /* Bits in the segment table entry */ +#define _SEGMENT_ENTRY_ORIGIN_LARGE ~0xfffffUL /* large page address */ #define _SEGMENT_ENTRY_ORIGIN ~0x7ffUL/* segment table origin */ #define _SEGMENT_ENTRY_RO 0x200 /* page protection bit */ #define _SEGMENT_ENTRY_INV 0x20 /* invalid segment table entry */ @@ -368,6 +374,7 @@ extern unsigned long MODULES_END; #define RCP_HC_BIT 0x0020000000000000UL #define RCP_GR_BIT 0x0004000000000000UL #define RCP_GC_BIT 0x0002000000000000UL +#define RCP_IN_BIT 0x0000800000000000UL /* IPTE notify bit */ /* User dirty / referenced bit for KVM's migration feature */ #define KVM_UR_BIT 0x0000800000000000UL @@ -419,6 +426,13 @@ extern unsigned long MODULES_END; #define __S110 PAGE_RW #define __S111 PAGE_RW +/* + * Segment entry (large page) protection definitions. + */ +#define SEGMENT_NONE __pgprot(_HPAGE_TYPE_NONE) +#define SEGMENT_RO __pgprot(_HPAGE_TYPE_RO) +#define SEGMENT_RW __pgprot(_HPAGE_TYPE_RW) + static inline int mm_exclusive(struct mm_struct *mm) { return likely(mm == current->active_mm && @@ -734,35 +748,67 @@ struct gmap { /** * struct gmap_rmap - reverse mapping for segment table entries - * @next: pointer to the next gmap_rmap structure in the list + * @gmap: pointer to the gmap_struct * @entry: pointer to a segment table entry + * @vmaddr: virtual address in the guest address space */ struct gmap_rmap { struct list_head list; + struct gmap *gmap; unsigned long *entry; + unsigned long vmaddr; }; /** * struct gmap_pgtable - gmap information attached to a page table * @vmaddr: address of the 1MB segment in the process virtual memory - * @mapper: list of segment table entries maping a page table + * @mapper: list of segment table entries mapping a page table */ struct gmap_pgtable { unsigned long vmaddr; struct list_head mapper; }; +/** + * struct gmap_notifier - notify function block for page invalidation + * @notifier_call: address of callback function + */ +struct gmap_notifier { + struct list_head list; + void (*notifier_call)(struct gmap *gmap, unsigned long address); +}; + struct gmap *gmap_alloc(struct mm_struct *mm); void gmap_free(struct gmap *gmap); void gmap_enable(struct gmap *gmap); void gmap_disable(struct gmap *gmap); int gmap_map_segment(struct gmap *gmap, unsigned long from, - unsigned long to, unsigned long length); + unsigned long to, unsigned long len); int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len); +unsigned long __gmap_translate(unsigned long address, struct gmap *); +unsigned long gmap_translate(unsigned long address, struct gmap *); unsigned long __gmap_fault(unsigned long address, struct gmap *); unsigned long gmap_fault(unsigned long address, struct gmap *); void gmap_discard(unsigned long from, unsigned long to, struct gmap *); +void gmap_register_ipte_notifier(struct gmap_notifier *); +void gmap_unregister_ipte_notifier(struct gmap_notifier *); +int gmap_ipte_notify(struct gmap *, unsigned long start, unsigned long len); +void gmap_do_ipte_notify(struct mm_struct *, unsigned long addr, pte_t *); + +static inline pgste_t pgste_ipte_notify(struct mm_struct *mm, + unsigned long addr, + pte_t *ptep, pgste_t pgste) +{ +#ifdef CONFIG_PGSTE + if (pgste_val(pgste) & RCP_IN_BIT) { + pgste_val(pgste) &= ~RCP_IN_BIT; + gmap_do_ipte_notify(mm, addr, ptep); + } +#endif + return pgste; +} + /* * Certain architectures need to do special things when PTEs * within a page table are directly modified. Thus, the following @@ -907,26 +953,6 @@ static inline pte_t pte_mkspecial(pte_t pte) #ifdef CONFIG_HUGETLB_PAGE static inline pte_t pte_mkhuge(pte_t pte) { - /* - * PROT_NONE needs to be remapped from the pte type to the ste type. - * The HW invalid bit is also different for pte and ste. The pte - * invalid bit happens to be the same as the ste _SEGMENT_ENTRY_LARGE - * bit, so we don't have to clear it. - */ - if (pte_val(pte) & _PAGE_INVALID) { - if (pte_val(pte) & _PAGE_SWT) - pte_val(pte) |= _HPAGE_TYPE_NONE; - pte_val(pte) |= _SEGMENT_ENTRY_INV; - } - /* - * Clear SW pte bits, there are no SW bits in a segment table entry. - */ - pte_val(pte) &= ~(_PAGE_SWT | _PAGE_SWX | _PAGE_SWC | - _PAGE_SWR | _PAGE_SWW); - /* - * Also set the change-override bit because we don't need dirty bit - * tracking for hugetlbfs pages. - */ pte_val(pte) |= (_SEGMENT_ENTRY_LARGE | _SEGMENT_ENTRY_CO); return pte; } @@ -1038,8 +1064,10 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm, pte_t pte; mm->context.flush_mm = 1; - if (mm_has_pgste(mm)) + if (mm_has_pgste(mm)) { pgste = pgste_get_lock(ptep); + pgste = pgste_ipte_notify(mm, address, ptep, pgste); + } pte = *ptep; if (!mm_exclusive(mm)) @@ -1058,11 +1086,14 @@ static inline pte_t ptep_modify_prot_start(struct mm_struct *mm, unsigned long address, pte_t *ptep) { + pgste_t pgste; pte_t pte; mm->context.flush_mm = 1; - if (mm_has_pgste(mm)) - pgste_get_lock(ptep); + if (mm_has_pgste(mm)) { + pgste = pgste_get_lock(ptep); + pgste_ipte_notify(mm, address, ptep, pgste); + } pte = *ptep; if (!mm_exclusive(mm)) @@ -1088,8 +1119,10 @@ static inline pte_t ptep_clear_flush(struct vm_area_struct *vma, pgste_t pgste; pte_t pte; - if (mm_has_pgste(vma->vm_mm)) + if (mm_has_pgste(vma->vm_mm)) { pgste = pgste_get_lock(ptep); + pgste = pgste_ipte_notify(vma->vm_mm, address, ptep, pgste); + } pte = *ptep; __ptep_ipte(address, ptep); @@ -1117,8 +1150,11 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, pgste_t pgste; pte_t pte; - if (mm_has_pgste(mm)) + if (mm_has_pgste(mm)) { pgste = pgste_get_lock(ptep); + if (!full) + pgste = pgste_ipte_notify(mm, address, ptep, pgste); + } pte = *ptep; if (!full) @@ -1141,8 +1177,10 @@ static inline pte_t ptep_set_wrprotect(struct mm_struct *mm, if (pte_write(pte)) { mm->context.flush_mm = 1; - if (mm_has_pgste(mm)) + if (mm_has_pgste(mm)) { pgste = pgste_get_lock(ptep); + pgste = pgste_ipte_notify(mm, address, ptep, pgste); + } if (!mm_exclusive(mm)) __ptep_ipte(address, ptep); @@ -1166,8 +1204,10 @@ static inline int ptep_set_access_flags(struct vm_area_struct *vma, if (pte_same(*ptep, entry)) return 0; - if (mm_has_pgste(vma->vm_mm)) + if (mm_has_pgste(vma->vm_mm)) { pgste = pgste_get_lock(ptep); + pgste = pgste_ipte_notify(vma->vm_mm, address, ptep, pgste); + } __ptep_ipte(address, ptep); @@ -1271,31 +1311,7 @@ static inline void __pmd_idte(unsigned long address, pmd_t *pmdp) } } -#ifdef CONFIG_TRANSPARENT_HUGEPAGE - -#define SEGMENT_NONE __pgprot(_HPAGE_TYPE_NONE) -#define SEGMENT_RO __pgprot(_HPAGE_TYPE_RO) -#define SEGMENT_RW __pgprot(_HPAGE_TYPE_RW) - -#define __HAVE_ARCH_PGTABLE_DEPOSIT -extern void pgtable_trans_huge_deposit(struct mm_struct *mm, pgtable_t pgtable); - -#define __HAVE_ARCH_PGTABLE_WITHDRAW -extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm); - -static inline int pmd_trans_splitting(pmd_t pmd) -{ - return pmd_val(pmd) & _SEGMENT_ENTRY_SPLIT; -} - -static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr, - pmd_t *pmdp, pmd_t entry) -{ - if (!(pmd_val(entry) & _SEGMENT_ENTRY_INV) && MACHINE_HAS_EDAT1) - pmd_val(entry) |= _SEGMENT_ENTRY_CO; - *pmdp = entry; -} - +#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLB_PAGE) static inline unsigned long massage_pgprot_pmd(pgprot_t pgprot) { /* @@ -1316,10 +1332,11 @@ static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot) return pmd; } -static inline pmd_t pmd_mkhuge(pmd_t pmd) +static inline pmd_t mk_pmd_phys(unsigned long physpage, pgprot_t pgprot) { - pmd_val(pmd) |= _SEGMENT_ENTRY_LARGE; - return pmd; + pmd_t __pmd; + pmd_val(__pmd) = physpage + massage_pgprot_pmd(pgprot); + return __pmd; } static inline pmd_t pmd_mkwrite(pmd_t pmd) @@ -1329,6 +1346,34 @@ static inline pmd_t pmd_mkwrite(pmd_t pmd) pmd_val(pmd) &= ~_SEGMENT_ENTRY_RO; return pmd; } +#endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLB_PAGE */ + +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + +#define __HAVE_ARCH_PGTABLE_DEPOSIT +extern void pgtable_trans_huge_deposit(struct mm_struct *mm, pgtable_t pgtable); + +#define __HAVE_ARCH_PGTABLE_WITHDRAW +extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm); + +static inline int pmd_trans_splitting(pmd_t pmd) +{ + return pmd_val(pmd) & _SEGMENT_ENTRY_SPLIT; +} + +static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr, + pmd_t *pmdp, pmd_t entry) +{ + if (!(pmd_val(entry) & _SEGMENT_ENTRY_INV) && MACHINE_HAS_EDAT1) + pmd_val(entry) |= _SEGMENT_ENTRY_CO; + *pmdp = entry; +} + +static inline pmd_t pmd_mkhuge(pmd_t pmd) +{ + pmd_val(pmd) |= _SEGMENT_ENTRY_LARGE; + return pmd; +} static inline pmd_t pmd_wrprotect(pmd_t pmd) { @@ -1425,13 +1470,6 @@ static inline void pmdp_set_wrprotect(struct mm_struct *mm, } } -static inline pmd_t mk_pmd_phys(unsigned long physpage, pgprot_t pgprot) -{ - pmd_t __pmd; - pmd_val(__pmd) = physpage + massage_pgprot_pmd(pgprot); - return __pmd; -} - #define pfn_pmd(pfn, pgprot) mk_pmd_phys(__pa((pfn) << PAGE_SHIFT), (pgprot)) #define mk_pmd(page, pgprot) pfn_pmd(page_to_pfn(page), (pgprot)) @@ -1531,7 +1569,8 @@ extern int s390_enable_sie(void); /* * No page table caches to initialise */ -#define pgtable_cache_init() do { } while (0) +static inline void pgtable_cache_init(void) { } +static inline void check_pgt_cache(void) { } #include <asm-generic/pgtable.h> diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index 94e749c90230..6b499870662f 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -161,7 +161,8 @@ extern unsigned long thread_saved_pc(struct task_struct *t); extern void show_code(struct pt_regs *regs); extern void print_fn_code(unsigned char *code, unsigned long len); -extern int insn_to_mnemonic(unsigned char *instruction, char buf[8]); +extern int insn_to_mnemonic(unsigned char *instruction, char *buf, + unsigned int len); unsigned long get_wchan(struct task_struct *p); #define task_pt_regs(tsk) ((struct pt_regs *) \ diff --git a/arch/s390/include/asm/ptrace.h b/arch/s390/include/asm/ptrace.h index 3ee5da3bc10c..559512a455da 100644 --- a/arch/s390/include/asm/ptrace.h +++ b/arch/s390/include/asm/ptrace.h @@ -9,9 +9,7 @@ #include <uapi/asm/ptrace.h> #ifndef __ASSEMBLY__ -#ifndef __s390x__ -#else /* __s390x__ */ -#endif /* __s390x__ */ + extern long psw_kernel_bits; extern long psw_user_bits; @@ -77,8 +75,6 @@ struct per_struct_kernel { #define PER_CONTROL_SUSPENSION 0x00400000UL #define PER_CONTROL_ALTERATION 0x00200000UL -#ifdef __s390x__ -#endif /* __s390x__ */ /* * These are defined as per linux/ptrace.h, which see. */ diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h index ff67d730c00c..59880dbaf360 100644 --- a/arch/s390/include/asm/setup.h +++ b/arch/s390/include/asm/setup.h @@ -33,8 +33,6 @@ #define CHUNK_READ_WRITE 0 #define CHUNK_READ_ONLY 1 -#define CHUNK_OLDMEM 4 -#define CHUNK_CRASHK 5 struct mem_chunk { unsigned long addr; @@ -43,13 +41,12 @@ struct mem_chunk { }; extern struct mem_chunk memory_chunk[]; -extern unsigned long real_memory_size; extern int memory_end_set; extern unsigned long memory_end; -void detect_memory_layout(struct mem_chunk chunk[]); -void create_mem_hole(struct mem_chunk memory_chunk[], unsigned long addr, - unsigned long size, int type); +void detect_memory_layout(struct mem_chunk chunk[], unsigned long maxsize); +void create_mem_hole(struct mem_chunk mem_chunk[], unsigned long addr, + unsigned long size); #define PRIMARY_SPACE_MODE 0 #define ACCESS_REGISTER_MODE 1 diff --git a/arch/s390/include/asm/syscall.h b/arch/s390/include/asm/syscall.h index fe7b99759e12..cd29d2f4e4f3 100644 --- a/arch/s390/include/asm/syscall.h +++ b/arch/s390/include/asm/syscall.h @@ -23,6 +23,7 @@ * type here is what we want [need] for both 32 bit and 64 bit systems. */ extern const unsigned int sys_call_table[]; +extern const unsigned int sys_call_table_emu[]; static inline long syscall_get_nr(struct task_struct *task, struct pt_regs *regs) diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h index 9e2cfe0349c3..eb5f64d26d06 100644 --- a/arch/s390/include/asm/thread_info.h +++ b/arch/s390/include/asm/thread_info.h @@ -14,13 +14,8 @@ #define THREAD_ORDER 1 #define ASYNC_ORDER 1 #else /* CONFIG_64BIT */ -#ifndef __SMALL_STACK #define THREAD_ORDER 2 #define ASYNC_ORDER 2 -#else -#define THREAD_ORDER 1 -#define ASYNC_ORDER 1 -#endif #endif /* CONFIG_64BIT */ #define THREAD_SIZE (PAGE_SIZE << THREAD_ORDER) @@ -41,6 +36,7 @@ struct thread_info { struct task_struct *task; /* main task structure */ struct exec_domain *exec_domain; /* execution domain */ unsigned long flags; /* low level flags */ + unsigned long sys_call_table; /* System call table address */ unsigned int cpu; /* current CPU */ int preempt_count; /* 0 => preemptable, <0 => BUG */ struct restart_block restart_block; diff --git a/arch/s390/include/asm/unistd.h b/arch/s390/include/asm/unistd.h index a6667a952969..651886353551 100644 --- a/arch/s390/include/asm/unistd.h +++ b/arch/s390/include/asm/unistd.h @@ -54,12 +54,4 @@ #define __ARCH_WANT_SYS_VFORK #define __ARCH_WANT_SYS_CLONE -/* - * "Conditional" syscalls - * - * What we want is __attribute__((weak,alias("sys_ni_syscall"))), - * but it doesn't work on all toolchains, so we just do it by hand - */ -#define cond_syscall(x) asm(".weak\t" #x "\n\t.set\t" #x ",sys_ni_syscall") - #endif /* _ASM_S390_UNISTD_H_ */ diff --git a/arch/s390/include/uapi/asm/Kbuild b/arch/s390/include/uapi/asm/Kbuild index 7bf68fff7c5d..9ccd1905bdad 100644 --- a/arch/s390/include/uapi/asm/Kbuild +++ b/arch/s390/include/uapi/asm/Kbuild @@ -44,5 +44,6 @@ header-y += termios.h header-y += types.h header-y += ucontext.h header-y += unistd.h +header-y += virtio-ccw.h header-y += vtoc.h header-y += zcrypt.h diff --git a/arch/s390/include/uapi/asm/ptrace.h b/arch/s390/include/uapi/asm/ptrace.h index a5ca214b34fd..3aa9f1ec5b29 100644 --- a/arch/s390/include/uapi/asm/ptrace.h +++ b/arch/s390/include/uapi/asm/ptrace.h @@ -215,12 +215,6 @@ typedef struct unsigned long addr; } __attribute__ ((aligned(8))) psw_t; -typedef struct -{ - __u32 mask; - __u32 addr; -} __attribute__ ((aligned(8))) psw_compat_t; - #ifndef __s390x__ #define PSW_MASK_PER 0x40000000UL @@ -295,20 +289,6 @@ typedef struct unsigned long orig_gpr2; } s390_regs; -typedef struct -{ - psw_compat_t psw; - __u32 gprs[NUM_GPRS]; - __u32 acrs[NUM_ACRS]; - __u32 orig_gpr2; -} s390_compat_regs; - -typedef struct -{ - __u32 gprs_high[NUM_GPRS]; -} s390_compat_regs_high; - - /* * Now for the user space program event recording (trace) definitions. * The following structures are used only for the ptrace interface, don't diff --git a/arch/s390/include/uapi/asm/socket.h b/arch/s390/include/uapi/asm/socket.h index f99eea7fff0f..2dacb306835c 100644 --- a/arch/s390/include/uapi/asm/socket.h +++ b/arch/s390/include/uapi/asm/socket.h @@ -78,4 +78,6 @@ #define SO_LOCK_FILTER 44 +#define SO_SELECT_ERR_QUEUE 45 + #endif /* _ASM_SOCKET_H */ diff --git a/arch/s390/include/uapi/asm/statfs.h b/arch/s390/include/uapi/asm/statfs.h index 5acca0a34c20..a61d538756f2 100644 --- a/arch/s390/include/uapi/asm/statfs.h +++ b/arch/s390/include/uapi/asm/statfs.h @@ -7,9 +7,6 @@ #ifndef _S390_STATFS_H #define _S390_STATFS_H -#ifndef __s390x__ -#include <asm-generic/statfs.h> -#else /* * We can't use <asm-generic/statfs.h> because in 64-bit mode * we mix ints of different sizes in our struct statfs. @@ -21,49 +18,33 @@ typedef __kernel_fsid_t fsid_t; #endif struct statfs { - int f_type; - int f_bsize; - long f_blocks; - long f_bfree; - long f_bavail; - long f_files; - long f_ffree; + unsigned int f_type; + unsigned int f_bsize; + unsigned long f_blocks; + unsigned long f_bfree; + unsigned long f_bavail; + unsigned long f_files; + unsigned long f_ffree; __kernel_fsid_t f_fsid; - int f_namelen; - int f_frsize; - int f_flags; - int f_spare[4]; + unsigned int f_namelen; + unsigned int f_frsize; + unsigned int f_flags; + unsigned int f_spare[4]; }; struct statfs64 { - int f_type; - int f_bsize; - long f_blocks; - long f_bfree; - long f_bavail; - long f_files; - long f_ffree; + unsigned int f_type; + unsigned int f_bsize; + unsigned long f_blocks; + unsigned long f_bfree; + unsigned long f_bavail; + unsigned long f_files; + unsigned long f_ffree; __kernel_fsid_t f_fsid; - int f_namelen; - int f_frsize; - int f_flags; - int f_spare[4]; + unsigned int f_namelen; + unsigned int f_frsize; + unsigned int f_flags; + unsigned int f_spare[4]; }; -struct compat_statfs64 { - __u32 f_type; - __u32 f_bsize; - __u64 f_blocks; - __u64 f_bfree; - __u64 f_bavail; - __u64 f_files; - __u64 f_ffree; - __kernel_fsid_t f_fsid; - __u32 f_namelen; - __u32 f_frsize; - __u32 f_flags; - __u32 f_spare[4]; -}; - -#endif /* __s390x__ */ #endif diff --git a/arch/s390/include/uapi/asm/virtio-ccw.h b/arch/s390/include/uapi/asm/virtio-ccw.h new file mode 100644 index 000000000000..a9a4ebf79fa7 --- /dev/null +++ b/arch/s390/include/uapi/asm/virtio-ccw.h @@ -0,0 +1,21 @@ +/* + * Definitions for virtio-ccw devices. + * + * Copyright IBM Corp. 2013 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. + * + * Author(s): Cornelia Huck <cornelia.huck@de.ibm.com> + */ +#ifndef __KVM_VIRTIO_CCW_H +#define __KVM_VIRTIO_CCW_H + +/* Alignment of vring buffers. */ +#define KVM_VIRTIO_CCW_RING_ALIGN 4096 + +/* Subcode for diagnose 500 (virtio hypercall). */ +#define KVM_S390_VIRTIO_CCW_NOTIFY 3 + +#endif diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index 2ac311ef5c9b..4bb2a4656163 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -14,16 +14,25 @@ endif CFLAGS_smp.o := -Wno-nonnull # +# Disable tailcall optimizations for stack / callchain walking functions +# since this might generate broken code when accessing register 15 and +# passing its content to other functions. +# +CFLAGS_stacktrace.o += -fno-optimize-sibling-calls +CFLAGS_dumpstack.o += -fno-optimize-sibling-calls + +# # Pass UTS_MACHINE for user_regset definition # CFLAGS_ptrace.o += -DUTS_MACHINE='"$(UTS_MACHINE)"' CFLAGS_sysinfo.o += -Iinclude/math-emu -Iarch/s390/math-emu -w -obj-y := bitmap.o traps.o time.o process.o base.o early.o setup.o vtime.o \ - processor.o sys_s390.o ptrace.o signal.o cpcmd.o ebcdic.o nmi.o \ - debug.o irq.o ipl.o dis.o diag.o mem_detect.o sclp.o vdso.o \ - sysinfo.o jump_label.o lgr.o os_info.o machine_kexec.o pgm_check.o +obj-y := bitmap.o traps.o time.o process.o base.o early.o setup.o vtime.o +obj-y += processor.o sys_s390.o ptrace.o signal.o cpcmd.o ebcdic.o nmi.o +obj-y += debug.o irq.o ipl.o dis.o diag.o sclp.o vdso.o +obj-y += sysinfo.o jump_label.o lgr.o os_info.o machine_kexec.o pgm_check.o +obj-y += dumpstack.o obj-y += $(if $(CONFIG_64BIT),entry64.o,entry.o) obj-y += $(if $(CONFIG_64BIT),reipl64.o,reipl.o) diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index fface87056eb..7a82f9f70100 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -35,6 +35,7 @@ int main(void) DEFINE(__TI_task, offsetof(struct thread_info, task)); DEFINE(__TI_domain, offsetof(struct thread_info, exec_domain)); DEFINE(__TI_flags, offsetof(struct thread_info, flags)); + DEFINE(__TI_sysc_table, offsetof(struct thread_info, sys_call_table)); DEFINE(__TI_cpu, offsetof(struct thread_info, cpu)); DEFINE(__TI_precount, offsetof(struct thread_info, preempt_count)); DEFINE(__TI_user_timer, offsetof(struct thread_info, user_timer)); diff --git a/arch/s390/kernel/compat_linux.c b/arch/s390/kernel/compat_linux.c index 19f26de27fae..8b6e4f5288a2 100644 --- a/arch/s390/kernel/compat_linux.c +++ b/arch/s390/kernel/compat_linux.c @@ -288,51 +288,13 @@ asmlinkage long sys32_getegid16(void) return high2lowgid(from_kgid_munged(current_user_ns(), current_egid())); } -/* - * sys32_ipc() is the de-multiplexer for the SysV IPC calls in 32bit emulation. - * - * This is really horribly ugly. - */ #ifdef CONFIG_SYSVIPC -asmlinkage long sys32_ipc(u32 call, int first, int second, int third, u32 ptr) +COMPAT_SYSCALL_DEFINE5(s390_ipc, uint, call, int, first, unsigned long, second, + unsigned long, third, compat_uptr_t, ptr) { if (call >> 16) /* hack for backward compatibility */ return -EINVAL; - switch (call) { - case SEMTIMEDOP: - return compat_sys_semtimedop(first, compat_ptr(ptr), - second, compat_ptr(third)); - case SEMOP: - /* struct sembuf is the same on 32 and 64bit :)) */ - return sys_semtimedop(first, compat_ptr(ptr), - second, NULL); - case SEMGET: - return sys_semget(first, second, third); - case SEMCTL: - return compat_sys_semctl(first, second, third, - compat_ptr(ptr)); - case MSGSND: - return compat_sys_msgsnd(first, second, third, - compat_ptr(ptr)); - case MSGRCV: - return compat_sys_msgrcv(first, second, 0, third, - 0, compat_ptr(ptr)); - case MSGGET: - return sys_msgget((key_t) first, second); - case MSGCTL: - return compat_sys_msgctl(first, second, compat_ptr(ptr)); - case SHMAT: - return compat_sys_shmat(first, second, third, - 0, compat_ptr(ptr)); - case SHMDT: - return sys_shmdt(compat_ptr(ptr)); - case SHMGET: - return sys_shmget(first, (unsigned)second, third); - case SHMCTL: - return compat_sys_shmctl(first, second, compat_ptr(ptr)); - } - - return -ENOSYS; + return compat_sys_ipc(call, first, second, third, ptr, third); } #endif @@ -373,48 +335,6 @@ asmlinkage compat_ssize_t sys32_readahead(int fd, u32 offhi, u32 offlo, s32 coun return sys_readahead(fd, ((loff_t)AA(offhi) << 32) | AA(offlo), count); } -asmlinkage long sys32_sendfile(int out_fd, int in_fd, compat_off_t __user *offset, size_t count) -{ - mm_segment_t old_fs = get_fs(); - int ret; - off_t of; - - if (offset && get_user(of, offset)) - return -EFAULT; - - set_fs(KERNEL_DS); - ret = sys_sendfile(out_fd, in_fd, - offset ? (off_t __force __user *) &of : NULL, count); - set_fs(old_fs); - - if (offset && put_user(of, offset)) - return -EFAULT; - - return ret; -} - -asmlinkage long sys32_sendfile64(int out_fd, int in_fd, - compat_loff_t __user *offset, s32 count) -{ - mm_segment_t old_fs = get_fs(); - int ret; - loff_t lof; - - if (offset && get_user(lof, offset)) - return -EFAULT; - - set_fs(KERNEL_DS); - ret = sys_sendfile64(out_fd, in_fd, - offset ? (loff_t __force __user *) &lof : NULL, - count); - set_fs(old_fs); - - if (offset && put_user(lof, offset)) - return -EFAULT; - - return ret; -} - struct stat64_emu31 { unsigned long long st_dev; unsigned int __pad1; diff --git a/arch/s390/kernel/compat_linux.h b/arch/s390/kernel/compat_linux.h index 00d92a5a6f6c..976518c0592a 100644 --- a/arch/s390/kernel/compat_linux.h +++ b/arch/s390/kernel/compat_linux.h @@ -94,7 +94,6 @@ long sys32_getuid16(void); long sys32_geteuid16(void); long sys32_getgid16(void); long sys32_getegid16(void); -long sys32_ipc(u32 call, int first, int second, int third, u32 ptr); long sys32_truncate64(const char __user * path, unsigned long high, unsigned long low); long sys32_ftruncate64(unsigned int fd, unsigned long high, unsigned long low); @@ -106,10 +105,6 @@ long sys32_pread64(unsigned int fd, char __user *ubuf, size_t count, long sys32_pwrite64(unsigned int fd, const char __user *ubuf, size_t count, u32 poshi, u32 poslo); compat_ssize_t sys32_readahead(int fd, u32 offhi, u32 offlo, s32 count); -long sys32_sendfile(int out_fd, int in_fd, compat_off_t __user *offset, - size_t count); -long sys32_sendfile64(int out_fd, int in_fd, compat_loff_t __user *offset, - s32 count); long sys32_stat64(const char __user * filename, struct stat64_emu31 __user * statbuf); long sys32_lstat64(const char __user * filename, struct stat64_emu31 __user * statbuf); diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c index 6de049fbe62d..c439ac9ced09 100644 --- a/arch/s390/kernel/compat_signal.c +++ b/arch/s390/kernel/compat_signal.c @@ -362,6 +362,7 @@ static int setup_frame32(int sig, struct k_sigaction *ka, /* set extra registers only for synchronous signals */ regs->gprs[4] = regs->int_code & 127; regs->gprs[5] = regs->int_parm_long; + regs->gprs[6] = task_thread_info(current)->last_break; } /* Place signal number on stack to allow backtrace from handler. */ @@ -421,6 +422,7 @@ static int setup_rt_frame32(int sig, struct k_sigaction *ka, siginfo_t *info, regs->gprs[2] = map_signal(sig); regs->gprs[3] = (__force __u64) &frame->info; regs->gprs[4] = (__force __u64) &frame->uc; + regs->gprs[5] = task_thread_info(current)->last_break; return 0; give_sigsegv: diff --git a/arch/s390/kernel/compat_wrapper.S b/arch/s390/kernel/compat_wrapper.S index 3c98c4dc5aca..2d72d9e96c15 100644 --- a/arch/s390/kernel/compat_wrapper.S +++ b/arch/s390/kernel/compat_wrapper.S @@ -258,11 +258,6 @@ ENTRY(sys32_mmap2_wrapper) llgtr %r2,%r2 # struct mmap_arg_struct_emu31 * jg sys32_mmap2 # branch to system call -ENTRY(compat_sys_getrusage_wrapper) - lgfr %r2,%r2 # int - llgtr %r3,%r3 # struct rusage_emu31 * - jg compat_sys_getrusage # branch to system call - ENTRY(compat_sys_gettimeofday_wrapper) llgtr %r2,%r2 # struct timeval_emu31 * llgtr %r3,%r3 # struct timezone * @@ -393,14 +388,6 @@ ENTRY(compat_sys_sysinfo_wrapper) llgtr %r2,%r2 # struct sysinfo_emu31 * jg compat_sys_sysinfo # branch to system call -ENTRY(sys32_ipc_wrapper) - llgfr %r2,%r2 # uint - lgfr %r3,%r3 # int - lgfr %r4,%r4 # int - lgfr %r5,%r5 # int - llgfr %r6,%r6 # u32 - jg sys32_ipc # branch to system call - ENTRY(sys32_fsync_wrapper) llgfr %r2,%r2 # unsigned int jg sys_fsync # branch to system call @@ -666,13 +653,6 @@ ENTRY(sys32_capset_wrapper) llgtr %r3,%r3 # const cap_user_data_t jg sys_capset # branch to system call -ENTRY(sys32_sendfile_wrapper) - lgfr %r2,%r2 # int - lgfr %r3,%r3 # int - llgtr %r4,%r4 # __kernel_off_emu31_t * - llgfr %r5,%r5 # size_t - jg sys32_sendfile # branch to system call - #sys32_vfork_wrapper # done in vfork_glue ENTRY(sys32_truncate64_wrapper) @@ -938,13 +918,6 @@ ENTRY(sys_epoll_wait_wrapper) lgfr %r5,%r5 # int jg sys_epoll_wait # branch to system call -ENTRY(sys32_lookup_dcookie_wrapper) - sllg %r2,%r2,32 # get high word of 64bit dcookie - or %r2,%r3 # get low word of 64bit dcookie - llgtr %r3,%r4 # char * - llgfr %r4,%r5 # size_t - jg sys_lookup_dcookie - ENTRY(sys32_fadvise64_wrapper) lgfr %r2,%r2 # int sllg %r3,%r3,32 # get high word of 64bit loff_t @@ -1264,29 +1237,12 @@ ENTRY(sys_tee_wrapper) llgfr %r5,%r5 # unsigned int jg sys_tee -ENTRY(compat_sys_vmsplice_wrapper) - lgfr %r2,%r2 # int - llgtr %r3,%r3 # compat_iovec * - llgfr %r4,%r4 # unsigned int - llgfr %r5,%r5 # unsigned int - jg compat_sys_vmsplice - ENTRY(sys_getcpu_wrapper) llgtr %r2,%r2 # unsigned * llgtr %r3,%r3 # unsigned * llgtr %r4,%r4 # struct getcpu_cache * jg sys_getcpu -ENTRY(compat_sys_epoll_pwait_wrapper) - lgfr %r2,%r2 # int - llgtr %r3,%r3 # struct compat_epoll_event * - lgfr %r4,%r4 # int - lgfr %r5,%r5 # int - llgtr %r6,%r6 # compat_sigset_t * - llgf %r0,164(%r15) # compat_size_t - stg %r0,160(%r15) - jg compat_sys_epoll_pwait - ENTRY(compat_sys_utimes_wrapper) llgtr %r2,%r2 # char * llgtr %r3,%r3 # struct compat_timeval * @@ -1299,12 +1255,6 @@ ENTRY(compat_sys_utimensat_wrapper) lgfr %r5,%r5 # int jg compat_sys_utimensat -ENTRY(compat_sys_signalfd_wrapper) - lgfr %r2,%r2 # int - llgtr %r3,%r3 # compat_sigset_t * - llgfr %r4,%r4 # compat_size_t - jg compat_sys_signalfd - ENTRY(sys_eventfd_wrapper) llgfr %r2,%r2 # unsigned int jg sys_eventfd @@ -1323,13 +1273,6 @@ ENTRY(sys_timerfd_create_wrapper) lgfr %r3,%r3 # int jg sys_timerfd_create -ENTRY(compat_sys_signalfd4_wrapper) - lgfr %r2,%r2 # int - llgtr %r3,%r3 # compat_sigset_t * - llgfr %r4,%r4 # compat_size_t - lgfr %r5,%r5 # int - jg compat_sys_signalfd4 - ENTRY(sys_eventfd2_wrapper) llgfr %r2,%r2 # unsigned int lgfr %r3,%r3 # int @@ -1361,13 +1304,6 @@ ENTRY(sys32_readahead_wrapper) lgfr %r5,%r5 # s32 jg sys32_readahead # branch to system call -ENTRY(sys32_sendfile64_wrapper) - lgfr %r2,%r2 # int - lgfr %r3,%r3 # int - llgtr %r4,%r4 # compat_loff_t * - lgfr %r5,%r5 # s32 - jg sys32_sendfile64 # branch to system call - ENTRY(sys_tkill_wrapper) lgfr %r2,%r2 # pid_t lgfr %r3,%r3 # int @@ -1387,22 +1323,6 @@ ENTRY(compat_sys_keyctl_wrapper) llgfr %r6,%r6 # u32 jg compat_sys_keyctl # branch to system call -ENTRY(compat_sys_preadv_wrapper) - llgfr %r2,%r2 # unsigned long - llgtr %r3,%r3 # compat_iovec * - llgfr %r4,%r4 # unsigned long - llgfr %r5,%r5 # u32 - llgfr %r6,%r6 # u32 - jg compat_sys_preadv # branch to system call - -ENTRY(compat_sys_pwritev_wrapper) - llgfr %r2,%r2 # unsigned long - llgtr %r3,%r3 # compat_iovec * - llgfr %r4,%r4 # unsigned long - llgfr %r5,%r5 # u32 - llgfr %r6,%r6 # u32 - jg compat_sys_pwritev # branch to system call - ENTRY(sys_perf_event_open_wrapper) llgtr %r2,%r2 # const struct perf_event_attr * lgfr %r3,%r3 # pid_t diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c index fb8d8781a011..f703d91bf720 100644 --- a/arch/s390/kernel/crash_dump.c +++ b/arch/s390/kernel/crash_dump.c @@ -88,8 +88,8 @@ static struct mem_chunk *get_memory_layout(void) struct mem_chunk *chunk_array; chunk_array = kzalloc_panic(MEMORY_CHUNKS * sizeof(struct mem_chunk)); - detect_memory_layout(chunk_array); - create_mem_hole(chunk_array, OLDMEM_BASE, OLDMEM_SIZE, CHUNK_CRASHK); + detect_memory_layout(chunk_array, 0); + create_mem_hole(chunk_array, OLDMEM_BASE, OLDMEM_SIZE); return chunk_array; } @@ -344,7 +344,7 @@ static int loads_init(Elf64_Phdr *phdr, u64 loads_offset) for (i = 0; i < MEMORY_CHUNKS; i++) { mem_chunk = &chunk_array[i]; if (mem_chunk->size == 0) - break; + continue; if (chunk_array[i].type != CHUNK_READ_WRITE && chunk_array[i].type != CHUNK_READ_ONLY) continue; diff --git a/arch/s390/kernel/dis.c b/arch/s390/kernel/dis.c index 3ad5e9540160..7f4a4a8c847c 100644 --- a/arch/s390/kernel/dis.c +++ b/arch/s390/kernel/dis.c @@ -1696,14 +1696,15 @@ static struct insn *find_insn(unsigned char *code) * insn_to_mnemonic - decode an s390 instruction * @instruction: instruction to decode * @buf: buffer to fill with mnemonic + * @len: length of buffer * * Decode the instruction at @instruction and store the corresponding - * mnemonic into @buf. + * mnemonic into @buf of length @len. * @buf is left unchanged if the instruction could not be decoded. * Returns: * %0 on success, %-ENOENT if the instruction was not found. */ -int insn_to_mnemonic(unsigned char *instruction, char buf[8]) +int insn_to_mnemonic(unsigned char *instruction, char *buf, unsigned int len) { struct insn *insn; @@ -1711,10 +1712,10 @@ int insn_to_mnemonic(unsigned char *instruction, char buf[8]) if (!insn) return -ENOENT; if (insn->name[0] == '\0') - snprintf(buf, 8, "%s", + snprintf(buf, len, "%s", long_insn_name[(int) insn->name[1]]); else - snprintf(buf, 8, "%.5s", insn->name); + snprintf(buf, len, "%.5s", insn->name); return 0; } EXPORT_SYMBOL_GPL(insn_to_mnemonic); diff --git a/arch/s390/kernel/dumpstack.c b/arch/s390/kernel/dumpstack.c new file mode 100644 index 000000000000..298297477257 --- /dev/null +++ b/arch/s390/kernel/dumpstack.c @@ -0,0 +1,212 @@ +/* + * Stack dumping functions + * + * Copyright IBM Corp. 1999, 2013 + */ + +#include <linux/kallsyms.h> +#include <linux/hardirq.h> +#include <linux/kprobes.h> +#include <linux/utsname.h> +#include <linux/export.h> +#include <linux/kdebug.h> +#include <linux/ptrace.h> +#include <linux/module.h> +#include <linux/sched.h> +#include <asm/processor.h> +#include <asm/debug.h> +#include <asm/ipl.h> + +#ifndef CONFIG_64BIT +#define LONG "%08lx " +#define FOURLONG "%08lx %08lx %08lx %08lx\n" +static int kstack_depth_to_print = 12; +#else /* CONFIG_64BIT */ +#define LONG "%016lx " +#define FOURLONG "%016lx %016lx %016lx %016lx\n" +static int kstack_depth_to_print = 20; +#endif /* CONFIG_64BIT */ + +/* + * For show_trace we have tree different stack to consider: + * - the panic stack which is used if the kernel stack has overflown + * - the asynchronous interrupt stack (cpu related) + * - the synchronous kernel stack (process related) + * The stack trace can start at any of the three stack and can potentially + * touch all of them. The order is: panic stack, async stack, sync stack. + */ +static unsigned long +__show_trace(unsigned long sp, unsigned long low, unsigned long high) +{ + struct stack_frame *sf; + struct pt_regs *regs; + + while (1) { + sp = sp & PSW_ADDR_INSN; + if (sp < low || sp > high - sizeof(*sf)) + return sp; + sf = (struct stack_frame *) sp; + printk("([<%016lx>] ", sf->gprs[8] & PSW_ADDR_INSN); + print_symbol("%s)\n", sf->gprs[8] & PSW_ADDR_INSN); + /* Follow the backchain. */ + while (1) { + low = sp; + sp = sf->back_chain & PSW_ADDR_INSN; + if (!sp) + break; + if (sp <= low || sp > high - sizeof(*sf)) + return sp; + sf = (struct stack_frame *) sp; + printk(" [<%016lx>] ", sf->gprs[8] & PSW_ADDR_INSN); + print_symbol("%s\n", sf->gprs[8] & PSW_ADDR_INSN); + } + /* Zero backchain detected, check for interrupt frame. */ + sp = (unsigned long) (sf + 1); + if (sp <= low || sp > high - sizeof(*regs)) + return sp; + regs = (struct pt_regs *) sp; + printk(" [<%016lx>] ", regs->psw.addr & PSW_ADDR_INSN); + print_symbol("%s\n", regs->psw.addr & PSW_ADDR_INSN); + low = sp; + sp = regs->gprs[15]; + } +} + +static void show_trace(struct task_struct *task, unsigned long *stack) +{ + register unsigned long __r15 asm ("15"); + unsigned long sp; + + sp = (unsigned long) stack; + if (!sp) + sp = task ? task->thread.ksp : __r15; + printk("Call Trace:\n"); +#ifdef CONFIG_CHECK_STACK + sp = __show_trace(sp, S390_lowcore.panic_stack - 4096, + S390_lowcore.panic_stack); +#endif + sp = __show_trace(sp, S390_lowcore.async_stack - ASYNC_SIZE, + S390_lowcore.async_stack); + if (task) + __show_trace(sp, (unsigned long) task_stack_page(task), + (unsigned long) task_stack_page(task) + THREAD_SIZE); + else + __show_trace(sp, S390_lowcore.thread_info, + S390_lowcore.thread_info + THREAD_SIZE); + if (!task) + task = current; + debug_show_held_locks(task); +} + +void show_stack(struct task_struct *task, unsigned long *sp) +{ + register unsigned long *__r15 asm ("15"); + unsigned long *stack; + int i; + + if (!sp) + stack = task ? (unsigned long *) task->thread.ksp : __r15; + else + stack = sp; + + for (i = 0; i < kstack_depth_to_print; i++) { + if (((addr_t) stack & (THREAD_SIZE-1)) == 0) + break; + if ((i * sizeof(long) % 32) == 0) + printk("%s ", i == 0 ? "" : "\n"); + printk(LONG, *stack++); + } + printk("\n"); + show_trace(task, sp); +} + +static void show_last_breaking_event(struct pt_regs *regs) +{ +#ifdef CONFIG_64BIT + printk("Last Breaking-Event-Address:\n"); + printk(" [<%016lx>] ", regs->args[0] & PSW_ADDR_INSN); + print_symbol("%s\n", regs->args[0] & PSW_ADDR_INSN); +#endif +} + +static inline int mask_bits(struct pt_regs *regs, unsigned long bits) +{ + return (regs->psw.mask & bits) / ((~bits + 1) & bits); +} + +void show_registers(struct pt_regs *regs) +{ + char *mode; + + mode = user_mode(regs) ? "User" : "Krnl"; + printk("%s PSW : %p %p", + mode, (void *) regs->psw.mask, + (void *) regs->psw.addr); + print_symbol(" (%s)\n", regs->psw.addr & PSW_ADDR_INSN); + printk(" R:%x T:%x IO:%x EX:%x Key:%x M:%x W:%x " + "P:%x AS:%x CC:%x PM:%x", mask_bits(regs, PSW_MASK_PER), + mask_bits(regs, PSW_MASK_DAT), mask_bits(regs, PSW_MASK_IO), + mask_bits(regs, PSW_MASK_EXT), mask_bits(regs, PSW_MASK_KEY), + mask_bits(regs, PSW_MASK_MCHECK), mask_bits(regs, PSW_MASK_WAIT), + mask_bits(regs, PSW_MASK_PSTATE), mask_bits(regs, PSW_MASK_ASC), + mask_bits(regs, PSW_MASK_CC), mask_bits(regs, PSW_MASK_PM)); +#ifdef CONFIG_64BIT + printk(" EA:%x", mask_bits(regs, PSW_MASK_EA | PSW_MASK_BA)); +#endif + printk("\n%s GPRS: " FOURLONG, mode, + regs->gprs[0], regs->gprs[1], regs->gprs[2], regs->gprs[3]); + printk(" " FOURLONG, + regs->gprs[4], regs->gprs[5], regs->gprs[6], regs->gprs[7]); + printk(" " FOURLONG, + regs->gprs[8], regs->gprs[9], regs->gprs[10], regs->gprs[11]); + printk(" " FOURLONG, + regs->gprs[12], regs->gprs[13], regs->gprs[14], regs->gprs[15]); + show_code(regs); +} + +void show_regs(struct pt_regs *regs) +{ + show_regs_print_info(KERN_DEFAULT); + show_registers(regs); + /* Show stack backtrace if pt_regs is from kernel mode */ + if (!user_mode(regs)) + show_trace(NULL, (unsigned long *) regs->gprs[15]); + show_last_breaking_event(regs); +} + +static DEFINE_SPINLOCK(die_lock); + +void die(struct pt_regs *regs, const char *str) +{ + static int die_counter; + + oops_enter(); + lgr_info_log(); + debug_stop_all(); + console_verbose(); + spin_lock_irq(&die_lock); + bust_spinlocks(1); + printk("%s: %04x [#%d] ", str, regs->int_code & 0xffff, ++die_counter); +#ifdef CONFIG_PREEMPT + printk("PREEMPT "); +#endif +#ifdef CONFIG_SMP + printk("SMP "); +#endif +#ifdef CONFIG_DEBUG_PAGEALLOC + printk("DEBUG_PAGEALLOC"); +#endif + printk("\n"); + notify_die(DIE_OOPS, str, regs, 0, regs->int_code & 0xffff, SIGSEGV); + print_modules(); + show_regs(regs); + bust_spinlocks(0); + add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE); + spin_unlock_irq(&die_lock); + if (in_interrupt()) + panic("Fatal exception in interrupt"); + if (panic_on_oops) + panic("Fatal exception: panic_on_oops"); + oops_exit(); + do_exit(SIGSEGV); +} diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index bda011e2f8ae..dc8770d7173c 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -482,7 +482,6 @@ void __init startup_init(void) detect_machine_facilities(); setup_topology(); sclp_facilities_detect(); - detect_memory_layout(memory_chunk); #ifdef CONFIG_DYNAMIC_FTRACE S390_lowcore.ftrace_func = (unsigned long)ftrace_caller; #endif diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 94feff7d6132..4d5e6f8a7978 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -45,6 +45,7 @@ _TIF_TRACE = (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | \ STACK_SHIFT = PAGE_SHIFT + THREAD_ORDER STACK_SIZE = 1 << STACK_SHIFT +STACK_INIT = STACK_SIZE - STACK_FRAME_OVERHEAD - __PT_SIZE #define BASED(name) name-system_call(%r13) @@ -97,10 +98,10 @@ STACK_SIZE = 1 << STACK_SHIFT sra %r14,\shift jnz 1f CHECK_STACK 1<<\shift,\savearea + ahi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) j 2f 1: l %r15,\stack # load target stack -2: ahi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) - la %r11,STACK_FRAME_OVERHEAD(%r15) +2: la %r11,STACK_FRAME_OVERHEAD(%r15) .endm .macro ADD64 high,low,timer @@ -150,7 +151,7 @@ ENTRY(__switch_to) l %r4,__THREAD_info(%r2) # get thread_info of prev l %r5,__THREAD_info(%r3) # get thread_info of next lr %r15,%r5 - ahi %r15,STACK_SIZE # end of kernel stack of next + ahi %r15,STACK_INIT # end of kernel stack of next st %r3,__LC_CURRENT # store task struct of next st %r5,__LC_THREAD_INFO # store thread info of next st %r15,__LC_KERNEL_STACK # store end of kernel stack @@ -178,7 +179,6 @@ sysc_stm: l %r13,__LC_SVC_NEW_PSW+4 sysc_per: l %r15,__LC_KERNEL_STACK - ahi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) la %r11,STACK_FRAME_OVERHEAD(%r15) # pointer to pt_regs sysc_vtime: UPDATE_VTIME %r8,%r9,__LC_SYNC_ENTER_TIMER @@ -188,6 +188,7 @@ sysc_vtime: mvc __PT_INT_CODE(4,%r11),__LC_SVC_ILC sysc_do_svc: oi __TI_flags+3(%r12),_TIF_SYSCALL + l %r10,__TI_sysc_table(%r12) # 31 bit system call table lh %r8,__PT_INT_CODE+2(%r11) sla %r8,2 # shift and test for svc0 jnz sysc_nr_ok @@ -198,7 +199,6 @@ sysc_do_svc: lr %r8,%r1 sla %r8,2 sysc_nr_ok: - l %r10,BASED(.Lsys_call_table) # 31 bit system call table xc __SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15) st %r2,__PT_ORIG_GPR2(%r11) st %r7,STACK_FRAME_OVERHEAD(%r15) @@ -359,11 +359,11 @@ ENTRY(pgm_check_handler) tm __LC_PGM_ILC+3,0x80 # check for per exception jnz pgm_svcper # -> single stepped svc 0: CHECK_STACK STACK_SIZE,__LC_SAVE_AREA_SYNC + ahi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) j 2f 1: UPDATE_VTIME %r14,%r15,__LC_SYNC_ENTER_TIMER l %r15,__LC_KERNEL_STACK -2: ahi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) - la %r11,STACK_FRAME_OVERHEAD(%r15) +2: la %r11,STACK_FRAME_OVERHEAD(%r15) stm %r0,%r7,__PT_R0(%r11) mvc __PT_R8(32,%r11),__LC_SAVE_AREA_SYNC stm %r8,%r9,__PT_PSW(%r11) @@ -485,7 +485,6 @@ io_work: # io_work_user: l %r1,__LC_KERNEL_STACK - ahi %r1,-(STACK_FRAME_OVERHEAD + __PT_SIZE) mvc STACK_FRAME_OVERHEAD(__PT_SIZE,%r1),0(%r11) xc __SF_BACKCHAIN(4,%r1),__SF_BACKCHAIN(%r1) la %r11,STACK_FRAME_OVERHEAD(%r1) @@ -646,7 +645,6 @@ mcck_skip: tm __PT_PSW+1(%r11),0x01 # returning to user ? jno mcck_return l %r1,__LC_KERNEL_STACK # switch to kernel stack - ahi %r1,-(STACK_FRAME_OVERHEAD + __PT_SIZE) mvc STACK_FRAME_OVERHEAD(__PT_SIZE,%r1),0(%r11) xc __SF_BACKCHAIN(4,%r1),__SF_BACKCHAIN(%r1) la %r11,STACK_FRAME_OVERHEAD(%r15) @@ -674,6 +672,7 @@ mcck_panic: sra %r14,PAGE_SHIFT jz 0f l %r15,__LC_PANIC_STACK + j mcck_skip 0: ahi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) j mcck_skip @@ -714,12 +713,10 @@ ENTRY(restart_int_handler) */ stack_overflow: l %r15,__LC_PANIC_STACK # change to panic stack - ahi %r15,-__PT_SIZE # create pt_regs - stm %r0,%r7,__PT_R0(%r15) - stm %r8,%r9,__PT_PSW(%r15) + la %r11,STACK_FRAME_OVERHEAD(%r15) + stm %r0,%r7,__PT_R0(%r11) + stm %r8,%r9,__PT_PSW(%r11) mvc __PT_R8(32,%r11),0(%r14) - lr %r15,%r11 - ahi %r15,-STACK_FRAME_OVERHEAD l %r1,BASED(1f) xc __SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15) lr %r2,%r11 # pass pointer to pt_regs @@ -799,15 +796,14 @@ cleanup_system_call: mvc __LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER # set up saved register 11 l %r15,__LC_KERNEL_STACK - ahi %r15,-__PT_SIZE - st %r15,12(%r11) # r11 pt_regs pointer + la %r9,STACK_FRAME_OVERHEAD(%r15) + st %r9,12(%r11) # r11 pt_regs pointer # fill pt_regs - mvc __PT_R8(32,%r15),__LC_SAVE_AREA_SYNC - stm %r0,%r7,__PT_R0(%r15) - mvc __PT_PSW(8,%r15),__LC_SVC_OLD_PSW - mvc __PT_INT_CODE(4,%r15),__LC_SVC_ILC + mvc __PT_R8(32,%r9),__LC_SAVE_AREA_SYNC + stm %r0,%r7,__PT_R0(%r9) + mvc __PT_PSW(8,%r9),__LC_SVC_OLD_PSW + mvc __PT_INT_CODE(4,%r9),__LC_SVC_ILC # setup saved register 15 - ahi %r15,-STACK_FRAME_OVERHEAD st %r15,28(%r11) # r15 stack pointer # set new psw address and exit l %r9,BASED(cleanup_table+4) # sysc_do_svc + 0x80000000 @@ -910,7 +906,6 @@ cleanup_idle_wait: .Ltrace_enter: .long do_syscall_trace_enter .Ltrace_exit: .long do_syscall_trace_exit .Lschedule_tail: .long schedule_tail -.Lsys_call_table: .long sys_call_table .Lsysc_per: .long sysc_per + 0x80000000 #ifdef CONFIG_TRACE_IRQFLAGS .Lhardirqs_on: .long trace_hardirqs_on_caller diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h index c3a736a3ed44..aa0ab02e9595 100644 --- a/arch/s390/kernel/entry.h +++ b/arch/s390/kernel/entry.h @@ -7,6 +7,7 @@ #include <asm/cputime.h> extern void *restart_stack; +extern unsigned long suspend_zero_pages; void system_call(void); void pgm_check_handler(void); diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S index 2e6d60c55f90..4c17eece707e 100644 --- a/arch/s390/kernel/entry64.S +++ b/arch/s390/kernel/entry64.S @@ -39,6 +39,7 @@ __PT_R15 = __PT_GPRS + 120 STACK_SHIFT = PAGE_SHIFT + THREAD_ORDER STACK_SIZE = 1 << STACK_SHIFT +STACK_INIT = STACK_SIZE - STACK_FRAME_OVERHEAD - __PT_SIZE _TIF_WORK_SVC = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \ _TIF_MCCK_PENDING | _TIF_PER_TRAP ) @@ -124,10 +125,10 @@ _TIF_EXIT_SIE = (_TIF_SIGPENDING | _TIF_NEED_RESCHED | _TIF_MCCK_PENDING) srag %r14,%r14,\shift jnz 1f CHECK_STACK 1<<\shift,\savearea + aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) j 2f 1: lg %r15,\stack # load target stack -2: aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) - la %r11,STACK_FRAME_OVERHEAD(%r15) +2: la %r11,STACK_FRAME_OVERHEAD(%r15) .endm .macro UPDATE_VTIME scratch,enter_timer @@ -177,7 +178,7 @@ ENTRY(__switch_to) lg %r4,__THREAD_info(%r2) # get thread_info of prev lg %r5,__THREAD_info(%r3) # get thread_info of next lgr %r15,%r5 - aghi %r15,STACK_SIZE # end of kernel stack of next + aghi %r15,STACK_INIT # end of kernel stack of next stg %r3,__LC_CURRENT # store task struct of next stg %r5,__LC_THREAD_INFO # store thread info of next stg %r15,__LC_KERNEL_STACK # store end of kernel stack @@ -203,10 +204,8 @@ sysc_stmg: stmg %r8,%r15,__LC_SAVE_AREA_SYNC lg %r10,__LC_LAST_BREAK lg %r12,__LC_THREAD_INFO - larl %r13,system_call sysc_per: lg %r15,__LC_KERNEL_STACK - aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) la %r11,STACK_FRAME_OVERHEAD(%r15) # pointer to pt_regs sysc_vtime: UPDATE_VTIME %r13,__LC_SYNC_ENTER_TIMER @@ -217,6 +216,7 @@ sysc_vtime: mvc __PT_INT_CODE(4,%r11),__LC_SVC_ILC sysc_do_svc: oi __TI_flags+7(%r12),_TIF_SYSCALL + lg %r10,__TI_sysc_table(%r12) # address of system call table llgh %r8,__PT_INT_CODE+2(%r11) slag %r8,%r8,2 # shift and test for svc 0 jnz sysc_nr_ok @@ -227,13 +227,6 @@ sysc_do_svc: sth %r1,__PT_INT_CODE+2(%r11) slag %r8,%r1,2 sysc_nr_ok: - larl %r10,sys_call_table # 64 bit system call table -#ifdef CONFIG_COMPAT - tm __TI_flags+5(%r12),(_TIF_31BIT>>16) - jno sysc_noemu - larl %r10,sys_call_table_emu # 31 bit system call table -sysc_noemu: -#endif xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) stg %r2,__PT_ORIG_GPR2(%r11) stg %r7,STACK_FRAME_OVERHEAD(%r15) @@ -389,6 +382,7 @@ ENTRY(pgm_check_handler) tm __LC_PGM_ILC+3,0x80 # check for per exception jnz pgm_svcper # -> single stepped svc 0: CHECK_STACK STACK_SIZE,__LC_SAVE_AREA_SYNC + aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) j 2f 1: UPDATE_VTIME %r14,__LC_SYNC_ENTER_TIMER LAST_BREAK %r14 @@ -398,8 +392,7 @@ ENTRY(pgm_check_handler) tm __LC_PGM_ILC+2,0x02 # check for transaction abort jz 2f mvc __THREAD_trap_tdb(256,%r14),0(%r13) -2: aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) - la %r11,STACK_FRAME_OVERHEAD(%r15) +2: la %r11,STACK_FRAME_OVERHEAD(%r15) stmg %r0,%r7,__PT_R0(%r11) mvc __PT_R8(64,%r11),__LC_SAVE_AREA_SYNC stmg %r8,%r9,__PT_PSW(%r11) @@ -526,7 +519,6 @@ io_work: # io_work_user: lg %r1,__LC_KERNEL_STACK - aghi %r1,-(STACK_FRAME_OVERHEAD + __PT_SIZE) mvc STACK_FRAME_OVERHEAD(__PT_SIZE,%r1),0(%r11) xc __SF_BACKCHAIN(8,%r1),__SF_BACKCHAIN(%r1) la %r11,STACK_FRAME_OVERHEAD(%r1) @@ -688,7 +680,6 @@ mcck_skip: tm __PT_PSW+1(%r11),0x01 # returning to user ? jno mcck_return lg %r1,__LC_KERNEL_STACK # switch to kernel stack - aghi %r1,-(STACK_FRAME_OVERHEAD + __PT_SIZE) mvc STACK_FRAME_OVERHEAD(__PT_SIZE,%r1),0(%r11) xc __SF_BACKCHAIN(8,%r1),__SF_BACKCHAIN(%r1) la %r11,STACK_FRAME_OVERHEAD(%r1) @@ -755,14 +746,12 @@ ENTRY(restart_int_handler) * Setup a pt_regs so that show_trace can provide a good call trace. */ stack_overflow: - lg %r11,__LC_PANIC_STACK # change to panic stack - aghi %r11,-__PT_SIZE # create pt_regs + lg %r15,__LC_PANIC_STACK # change to panic stack + la %r11,STACK_FRAME_OVERHEAD(%r15) stmg %r0,%r7,__PT_R0(%r11) stmg %r8,%r9,__PT_PSW(%r11) mvc __PT_R8(64,%r11),0(%r14) stg %r10,__PT_ORIG_GPR2(%r11) # store last break to orig_gpr2 - lgr %r15,%r11 - aghi %r15,-STACK_FRAME_OVERHEAD xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) lgr %r2,%r11 # pass pointer to pt_regs jg kernel_stack_overflow @@ -846,15 +835,14 @@ cleanup_system_call: mvc __TI_last_break(8,%r12),16(%r11) 0: # set up saved register r11 lg %r15,__LC_KERNEL_STACK - aghi %r15,-__PT_SIZE - stg %r15,24(%r11) # r11 pt_regs pointer + la %r9,STACK_FRAME_OVERHEAD(%r15) + stg %r9,24(%r11) # r11 pt_regs pointer # fill pt_regs - mvc __PT_R8(64,%r15),__LC_SAVE_AREA_SYNC - stmg %r0,%r7,__PT_R0(%r15) - mvc __PT_PSW(16,%r15),__LC_SVC_OLD_PSW - mvc __PT_INT_CODE(4,%r15),__LC_SVC_ILC + mvc __PT_R8(64,%r9),__LC_SAVE_AREA_SYNC + stmg %r0,%r7,__PT_R0(%r9) + mvc __PT_PSW(16,%r9),__LC_SVC_OLD_PSW + mvc __PT_INT_CODE(4,%r9),__LC_SVC_ILC # setup saved register r15 - aghi %r15,-STACK_FRAME_OVERHEAD stg %r15,56(%r11) # r15 stack pointer # set new psw address and exit larl %r9,sysc_do_svc @@ -1011,6 +999,7 @@ sys_call_table: #ifdef CONFIG_COMPAT #define SYSCALL(esa,esame,emu) .long emu + .globl sys_call_table_emu sys_call_table_emu: #include "syscalls.S" #undef SYSCALL diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c index 1630f439cd2a..f7fb58903f6a 100644 --- a/arch/s390/kernel/irq.c +++ b/arch/s390/kernel/irq.c @@ -33,7 +33,7 @@ struct irq_class { }; /* - * The list of "main" irq classes on s390. This is the list of interrrupts + * The list of "main" irq classes on s390. This is the list of interrupts * that appear both in /proc/stat ("intr" line) and /proc/interrupts. * Historically only external and I/O interrupts have been part of /proc/stat. * We can't add the split external and I/O sub classes since the first field @@ -162,10 +162,8 @@ asmlinkage void do_softirq(void) #ifdef CONFIG_PROC_FS void init_irq_proc(void) { - struct proc_dir_entry *root_irq_dir; - - root_irq_dir = proc_mkdir("irq", NULL); - create_prof_cpu_mask(root_irq_dir); + if (proc_mkdir("irq", NULL)) + create_prof_cpu_mask(); } #endif diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c index b3de27700016..ac2178161ec3 100644 --- a/arch/s390/kernel/machine_kexec.c +++ b/arch/s390/kernel/machine_kexec.c @@ -13,6 +13,7 @@ #include <linux/reboot.h> #include <linux/ftrace.h> #include <linux/debug_locks.h> +#include <linux/suspend.h> #include <asm/cio.h> #include <asm/setup.h> #include <asm/pgtable.h> @@ -67,6 +68,35 @@ void setup_regs(void) memcpy((void *) SAVE_AREA_BASE, (void *) sa, sizeof(struct save_area)); } +/* + * PM notifier callback for kdump + */ +static int machine_kdump_pm_cb(struct notifier_block *nb, unsigned long action, + void *ptr) +{ + switch (action) { + case PM_SUSPEND_PREPARE: + case PM_HIBERNATION_PREPARE: + if (crashk_res.start) + crash_map_reserved_pages(); + break; + case PM_POST_SUSPEND: + case PM_POST_HIBERNATION: + if (crashk_res.start) + crash_unmap_reserved_pages(); + break; + default: + return NOTIFY_DONE; + } + return NOTIFY_OK; +} + +static int __init machine_kdump_pm_init(void) +{ + pm_notifier(machine_kdump_pm_cb, 0); + return 0; +} +arch_initcall(machine_kdump_pm_init); #endif /* diff --git a/arch/s390/kernel/mem_detect.c b/arch/s390/kernel/mem_detect.c deleted file mode 100644 index 22d502e885ed..000000000000 --- a/arch/s390/kernel/mem_detect.c +++ /dev/null @@ -1,145 +0,0 @@ -/* - * Copyright IBM Corp. 2008, 2009 - * - * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com> - */ - -#include <linux/kernel.h> -#include <linux/module.h> -#include <asm/ipl.h> -#include <asm/sclp.h> -#include <asm/setup.h> - -#define ADDR2G (1ULL << 31) - -static void find_memory_chunks(struct mem_chunk chunk[]) -{ - unsigned long long memsize, rnmax, rzm; - unsigned long addr = 0, size; - int i = 0, type; - - rzm = sclp_get_rzm(); - rnmax = sclp_get_rnmax(); - memsize = rzm * rnmax; - if (!rzm) - rzm = 1ULL << 17; - if (sizeof(long) == 4) { - rzm = min(ADDR2G, rzm); - memsize = memsize ? min(ADDR2G, memsize) : ADDR2G; - } - do { - size = 0; - type = tprot(addr); - do { - size += rzm; - if (memsize && addr + size >= memsize) - break; - } while (type == tprot(addr + size)); - if (type == CHUNK_READ_WRITE || type == CHUNK_READ_ONLY) { - chunk[i].addr = addr; - chunk[i].size = size; - chunk[i].type = type; - i++; - } - addr += size; - } while (addr < memsize && i < MEMORY_CHUNKS); -} - -void detect_memory_layout(struct mem_chunk chunk[]) -{ - unsigned long flags, cr0; - - memset(chunk, 0, MEMORY_CHUNKS * sizeof(struct mem_chunk)); - /* Disable IRQs, DAT and low address protection so tprot does the - * right thing and we don't get scheduled away with low address - * protection disabled. - */ - flags = __arch_local_irq_stnsm(0xf8); - __ctl_store(cr0, 0, 0); - __ctl_clear_bit(0, 28); - find_memory_chunks(chunk); - __ctl_load(cr0, 0, 0); - arch_local_irq_restore(flags); -} -EXPORT_SYMBOL(detect_memory_layout); - -/* - * Move memory chunks array from index "from" to index "to" - */ -static void mem_chunk_move(struct mem_chunk chunk[], int to, int from) -{ - int cnt = MEMORY_CHUNKS - to; - - memmove(&chunk[to], &chunk[from], cnt * sizeof(struct mem_chunk)); -} - -/* - * Initialize memory chunk - */ -static void mem_chunk_init(struct mem_chunk *chunk, unsigned long addr, - unsigned long size, int type) -{ - chunk->type = type; - chunk->addr = addr; - chunk->size = size; -} - -/* - * Create memory hole with given address, size, and type - */ -void create_mem_hole(struct mem_chunk chunk[], unsigned long addr, - unsigned long size, int type) -{ - unsigned long lh_start, lh_end, lh_size, ch_start, ch_end, ch_size; - int i, ch_type; - - for (i = 0; i < MEMORY_CHUNKS; i++) { - if (chunk[i].size == 0) - continue; - - /* Define chunk properties */ - ch_start = chunk[i].addr; - ch_size = chunk[i].size; - ch_end = ch_start + ch_size - 1; - ch_type = chunk[i].type; - - /* Is memory chunk hit by memory hole? */ - if (addr + size <= ch_start) - continue; /* No: memory hole in front of chunk */ - if (addr > ch_end) - continue; /* No: memory hole after chunk */ - - /* Yes: Define local hole properties */ - lh_start = max(addr, chunk[i].addr); - lh_end = min(addr + size - 1, ch_end); - lh_size = lh_end - lh_start + 1; - - if (lh_start == ch_start && lh_end == ch_end) { - /* Hole covers complete memory chunk */ - mem_chunk_init(&chunk[i], lh_start, lh_size, type); - } else if (lh_end == ch_end) { - /* Hole starts in memory chunk and convers chunk end */ - mem_chunk_move(chunk, i + 1, i); - mem_chunk_init(&chunk[i], ch_start, ch_size - lh_size, - ch_type); - mem_chunk_init(&chunk[i + 1], lh_start, lh_size, type); - i += 1; - } else if (lh_start == ch_start) { - /* Hole ends in memory chunk */ - mem_chunk_move(chunk, i + 1, i); - mem_chunk_init(&chunk[i], lh_start, lh_size, type); - mem_chunk_init(&chunk[i + 1], lh_end + 1, - ch_size - lh_size, ch_type); - break; - } else { - /* Hole splits memory chunk */ - mem_chunk_move(chunk, i + 2, i); - mem_chunk_init(&chunk[i], ch_start, - lh_start - ch_start, ch_type); - mem_chunk_init(&chunk[i + 1], lh_start, lh_size, type); - mem_chunk_init(&chunk[i + 2], lh_end + 1, - ch_end - lh_end, ch_type); - break; - } - } -} diff --git a/arch/s390/kernel/os_info.c b/arch/s390/kernel/os_info.c index 46480d81df00..d112fc66f993 100644 --- a/arch/s390/kernel/os_info.c +++ b/arch/s390/kernel/os_info.c @@ -10,6 +10,7 @@ #include <linux/crash_dump.h> #include <linux/kernel.h> +#include <linux/slab.h> #include <asm/checksum.h> #include <asm/lowcore.h> #include <asm/os_info.h> diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index 536d64579d9a..2bc3eddae34a 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -61,18 +61,8 @@ unsigned long thread_saved_pc(struct task_struct *tsk) return sf->gprs[8]; } -/* - * The idle loop on a S390... - */ -static void default_idle(void) +void arch_cpu_idle(void) { - if (cpu_is_offline(smp_processor_id())) - cpu_die(); - local_irq_disable(); - if (need_resched()) { - local_irq_enable(); - return; - } local_mcck_disable(); if (test_thread_flag(TIF_MCCK_PENDING)) { local_mcck_enable(); @@ -83,19 +73,15 @@ static void default_idle(void) vtime_stop_cpu(); } -void cpu_idle(void) +void arch_cpu_idle_exit(void) { - for (;;) { - tick_nohz_idle_enter(); - rcu_idle_enter(); - while (!need_resched() && !test_thread_flag(TIF_MCCK_PENDING)) - default_idle(); - rcu_idle_exit(); - tick_nohz_idle_exit(); - if (test_thread_flag(TIF_MCCK_PENDING)) - s390_handle_mcck(); - schedule_preempt_disabled(); - } + if (test_thread_flag(TIF_MCCK_PENDING)) + s390_handle_mcck(); +} + +void arch_cpu_idle_dead(void) +{ + cpu_die(); } extern void __kprobes kernel_thread_starter(void); diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 29268859d8ee..0a49095104c9 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -226,25 +226,17 @@ static void __init conmode_default(void) } #ifdef CONFIG_ZFCPDUMP -static void __init setup_zfcpdump(unsigned int console_devno) +static void __init setup_zfcpdump(void) { - static char str[41]; - if (ipl_info.type != IPL_TYPE_FCP_DUMP) return; if (OLDMEM_BASE) return; - if (console_devno != -1) - sprintf(str, " cio_ignore=all,!0.0.%04x,!0.0.%04x", - ipl_info.data.fcp.dev_id.devno, console_devno); - else - sprintf(str, " cio_ignore=all,!0.0.%04x", - ipl_info.data.fcp.dev_id.devno); - strcat(boot_command_line, str); + strcat(boot_command_line, " cio_ignore=all,!ipldev,!condev"); console_loglevel = 2; } #else -static inline void setup_zfcpdump(unsigned int console_devno) {} +static inline void setup_zfcpdump(void) {} #endif /* CONFIG_ZFCPDUMP */ /* @@ -377,11 +369,14 @@ static void __init setup_lowcore(void) PSW_MASK_DAT | PSW_MASK_MCHECK; lc->io_new_psw.addr = PSW_ADDR_AMODE | (unsigned long) io_int_handler; lc->clock_comparator = -1ULL; - lc->kernel_stack = ((unsigned long) &init_thread_union) + THREAD_SIZE; + lc->kernel_stack = ((unsigned long) &init_thread_union) + + THREAD_SIZE - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs); lc->async_stack = (unsigned long) - __alloc_bootmem(ASYNC_SIZE, ASYNC_SIZE, 0) + ASYNC_SIZE; + __alloc_bootmem(ASYNC_SIZE, ASYNC_SIZE, 0) + + ASYNC_SIZE - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs); lc->panic_stack = (unsigned long) - __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, 0) + PAGE_SIZE; + __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, 0) + + PAGE_SIZE - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs); lc->current_task = (unsigned long) init_thread_union.thread_info.task; lc->thread_info = (unsigned long) &init_thread_union; lc->machine_flags = S390_lowcore.machine_flags; @@ -468,14 +463,10 @@ static void __init setup_resources(void) for (i = 0; i < MEMORY_CHUNKS; i++) { if (!memory_chunk[i].size) continue; - if (memory_chunk[i].type == CHUNK_OLDMEM || - memory_chunk[i].type == CHUNK_CRASHK) - continue; res = alloc_bootmem_low(sizeof(*res)); res->flags = IORESOURCE_BUSY | IORESOURCE_MEM; switch (memory_chunk[i].type) { case CHUNK_READ_WRITE: - case CHUNK_CRASHK: res->name = "System RAM"; break; case CHUNK_READ_ONLY: @@ -507,12 +498,10 @@ static void __init setup_resources(void) } } -unsigned long real_memory_size; -EXPORT_SYMBOL_GPL(real_memory_size); - static void __init setup_memory_end(void) { unsigned long vmax, vmalloc_size, tmp; + unsigned long real_memory_size = 0; int i; @@ -522,7 +511,6 @@ static void __init setup_memory_end(void) memory_end_set = 1; } #endif - real_memory_size = 0; memory_end &= PAGE_MASK; /* @@ -535,6 +523,8 @@ static void __init setup_memory_end(void) unsigned long align; chunk = &memory_chunk[i]; + if (!chunk->size) + continue; align = 1UL << (MAX_ORDER + PAGE_SHIFT - 1); start = (chunk->addr + align - 1) & ~(align - 1); end = (chunk->addr + chunk->size) & ~(align - 1); @@ -585,6 +575,8 @@ static void __init setup_memory_end(void) for (i = 0; i < MEMORY_CHUNKS; i++) { struct mem_chunk *chunk = &memory_chunk[i]; + if (!chunk->size) + continue; if (chunk->addr >= memory_end) { memset(chunk, 0, sizeof(*chunk)); continue; @@ -685,15 +677,6 @@ static int __init verify_crash_base(unsigned long crash_base, } /* - * Reserve kdump memory by creating a memory hole in the mem_chunk array - */ -static void __init reserve_kdump_bootmem(unsigned long addr, unsigned long size, - int type) -{ - create_mem_hole(memory_chunk, addr, size, type); -} - -/* * When kdump is enabled, we have to ensure that no memory from * the area [0 - crashkernel memory size] and * [crashk_res.start - crashk_res.end] is set offline. @@ -724,16 +707,22 @@ static struct notifier_block kdump_mem_nb = { static void reserve_oldmem(void) { #ifdef CONFIG_CRASH_DUMP + unsigned long real_size = 0; + int i; + if (!OLDMEM_BASE) return; + for (i = 0; i < MEMORY_CHUNKS; i++) { + struct mem_chunk *chunk = &memory_chunk[i]; - reserve_kdump_bootmem(OLDMEM_BASE, OLDMEM_SIZE, CHUNK_OLDMEM); - reserve_kdump_bootmem(OLDMEM_SIZE, memory_end - OLDMEM_SIZE, - CHUNK_OLDMEM); - if (OLDMEM_BASE + OLDMEM_SIZE == real_memory_size) + real_size = max(real_size, chunk->addr + chunk->size); + } + create_mem_hole(memory_chunk, OLDMEM_BASE, OLDMEM_SIZE); + create_mem_hole(memory_chunk, OLDMEM_SIZE, real_size - OLDMEM_SIZE); + if (OLDMEM_BASE + OLDMEM_SIZE == real_size) saved_max_pfn = PFN_DOWN(OLDMEM_BASE) - 1; else - saved_max_pfn = PFN_DOWN(real_memory_size) - 1; + saved_max_pfn = PFN_DOWN(real_size) - 1; #endif } @@ -772,7 +761,7 @@ static void __init reserve_crashkernel(void) crashk_res.start = crash_base; crashk_res.end = crash_base + crash_size - 1; insert_resource(&iomem_resource, &crashk_res); - reserve_kdump_bootmem(crash_base, crash_size, CHUNK_CRASHK); + create_mem_hole(memory_chunk, crash_base, crash_size); pr_info("Reserving %lluMB of memory at %lluMB " "for crashkernel (System RAM: %luMB)\n", crash_size >> 20, crash_base >> 20, memory_end >> 20); @@ -844,11 +833,10 @@ static void __init setup_memory(void) * Register RAM areas with the bootmem allocator. */ - for (i = 0; i < MEMORY_CHUNKS && memory_chunk[i].size > 0; i++) { + for (i = 0; i < MEMORY_CHUNKS; i++) { unsigned long start_chunk, end_chunk, pfn; - if (memory_chunk[i].type != CHUNK_READ_WRITE && - memory_chunk[i].type != CHUNK_CRASHK) + if (!memory_chunk[i].size) continue; start_chunk = PFN_DOWN(memory_chunk[i].addr); end_chunk = start_chunk + PFN_DOWN(memory_chunk[i].size); @@ -1064,12 +1052,12 @@ void __init setup_arch(char **cmdline_p) memcpy(&uaccess, &uaccess_std, sizeof(uaccess)); parse_early_param(); - + detect_memory_layout(memory_chunk, memory_end); os_info_init(); setup_ipl(); + reserve_oldmem(); setup_memory_end(); setup_addressing_mode(); - reserve_oldmem(); reserve_crashkernel(); setup_memory(); setup_resources(); @@ -1094,5 +1082,5 @@ void __init setup_arch(char **cmdline_p) set_preferred_console(); /* Setup zfcpdump support */ - setup_zfcpdump(console_devno); + setup_zfcpdump(); } diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 549c9d173c0f..8074cb4b7cbf 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -181,8 +181,10 @@ static int __cpuinit pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu) lc = pcpu->lowcore; memcpy(lc, &S390_lowcore, 512); memset((char *) lc + 512, 0, sizeof(*lc) - 512); - lc->async_stack = pcpu->async_stack + ASYNC_SIZE; - lc->panic_stack = pcpu->panic_stack + PAGE_SIZE; + lc->async_stack = pcpu->async_stack + ASYNC_SIZE + - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs); + lc->panic_stack = pcpu->panic_stack + PAGE_SIZE + - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs); lc->cpu_nr = cpu; #ifndef CONFIG_64BIT if (MACHINE_HAS_IEEE) { @@ -253,7 +255,8 @@ static void pcpu_attach_task(struct pcpu *pcpu, struct task_struct *tsk) struct _lowcore *lc = pcpu->lowcore; struct thread_info *ti = task_thread_info(tsk); - lc->kernel_stack = (unsigned long) task_stack_page(tsk) + THREAD_SIZE; + lc->kernel_stack = (unsigned long) task_stack_page(tsk) + + THREAD_SIZE - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs); lc->thread_info = (unsigned long) task_thread_info(tsk); lc->current_task = (unsigned long) tsk; lc->user_timer = ti->user_timer; @@ -711,8 +714,7 @@ static void __cpuinit smp_start_secondary(void *cpuvoid) set_cpu_online(smp_processor_id(), true); inc_irq_stat(CPU_RST); local_irq_enable(); - /* cpu_idle will call schedule for us */ - cpu_idle(); + cpu_startup_entry(CPUHP_ONLINE); } /* Upping and downing of CPUs */ @@ -810,8 +812,10 @@ void __init smp_prepare_boot_cpu(void) pcpu->state = CPU_STATE_CONFIGURED; pcpu->address = boot_cpu_address; pcpu->lowcore = (struct _lowcore *)(unsigned long) store_prefix(); - pcpu->async_stack = S390_lowcore.async_stack - ASYNC_SIZE; - pcpu->panic_stack = S390_lowcore.panic_stack - PAGE_SIZE; + pcpu->async_stack = S390_lowcore.async_stack - ASYNC_SIZE + + STACK_FRAME_OVERHEAD + sizeof(struct pt_regs); + pcpu->panic_stack = S390_lowcore.panic_stack - PAGE_SIZE + + STACK_FRAME_OVERHEAD + sizeof(struct pt_regs); S390_lowcore.percpu_offset = __per_cpu_offset[0]; smp_cpu_set_polarization(0, POLARIZATION_UNKNOWN); set_cpu_present(0, true); diff --git a/arch/s390/kernel/suspend.c b/arch/s390/kernel/suspend.c index aa1494d0e380..c479d2f9605b 100644 --- a/arch/s390/kernel/suspend.c +++ b/arch/s390/kernel/suspend.c @@ -41,6 +41,7 @@ struct page_key_data { static struct page_key_data *page_key_data; static struct page_key_data *page_key_rp, *page_key_wp; static unsigned long page_key_rx, page_key_wx; +unsigned long suspend_zero_pages; /* * For each page in the hibernation image one additional byte is @@ -149,6 +150,36 @@ int pfn_is_nosave(unsigned long pfn) return 0; } +/* + * PM notifier callback for suspend + */ +static int suspend_pm_cb(struct notifier_block *nb, unsigned long action, + void *ptr) +{ + switch (action) { + case PM_SUSPEND_PREPARE: + case PM_HIBERNATION_PREPARE: + suspend_zero_pages = __get_free_pages(GFP_KERNEL, LC_ORDER); + if (!suspend_zero_pages) + return NOTIFY_BAD; + break; + case PM_POST_SUSPEND: + case PM_POST_HIBERNATION: + free_pages(suspend_zero_pages, LC_ORDER); + break; + default: + return NOTIFY_DONE; + } + return NOTIFY_OK; +} + +static int __init suspend_pm_init(void) +{ + pm_notifier(suspend_pm_cb, 0); + return 0; +} +arch_initcall(suspend_pm_init); + void save_processor_state(void) { /* swsusp_arch_suspend() actually saves all cpu register contents. diff --git a/arch/s390/kernel/swsusp_asm64.S b/arch/s390/kernel/swsusp_asm64.S index d4ca4e0617b5..c487be4cfc81 100644 --- a/arch/s390/kernel/swsusp_asm64.S +++ b/arch/s390/kernel/swsusp_asm64.S @@ -36,8 +36,8 @@ ENTRY(swsusp_arch_suspend) /* Store prefix register on stack */ stpx __SF_EMPTY(%r15) - /* Save prefix register contents for lowcore */ - llgf %r4,__SF_EMPTY(%r15) + /* Save prefix register contents for lowcore copy */ + llgf %r10,__SF_EMPTY(%r15) /* Get pointer to save area */ lghi %r1,0x1000 @@ -91,7 +91,18 @@ ENTRY(swsusp_arch_suspend) xc __SF_EMPTY(4,%r15),__SF_EMPTY(%r15) spx __SF_EMPTY(%r15) + /* Save absolute zero pages */ + larl %r2,suspend_zero_pages + lg %r2,0(%r2) + lghi %r4,0 + lghi %r3,2*PAGE_SIZE + lghi %r5,2*PAGE_SIZE +1: mvcle %r2,%r4,0 + jo 1b + + /* Copy lowcore to absolute zero lowcore */ lghi %r2,0 + lgr %r4,%r10 lghi %r3,2*PAGE_SIZE lghi %r5,2*PAGE_SIZE 1: mvcle %r2,%r4,0 @@ -248,8 +259,20 @@ restore_registers: /* Load old stack */ lg %r15,0x2f8(%r13) + /* Save prefix register */ + mvc __SF_EMPTY(4,%r15),0x318(%r13) + + /* Restore absolute zero pages */ + lghi %r2,0 + larl %r4,suspend_zero_pages + lg %r4,0(%r4) + lghi %r3,2*PAGE_SIZE + lghi %r5,2*PAGE_SIZE +1: mvcle %r2,%r4,0 + jo 1b + /* Restore prefix register */ - spx 0x318(%r13) + spx __SF_EMPTY(%r15) /* Activate DAT */ stosm __SF_EMPTY(%r15),0x04 diff --git a/arch/s390/kernel/sys_s390.c b/arch/s390/kernel/sys_s390.c index d0964d22adb5..23eb222c1658 100644 --- a/arch/s390/kernel/sys_s390.c +++ b/arch/s390/kernel/sys_s390.c @@ -132,19 +132,9 @@ SYSCALL_DEFINE1(s390_fadvise64_64, struct fadvise64_64_args __user *, args) * to * %r2: fd, %r3: mode, %r4/%r5: offset, 96(%r15)-103(%r15): len */ -SYSCALL_DEFINE(s390_fallocate)(int fd, int mode, loff_t offset, - u32 len_high, u32 len_low) +SYSCALL_DEFINE5(s390_fallocate, int, fd, int, mode, loff_t, offset, + u32, len_high, u32, len_low) { return sys_fallocate(fd, mode, offset, ((u64)len_high << 32) | len_low); } -#ifdef CONFIG_HAVE_SYSCALL_WRAPPERS -asmlinkage long SyS_s390_fallocate(long fd, long mode, loff_t offset, - long len_high, long len_low) -{ - return SYSC_s390_fallocate((int) fd, (int) mode, offset, - (u32) len_high, (u32) len_low); -} -SYSCALL_ALIAS(sys_s390_fallocate, SyS_s390_fallocate); -#endif - #endif diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S index 630b935d1284..9f214e992eed 100644 --- a/arch/s390/kernel/syscalls.S +++ b/arch/s390/kernel/syscalls.S @@ -85,7 +85,7 @@ SYSCALL(sys_sigpending,sys_sigpending,compat_sys_sigpending_wrapper) SYSCALL(sys_sethostname,sys_sethostname,sys32_sethostname_wrapper) SYSCALL(sys_setrlimit,sys_setrlimit,compat_sys_setrlimit_wrapper) /* 75 */ SYSCALL(sys_old_getrlimit,sys_getrlimit,compat_sys_old_getrlimit_wrapper) -SYSCALL(sys_getrusage,sys_getrusage,compat_sys_getrusage_wrapper) +SYSCALL(sys_getrusage,sys_getrusage,compat_sys_getrusage) SYSCALL(sys_gettimeofday,sys_gettimeofday,compat_sys_gettimeofday_wrapper) SYSCALL(sys_settimeofday,sys_settimeofday,compat_sys_settimeofday_wrapper) SYSCALL(sys_getgroups16,sys_ni_syscall,sys32_getgroups16_wrapper) /* 80 old getgroups16 syscall */ @@ -118,14 +118,14 @@ SYSCALL(sys_newstat,sys_newstat,compat_sys_newstat_wrapper) SYSCALL(sys_newlstat,sys_newlstat,compat_sys_newlstat_wrapper) SYSCALL(sys_newfstat,sys_newfstat,compat_sys_newfstat_wrapper) NI_SYSCALL /* old uname syscall */ -SYSCALL(sys_lookup_dcookie,sys_lookup_dcookie,sys32_lookup_dcookie_wrapper) /* 110 */ +SYSCALL(sys_lookup_dcookie,sys_lookup_dcookie,compat_sys_lookup_dcookie) /* 110 */ SYSCALL(sys_vhangup,sys_vhangup,sys_vhangup) NI_SYSCALL /* old "idle" system call */ NI_SYSCALL /* vm86old for i386 */ SYSCALL(sys_wait4,sys_wait4,compat_sys_wait4) SYSCALL(sys_swapoff,sys_swapoff,sys32_swapoff_wrapper) /* 115 */ SYSCALL(sys_sysinfo,sys_sysinfo,compat_sys_sysinfo_wrapper) -SYSCALL(sys_s390_ipc,sys_s390_ipc,sys32_ipc_wrapper) +SYSCALL(sys_s390_ipc,sys_s390_ipc,compat_sys_s390_ipc) SYSCALL(sys_fsync,sys_fsync,sys32_fsync_wrapper) SYSCALL(sys_sigreturn,sys_sigreturn,sys32_sigreturn) SYSCALL(sys_clone,sys_clone,sys_clone_wrapper) /* 120 */ @@ -195,7 +195,7 @@ SYSCALL(sys_getcwd,sys_getcwd,sys32_getcwd_wrapper) SYSCALL(sys_capget,sys_capget,sys32_capget_wrapper) SYSCALL(sys_capset,sys_capset,sys32_capset_wrapper) /* 185 */ SYSCALL(sys_sigaltstack,sys_sigaltstack,compat_sys_sigaltstack) -SYSCALL(sys_sendfile,sys_sendfile64,sys32_sendfile_wrapper) +SYSCALL(sys_sendfile,sys_sendfile64,compat_sys_sendfile) NI_SYSCALL /* streams1 */ NI_SYSCALL /* streams2 */ SYSCALL(sys_vfork,sys_vfork,sys_vfork) /* 190 */ @@ -231,7 +231,7 @@ SYSCALL(sys_madvise,sys_madvise,sys32_madvise_wrapper) SYSCALL(sys_getdents64,sys_getdents64,sys32_getdents64_wrapper) /* 220 */ SYSCALL(sys_fcntl64,sys_ni_syscall,compat_sys_fcntl64_wrapper) SYSCALL(sys_readahead,sys_readahead,sys32_readahead_wrapper) -SYSCALL(sys_sendfile64,sys_ni_syscall,sys32_sendfile64_wrapper) +SYSCALL(sys_sendfile64,sys_ni_syscall,compat_sys_sendfile64) SYSCALL(sys_setxattr,sys_setxattr,sys32_setxattr_wrapper) SYSCALL(sys_lsetxattr,sys_lsetxattr,sys32_lsetxattr_wrapper) /* 225 */ SYSCALL(sys_fsetxattr,sys_fsetxattr,sys32_fsetxattr_wrapper) @@ -317,27 +317,27 @@ SYSCALL(sys_get_robust_list,sys_get_robust_list,compat_sys_get_robust_list) SYSCALL(sys_splice,sys_splice,sys_splice_wrapper) SYSCALL(sys_sync_file_range,sys_sync_file_range,sys_sync_file_range_wrapper) SYSCALL(sys_tee,sys_tee,sys_tee_wrapper) -SYSCALL(sys_vmsplice,sys_vmsplice,compat_sys_vmsplice_wrapper) +SYSCALL(sys_vmsplice,sys_vmsplice,compat_sys_vmsplice) NI_SYSCALL /* 310 sys_move_pages */ SYSCALL(sys_getcpu,sys_getcpu,sys_getcpu_wrapper) -SYSCALL(sys_epoll_pwait,sys_epoll_pwait,compat_sys_epoll_pwait_wrapper) +SYSCALL(sys_epoll_pwait,sys_epoll_pwait,compat_sys_epoll_pwait) SYSCALL(sys_utimes,sys_utimes,compat_sys_utimes_wrapper) SYSCALL(sys_s390_fallocate,sys_fallocate,sys_fallocate_wrapper) SYSCALL(sys_utimensat,sys_utimensat,compat_sys_utimensat_wrapper) /* 315 */ -SYSCALL(sys_signalfd,sys_signalfd,compat_sys_signalfd_wrapper) +SYSCALL(sys_signalfd,sys_signalfd,compat_sys_signalfd) NI_SYSCALL /* 317 old sys_timer_fd */ SYSCALL(sys_eventfd,sys_eventfd,sys_eventfd_wrapper) SYSCALL(sys_timerfd_create,sys_timerfd_create,sys_timerfd_create_wrapper) SYSCALL(sys_timerfd_settime,sys_timerfd_settime,compat_sys_timerfd_settime) /* 320 */ SYSCALL(sys_timerfd_gettime,sys_timerfd_gettime,compat_sys_timerfd_gettime) -SYSCALL(sys_signalfd4,sys_signalfd4,compat_sys_signalfd4_wrapper) +SYSCALL(sys_signalfd4,sys_signalfd4,compat_sys_signalfd4) SYSCALL(sys_eventfd2,sys_eventfd2,sys_eventfd2_wrapper) SYSCALL(sys_inotify_init1,sys_inotify_init1,sys_inotify_init1_wrapper) SYSCALL(sys_pipe2,sys_pipe2,sys_pipe2_wrapper) /* 325 */ SYSCALL(sys_dup3,sys_dup3,sys_dup3_wrapper) SYSCALL(sys_epoll_create1,sys_epoll_create1,sys_epoll_create1_wrapper) -SYSCALL(sys_preadv,sys_preadv,compat_sys_preadv_wrapper) -SYSCALL(sys_pwritev,sys_pwritev,compat_sys_pwritev_wrapper) +SYSCALL(sys_preadv,sys_preadv,compat_sys_preadv) +SYSCALL(sys_pwritev,sys_pwritev,compat_sys_pwritev) SYSCALL(sys_rt_tgsigqueueinfo,sys_rt_tgsigqueueinfo,compat_sys_rt_tgsigqueueinfo) /* 330 */ SYSCALL(sys_perf_event_open,sys_perf_event_open,sys_perf_event_open_wrapper) SYSCALL(sys_fanotify_init,sys_fanotify_init,sys_fanotify_init_wrapper) diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c index 13dd63fba367..c5762324d9ee 100644 --- a/arch/s390/kernel/traps.c +++ b/arch/s390/kernel/traps.c @@ -12,49 +12,16 @@ * 'Traps.c' handles hardware traps and faults after we have saved some * state in 'asm.s'. */ -#include <linux/sched.h> -#include <linux/kernel.h> -#include <linux/string.h> -#include <linux/errno.h> +#include <linux/kprobes.h> +#include <linux/kdebug.h> +#include <linux/module.h> #include <linux/ptrace.h> -#include <linux/timer.h> +#include <linux/sched.h> #include <linux/mm.h> -#include <linux/smp.h> -#include <linux/init.h> -#include <linux/interrupt.h> -#include <linux/seq_file.h> -#include <linux/delay.h> -#include <linux/module.h> -#include <linux/kdebug.h> -#include <linux/kallsyms.h> -#include <linux/reboot.h> -#include <linux/kprobes.h> -#include <linux/bug.h> -#include <linux/utsname.h> -#include <asm/uaccess.h> -#include <asm/io.h> -#include <linux/atomic.h> -#include <asm/mathemu.h> -#include <asm/cpcmd.h> -#include <asm/lowcore.h> -#include <asm/debug.h> -#include <asm/ipl.h> #include "entry.h" int show_unhandled_signals = 1; -#define stack_pointer ({ void **sp; asm("la %0,0(15)" : "=&d" (sp)); sp; }) - -#ifndef CONFIG_64BIT -#define LONG "%08lx " -#define FOURLONG "%08lx %08lx %08lx %08lx\n" -static int kstack_depth_to_print = 12; -#else /* CONFIG_64BIT */ -#define LONG "%016lx " -#define FOURLONG "%016lx %016lx %016lx %016lx\n" -static int kstack_depth_to_print = 20; -#endif /* CONFIG_64BIT */ - static inline void __user *get_trap_ip(struct pt_regs *regs) { #ifdef CONFIG_64BIT @@ -72,215 +39,6 @@ static inline void __user *get_trap_ip(struct pt_regs *regs) #endif } -/* - * For show_trace we have tree different stack to consider: - * - the panic stack which is used if the kernel stack has overflown - * - the asynchronous interrupt stack (cpu related) - * - the synchronous kernel stack (process related) - * The stack trace can start at any of the three stack and can potentially - * touch all of them. The order is: panic stack, async stack, sync stack. - */ -static unsigned long -__show_trace(unsigned long sp, unsigned long low, unsigned long high) -{ - struct stack_frame *sf; - struct pt_regs *regs; - - while (1) { - sp = sp & PSW_ADDR_INSN; - if (sp < low || sp > high - sizeof(*sf)) - return sp; - sf = (struct stack_frame *) sp; - printk("([<%016lx>] ", sf->gprs[8] & PSW_ADDR_INSN); - print_symbol("%s)\n", sf->gprs[8] & PSW_ADDR_INSN); - /* Follow the backchain. */ - while (1) { - low = sp; - sp = sf->back_chain & PSW_ADDR_INSN; - if (!sp) - break; - if (sp <= low || sp > high - sizeof(*sf)) - return sp; - sf = (struct stack_frame *) sp; - printk(" [<%016lx>] ", sf->gprs[8] & PSW_ADDR_INSN); - print_symbol("%s\n", sf->gprs[8] & PSW_ADDR_INSN); - } - /* Zero backchain detected, check for interrupt frame. */ - sp = (unsigned long) (sf + 1); - if (sp <= low || sp > high - sizeof(*regs)) - return sp; - regs = (struct pt_regs *) sp; - printk(" [<%016lx>] ", regs->psw.addr & PSW_ADDR_INSN); - print_symbol("%s\n", regs->psw.addr & PSW_ADDR_INSN); - low = sp; - sp = regs->gprs[15]; - } -} - -static void show_trace(struct task_struct *task, unsigned long *stack) -{ - register unsigned long __r15 asm ("15"); - unsigned long sp; - - sp = (unsigned long) stack; - if (!sp) - sp = task ? task->thread.ksp : __r15; - printk("Call Trace:\n"); -#ifdef CONFIG_CHECK_STACK - sp = __show_trace(sp, S390_lowcore.panic_stack - 4096, - S390_lowcore.panic_stack); -#endif - sp = __show_trace(sp, S390_lowcore.async_stack - ASYNC_SIZE, - S390_lowcore.async_stack); - if (task) - __show_trace(sp, (unsigned long) task_stack_page(task), - (unsigned long) task_stack_page(task) + THREAD_SIZE); - else - __show_trace(sp, S390_lowcore.thread_info, - S390_lowcore.thread_info + THREAD_SIZE); - if (!task) - task = current; - debug_show_held_locks(task); -} - -void show_stack(struct task_struct *task, unsigned long *sp) -{ - register unsigned long * __r15 asm ("15"); - unsigned long *stack; - int i; - - if (!sp) - stack = task ? (unsigned long *) task->thread.ksp : __r15; - else - stack = sp; - - for (i = 0; i < kstack_depth_to_print; i++) { - if (((addr_t) stack & (THREAD_SIZE-1)) == 0) - break; - if ((i * sizeof(long) % 32) == 0) - printk("%s ", i == 0 ? "" : "\n"); - printk(LONG, *stack++); - } - printk("\n"); - show_trace(task, sp); -} - -static void show_last_breaking_event(struct pt_regs *regs) -{ -#ifdef CONFIG_64BIT - printk("Last Breaking-Event-Address:\n"); - printk(" [<%016lx>] ", regs->args[0] & PSW_ADDR_INSN); - print_symbol("%s\n", regs->args[0] & PSW_ADDR_INSN); -#endif -} - -/* - * The architecture-independent dump_stack generator - */ -void dump_stack(void) -{ - printk("CPU: %d %s %s %.*s\n", - task_thread_info(current)->cpu, print_tainted(), - init_utsname()->release, - (int)strcspn(init_utsname()->version, " "), - init_utsname()->version); - printk("Process %s (pid: %d, task: %p, ksp: %p)\n", - current->comm, current->pid, current, - (void *) current->thread.ksp); - show_stack(NULL, NULL); -} -EXPORT_SYMBOL(dump_stack); - -static inline int mask_bits(struct pt_regs *regs, unsigned long bits) -{ - return (regs->psw.mask & bits) / ((~bits + 1) & bits); -} - -void show_registers(struct pt_regs *regs) -{ - char *mode; - - mode = user_mode(regs) ? "User" : "Krnl"; - printk("%s PSW : %p %p", - mode, (void *) regs->psw.mask, - (void *) regs->psw.addr); - print_symbol(" (%s)\n", regs->psw.addr & PSW_ADDR_INSN); - printk(" R:%x T:%x IO:%x EX:%x Key:%x M:%x W:%x " - "P:%x AS:%x CC:%x PM:%x", mask_bits(regs, PSW_MASK_PER), - mask_bits(regs, PSW_MASK_DAT), mask_bits(regs, PSW_MASK_IO), - mask_bits(regs, PSW_MASK_EXT), mask_bits(regs, PSW_MASK_KEY), - mask_bits(regs, PSW_MASK_MCHECK), mask_bits(regs, PSW_MASK_WAIT), - mask_bits(regs, PSW_MASK_PSTATE), mask_bits(regs, PSW_MASK_ASC), - mask_bits(regs, PSW_MASK_CC), mask_bits(regs, PSW_MASK_PM)); -#ifdef CONFIG_64BIT - printk(" EA:%x", mask_bits(regs, PSW_MASK_EA | PSW_MASK_BA)); -#endif - printk("\n%s GPRS: " FOURLONG, mode, - regs->gprs[0], regs->gprs[1], regs->gprs[2], regs->gprs[3]); - printk(" " FOURLONG, - regs->gprs[4], regs->gprs[5], regs->gprs[6], regs->gprs[7]); - printk(" " FOURLONG, - regs->gprs[8], regs->gprs[9], regs->gprs[10], regs->gprs[11]); - printk(" " FOURLONG, - regs->gprs[12], regs->gprs[13], regs->gprs[14], regs->gprs[15]); - - show_code(regs); -} - -void show_regs(struct pt_regs *regs) -{ - printk("CPU: %d %s %s %.*s\n", - task_thread_info(current)->cpu, print_tainted(), - init_utsname()->release, - (int)strcspn(init_utsname()->version, " "), - init_utsname()->version); - printk("Process %s (pid: %d, task: %p, ksp: %p)\n", - current->comm, current->pid, current, - (void *) current->thread.ksp); - show_registers(regs); - /* Show stack backtrace if pt_regs is from kernel mode */ - if (!user_mode(regs)) - show_trace(NULL, (unsigned long *) regs->gprs[15]); - show_last_breaking_event(regs); -} - -static DEFINE_SPINLOCK(die_lock); - -void die(struct pt_regs *regs, const char *str) -{ - static int die_counter; - - oops_enter(); - lgr_info_log(); - debug_stop_all(); - console_verbose(); - spin_lock_irq(&die_lock); - bust_spinlocks(1); - printk("%s: %04x [#%d] ", str, regs->int_code & 0xffff, ++die_counter); -#ifdef CONFIG_PREEMPT - printk("PREEMPT "); -#endif -#ifdef CONFIG_SMP - printk("SMP "); -#endif -#ifdef CONFIG_DEBUG_PAGEALLOC - printk("DEBUG_PAGEALLOC"); -#endif - printk("\n"); - notify_die(DIE_OOPS, str, regs, 0, regs->int_code & 0xffff, SIGSEGV); - print_modules(); - show_regs(regs); - bust_spinlocks(0); - add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE); - spin_unlock_irq(&die_lock); - if (in_interrupt()) - panic("Fatal exception in interrupt"); - if (panic_on_oops) - panic("Fatal exception: panic_on_oops"); - oops_exit(); - do_exit(SIGSEGV); -} - static inline void report_user_fault(struct pt_regs *regs, int signr) { if ((task_pid_nr(current) > 1) && !show_unhandled_signals) diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c index a0042acbd989..3fb09359eda6 100644 --- a/arch/s390/kernel/vtime.c +++ b/arch/s390/kernel/vtime.c @@ -158,8 +158,6 @@ void __kprobes vtime_stop_cpu(void) unsigned long psw_mask; trace_hardirqs_on(); - /* Don't trace preempt off for idle. */ - stop_critical_timings(); /* Wait for external, I/O or machine check interrupt. */ psw_mask = psw_kernel_bits | PSW_MASK_WAIT | PSW_MASK_DAT | @@ -169,9 +167,6 @@ void __kprobes vtime_stop_cpu(void) /* Call the assembler magic in entry.S */ psw_idle(idle, psw_mask); - /* Reenable preemption tracer. */ - start_critical_timings(); - /* Account time spent with enabled wait psw loaded as idle time. */ idle->sequence++; smp_wmb(); diff --git a/arch/s390/kvm/Kconfig b/arch/s390/kvm/Kconfig index 60f9f8ae0fc8..70b46eacf8e1 100644 --- a/arch/s390/kvm/Kconfig +++ b/arch/s390/kvm/Kconfig @@ -22,6 +22,7 @@ config KVM select PREEMPT_NOTIFIERS select ANON_INODES select HAVE_KVM_CPU_RELAX_INTERCEPT + select HAVE_KVM_EVENTFD ---help--- Support hosting paravirtualized guest machines using the SIE virtualization capability on the mainframe. This should work diff --git a/arch/s390/kvm/Makefile b/arch/s390/kvm/Makefile index 3975722bb19d..8fe9d65a4585 100644 --- a/arch/s390/kvm/Makefile +++ b/arch/s390/kvm/Makefile @@ -6,7 +6,7 @@ # it under the terms of the GNU General Public License (version 2 only) # as published by the Free Software Foundation. -common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o) +common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o eventfd.o) ccflags-y := -Ivirt/kvm -Iarch/s390/kvm diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c index a390687feb13..1c01a9912989 100644 --- a/arch/s390/kvm/diag.c +++ b/arch/s390/kvm/diag.c @@ -13,6 +13,7 @@ #include <linux/kvm.h> #include <linux/kvm_host.h> +#include <asm/virtio-ccw.h> #include "kvm-s390.h" #include "trace.h" #include "trace-s390.h" @@ -104,6 +105,29 @@ static int __diag_ipl_functions(struct kvm_vcpu *vcpu) return -EREMOTE; } +static int __diag_virtio_hypercall(struct kvm_vcpu *vcpu) +{ + int ret, idx; + + /* No virtio-ccw notification? Get out quickly. */ + if (!vcpu->kvm->arch.css_support || + (vcpu->run->s.regs.gprs[1] != KVM_S390_VIRTIO_CCW_NOTIFY)) + return -EOPNOTSUPP; + + idx = srcu_read_lock(&vcpu->kvm->srcu); + /* + * The layout is as follows: + * - gpr 2 contains the subchannel id (passed as addr) + * - gpr 3 contains the virtqueue index (passed as datamatch) + */ + ret = kvm_io_bus_write(vcpu->kvm, KVM_VIRTIO_CCW_NOTIFY_BUS, + vcpu->run->s.regs.gprs[2], + 8, &vcpu->run->s.regs.gprs[3]); + srcu_read_unlock(&vcpu->kvm->srcu, idx); + /* kvm_io_bus_write returns -EOPNOTSUPP if it found no match. */ + return ret < 0 ? ret : 0; +} + int kvm_s390_handle_diag(struct kvm_vcpu *vcpu) { int code = (vcpu->arch.sie_block->ipb & 0xfff0000) >> 16; @@ -118,6 +142,8 @@ int kvm_s390_handle_diag(struct kvm_vcpu *vcpu) return __diag_time_slice_end_directed(vcpu); case 0x308: return __diag_ipl_functions(vcpu); + case 0x500: + return __diag_virtio_hypercall(vcpu); default: return -EOPNOTSUPP; } diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h index 4703f129e95e..302e0e52b009 100644 --- a/arch/s390/kvm/gaccess.h +++ b/arch/s390/kvm/gaccess.h @@ -18,369 +18,86 @@ #include <asm/uaccess.h> #include "kvm-s390.h" -static inline void __user *__guestaddr_to_user(struct kvm_vcpu *vcpu, - unsigned long guestaddr) +static inline void __user *__gptr_to_uptr(struct kvm_vcpu *vcpu, + void __user *gptr, + int prefixing) { unsigned long prefix = vcpu->arch.sie_block->prefix; - - if (guestaddr < 2 * PAGE_SIZE) - guestaddr += prefix; - else if ((guestaddr >= prefix) && (guestaddr < prefix + 2 * PAGE_SIZE)) - guestaddr -= prefix; - - return (void __user *) gmap_fault(guestaddr, vcpu->arch.gmap); -} - -static inline int get_guest_u64(struct kvm_vcpu *vcpu, unsigned long guestaddr, - u64 *result) -{ - void __user *uptr = __guestaddr_to_user(vcpu, guestaddr); - - BUG_ON(guestaddr & 7); - - if (IS_ERR((void __force *) uptr)) - return PTR_ERR((void __force *) uptr); - - return get_user(*result, (unsigned long __user *) uptr); -} - -static inline int get_guest_u32(struct kvm_vcpu *vcpu, unsigned long guestaddr, - u32 *result) -{ - void __user *uptr = __guestaddr_to_user(vcpu, guestaddr); - - BUG_ON(guestaddr & 3); - - if (IS_ERR((void __force *) uptr)) - return PTR_ERR((void __force *) uptr); - - return get_user(*result, (u32 __user *) uptr); -} - -static inline int get_guest_u16(struct kvm_vcpu *vcpu, unsigned long guestaddr, - u16 *result) -{ - void __user *uptr = __guestaddr_to_user(vcpu, guestaddr); - - BUG_ON(guestaddr & 1); - - if (IS_ERR(uptr)) - return PTR_ERR(uptr); - - return get_user(*result, (u16 __user *) uptr); -} - -static inline int get_guest_u8(struct kvm_vcpu *vcpu, unsigned long guestaddr, - u8 *result) -{ - void __user *uptr = __guestaddr_to_user(vcpu, guestaddr); - - if (IS_ERR((void __force *) uptr)) - return PTR_ERR((void __force *) uptr); - - return get_user(*result, (u8 __user *) uptr); -} - -static inline int put_guest_u64(struct kvm_vcpu *vcpu, unsigned long guestaddr, - u64 value) -{ - void __user *uptr = __guestaddr_to_user(vcpu, guestaddr); - - BUG_ON(guestaddr & 7); - - if (IS_ERR((void __force *) uptr)) - return PTR_ERR((void __force *) uptr); - - return put_user(value, (u64 __user *) uptr); -} - -static inline int put_guest_u32(struct kvm_vcpu *vcpu, unsigned long guestaddr, - u32 value) -{ - void __user *uptr = __guestaddr_to_user(vcpu, guestaddr); - - BUG_ON(guestaddr & 3); - - if (IS_ERR((void __force *) uptr)) - return PTR_ERR((void __force *) uptr); - - return put_user(value, (u32 __user *) uptr); -} - -static inline int put_guest_u16(struct kvm_vcpu *vcpu, unsigned long guestaddr, - u16 value) -{ - void __user *uptr = __guestaddr_to_user(vcpu, guestaddr); - - BUG_ON(guestaddr & 1); - - if (IS_ERR((void __force *) uptr)) - return PTR_ERR((void __force *) uptr); - - return put_user(value, (u16 __user *) uptr); -} - -static inline int put_guest_u8(struct kvm_vcpu *vcpu, unsigned long guestaddr, - u8 value) -{ - void __user *uptr = __guestaddr_to_user(vcpu, guestaddr); - - if (IS_ERR((void __force *) uptr)) - return PTR_ERR((void __force *) uptr); - - return put_user(value, (u8 __user *) uptr); -} - - -static inline int __copy_to_guest_slow(struct kvm_vcpu *vcpu, - unsigned long guestdest, - void *from, unsigned long n) -{ - int rc; - unsigned long i; - u8 *data = from; - - for (i = 0; i < n; i++) { - rc = put_guest_u8(vcpu, guestdest++, *(data++)); - if (rc < 0) - return rc; + unsigned long gaddr = (unsigned long) gptr; + unsigned long uaddr; + + if (prefixing) { + if (gaddr < 2 * PAGE_SIZE) + gaddr += prefix; + else if ((gaddr >= prefix) && (gaddr < prefix + 2 * PAGE_SIZE)) + gaddr -= prefix; } - return 0; -} - -static inline int __copy_to_guest_fast(struct kvm_vcpu *vcpu, - unsigned long guestdest, - void *from, unsigned long n) -{ - int r; + uaddr = gmap_fault(gaddr, vcpu->arch.gmap); + if (IS_ERR_VALUE(uaddr)) + uaddr = -EFAULT; + return (void __user *)uaddr; +} + +#define get_guest(vcpu, x, gptr) \ +({ \ + __typeof__(gptr) __uptr = __gptr_to_uptr(vcpu, gptr, 1);\ + int __mask = sizeof(__typeof__(*(gptr))) - 1; \ + int __ret = PTR_RET((void __force *)__uptr); \ + \ + if (!__ret) { \ + BUG_ON((unsigned long)__uptr & __mask); \ + __ret = get_user(x, __uptr); \ + } \ + __ret; \ +}) + +#define put_guest(vcpu, x, gptr) \ +({ \ + __typeof__(gptr) __uptr = __gptr_to_uptr(vcpu, gptr, 1);\ + int __mask = sizeof(__typeof__(*(gptr))) - 1; \ + int __ret = PTR_RET((void __force *)__uptr); \ + \ + if (!__ret) { \ + BUG_ON((unsigned long)__uptr & __mask); \ + __ret = put_user(x, __uptr); \ + } \ + __ret; \ +}) + +static inline int __copy_guest(struct kvm_vcpu *vcpu, unsigned long to, + unsigned long from, unsigned long len, + int to_guest, int prefixing) +{ + unsigned long _len, rc; void __user *uptr; - unsigned long size; - - if (guestdest + n < guestdest) - return -EFAULT; - - /* simple case: all within one segment table entry? */ - if ((guestdest & PMD_MASK) == ((guestdest+n) & PMD_MASK)) { - uptr = (void __user *) gmap_fault(guestdest, vcpu->arch.gmap); - - if (IS_ERR((void __force *) uptr)) - return PTR_ERR((void __force *) uptr); - - r = copy_to_user(uptr, from, n); - - if (r) - r = -EFAULT; - - goto out; - } - - /* copy first segment */ - uptr = (void __user *)gmap_fault(guestdest, vcpu->arch.gmap); - - if (IS_ERR((void __force *) uptr)) - return PTR_ERR((void __force *) uptr); - size = PMD_SIZE - (guestdest & ~PMD_MASK); - - r = copy_to_user(uptr, from, size); - - if (r) { - r = -EFAULT; - goto out; - } - from += size; - n -= size; - guestdest += size; - - /* copy full segments */ - while (n >= PMD_SIZE) { - uptr = (void __user *)gmap_fault(guestdest, vcpu->arch.gmap); - - if (IS_ERR((void __force *) uptr)) - return PTR_ERR((void __force *) uptr); - - r = copy_to_user(uptr, from, PMD_SIZE); - - if (r) { - r = -EFAULT; - goto out; - } - from += PMD_SIZE; - n -= PMD_SIZE; - guestdest += PMD_SIZE; - } - - /* copy the tail segment */ - if (n) { - uptr = (void __user *)gmap_fault(guestdest, vcpu->arch.gmap); - - if (IS_ERR((void __force *) uptr)) - return PTR_ERR((void __force *) uptr); - - r = copy_to_user(uptr, from, n); - - if (r) - r = -EFAULT; - } -out: - return r; -} - -static inline int copy_to_guest_absolute(struct kvm_vcpu *vcpu, - unsigned long guestdest, - void *from, unsigned long n) -{ - return __copy_to_guest_fast(vcpu, guestdest, from, n); -} - -static inline int copy_to_guest(struct kvm_vcpu *vcpu, unsigned long guestdest, - void *from, unsigned long n) -{ - unsigned long prefix = vcpu->arch.sie_block->prefix; - - if ((guestdest < 2 * PAGE_SIZE) && (guestdest + n > 2 * PAGE_SIZE)) - goto slowpath; - - if ((guestdest < prefix) && (guestdest + n > prefix)) - goto slowpath; - - if ((guestdest < prefix + 2 * PAGE_SIZE) - && (guestdest + n > prefix + 2 * PAGE_SIZE)) - goto slowpath; - - if (guestdest < 2 * PAGE_SIZE) - guestdest += prefix; - else if ((guestdest >= prefix) && (guestdest < prefix + 2 * PAGE_SIZE)) - guestdest -= prefix; - - return __copy_to_guest_fast(vcpu, guestdest, from, n); -slowpath: - return __copy_to_guest_slow(vcpu, guestdest, from, n); -} - -static inline int __copy_from_guest_slow(struct kvm_vcpu *vcpu, void *to, - unsigned long guestsrc, - unsigned long n) -{ - int rc; - unsigned long i; - u8 *data = to; - - for (i = 0; i < n; i++) { - rc = get_guest_u8(vcpu, guestsrc++, data++); - if (rc < 0) - return rc; + while (len) { + uptr = to_guest ? (void __user *)to : (void __user *)from; + uptr = __gptr_to_uptr(vcpu, uptr, prefixing); + if (IS_ERR((void __force *)uptr)) + return -EFAULT; + _len = PAGE_SIZE - ((unsigned long)uptr & (PAGE_SIZE - 1)); + _len = min(_len, len); + if (to_guest) + rc = copy_to_user((void __user *) uptr, (void *)from, _len); + else + rc = copy_from_user((void *)to, (void __user *)uptr, _len); + if (rc) + return -EFAULT; + len -= _len; + from += _len; + to += _len; } return 0; } -static inline int __copy_from_guest_fast(struct kvm_vcpu *vcpu, void *to, - unsigned long guestsrc, - unsigned long n) -{ - int r; - void __user *uptr; - unsigned long size; - - if (guestsrc + n < guestsrc) - return -EFAULT; - - /* simple case: all within one segment table entry? */ - if ((guestsrc & PMD_MASK) == ((guestsrc+n) & PMD_MASK)) { - uptr = (void __user *) gmap_fault(guestsrc, vcpu->arch.gmap); - - if (IS_ERR((void __force *) uptr)) - return PTR_ERR((void __force *) uptr); - - r = copy_from_user(to, uptr, n); - - if (r) - r = -EFAULT; - - goto out; - } - - /* copy first segment */ - uptr = (void __user *)gmap_fault(guestsrc, vcpu->arch.gmap); - - if (IS_ERR((void __force *) uptr)) - return PTR_ERR((void __force *) uptr); - - size = PMD_SIZE - (guestsrc & ~PMD_MASK); - - r = copy_from_user(to, uptr, size); - - if (r) { - r = -EFAULT; - goto out; - } - to += size; - n -= size; - guestsrc += size; - - /* copy full segments */ - while (n >= PMD_SIZE) { - uptr = (void __user *)gmap_fault(guestsrc, vcpu->arch.gmap); - - if (IS_ERR((void __force *) uptr)) - return PTR_ERR((void __force *) uptr); - - r = copy_from_user(to, uptr, PMD_SIZE); - - if (r) { - r = -EFAULT; - goto out; - } - to += PMD_SIZE; - n -= PMD_SIZE; - guestsrc += PMD_SIZE; - } - - /* copy the tail segment */ - if (n) { - uptr = (void __user *)gmap_fault(guestsrc, vcpu->arch.gmap); - - if (IS_ERR((void __force *) uptr)) - return PTR_ERR((void __force *) uptr); - - r = copy_from_user(to, uptr, n); - - if (r) - r = -EFAULT; - } -out: - return r; -} - -static inline int copy_from_guest_absolute(struct kvm_vcpu *vcpu, void *to, - unsigned long guestsrc, - unsigned long n) -{ - return __copy_from_guest_fast(vcpu, to, guestsrc, n); -} - -static inline int copy_from_guest(struct kvm_vcpu *vcpu, void *to, - unsigned long guestsrc, unsigned long n) -{ - unsigned long prefix = vcpu->arch.sie_block->prefix; - - if ((guestsrc < 2 * PAGE_SIZE) && (guestsrc + n > 2 * PAGE_SIZE)) - goto slowpath; +#define copy_to_guest(vcpu, to, from, size) \ + __copy_guest(vcpu, to, (unsigned long)from, size, 1, 1) +#define copy_from_guest(vcpu, to, from, size) \ + __copy_guest(vcpu, (unsigned long)to, from, size, 0, 1) +#define copy_to_guest_absolute(vcpu, to, from, size) \ + __copy_guest(vcpu, to, (unsigned long)from, size, 1, 0) +#define copy_from_guest_absolute(vcpu, to, from, size) \ + __copy_guest(vcpu, (unsigned long)to, from, size, 0, 0) - if ((guestsrc < prefix) && (guestsrc + n > prefix)) - goto slowpath; - - if ((guestsrc < prefix + 2 * PAGE_SIZE) - && (guestsrc + n > prefix + 2 * PAGE_SIZE)) - goto slowpath; - - if (guestsrc < 2 * PAGE_SIZE) - guestsrc += prefix; - else if ((guestsrc >= prefix) && (guestsrc < prefix + 2 * PAGE_SIZE)) - guestsrc -= prefix; - - return __copy_from_guest_fast(vcpu, to, guestsrc, n); -slowpath: - return __copy_from_guest_slow(vcpu, to, guestsrc, n); -} -#endif +#endif /* __KVM_S390_GACCESS_H */ diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c index f26ff1e31bdb..b7d1b2edeeb3 100644 --- a/arch/s390/kvm/intercept.c +++ b/arch/s390/kvm/intercept.c @@ -43,12 +43,10 @@ static int handle_lctlg(struct kvm_vcpu *vcpu) trace_kvm_s390_handle_lctl(vcpu, 1, reg1, reg3, useraddr); do { - rc = get_guest_u64(vcpu, useraddr, - &vcpu->arch.sie_block->gcr[reg]); - if (rc == -EFAULT) { - kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); - break; - } + rc = get_guest(vcpu, vcpu->arch.sie_block->gcr[reg], + (u64 __user *) useraddr); + if (rc) + return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); useraddr += 8; if (reg == reg3) break; @@ -78,11 +76,9 @@ static int handle_lctl(struct kvm_vcpu *vcpu) reg = reg1; do { - rc = get_guest_u32(vcpu, useraddr, &val); - if (rc == -EFAULT) { - kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); - break; - } + rc = get_guest(vcpu, val, (u32 __user *) useraddr); + if (rc) + return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); vcpu->arch.sie_block->gcr[reg] &= 0xffffffff00000000ul; vcpu->arch.sie_block->gcr[reg] |= val; useraddr += 4; diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 37116a77cb4b..5c948177529e 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -180,7 +180,7 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu, struct kvm_s390_interrupt_info *inti) { const unsigned short table[] = { 2, 4, 4, 6 }; - int rc, exception = 0; + int rc = 0; switch (inti->type) { case KVM_S390_INT_EMERGENCY: @@ -188,74 +188,41 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu, vcpu->stat.deliver_emergency_signal++; trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, inti->emerg.code, 0); - rc = put_guest_u16(vcpu, __LC_EXT_INT_CODE, 0x1201); - if (rc == -EFAULT) - exception = 1; - - rc = put_guest_u16(vcpu, __LC_EXT_CPU_ADDR, inti->emerg.code); - if (rc == -EFAULT) - exception = 1; - - rc = copy_to_guest(vcpu, __LC_EXT_OLD_PSW, - &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); - if (rc == -EFAULT) - exception = 1; - - rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, - __LC_EXT_NEW_PSW, sizeof(psw_t)); - if (rc == -EFAULT) - exception = 1; + rc = put_guest(vcpu, 0x1201, (u16 __user *)__LC_EXT_INT_CODE); + rc |= put_guest(vcpu, inti->emerg.code, + (u16 __user *)__LC_EXT_CPU_ADDR); + rc |= copy_to_guest(vcpu, __LC_EXT_OLD_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, + __LC_EXT_NEW_PSW, sizeof(psw_t)); break; - case KVM_S390_INT_EXTERNAL_CALL: VCPU_EVENT(vcpu, 4, "%s", "interrupt: sigp ext call"); vcpu->stat.deliver_external_call++; trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, inti->extcall.code, 0); - rc = put_guest_u16(vcpu, __LC_EXT_INT_CODE, 0x1202); - if (rc == -EFAULT) - exception = 1; - - rc = put_guest_u16(vcpu, __LC_EXT_CPU_ADDR, inti->extcall.code); - if (rc == -EFAULT) - exception = 1; - - rc = copy_to_guest(vcpu, __LC_EXT_OLD_PSW, - &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); - if (rc == -EFAULT) - exception = 1; - - rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, - __LC_EXT_NEW_PSW, sizeof(psw_t)); - if (rc == -EFAULT) - exception = 1; + rc = put_guest(vcpu, 0x1202, (u16 __user *)__LC_EXT_INT_CODE); + rc |= put_guest(vcpu, inti->extcall.code, + (u16 __user *)__LC_EXT_CPU_ADDR); + rc |= copy_to_guest(vcpu, __LC_EXT_OLD_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, + __LC_EXT_NEW_PSW, sizeof(psw_t)); break; - case KVM_S390_INT_SERVICE: VCPU_EVENT(vcpu, 4, "interrupt: sclp parm:%x", inti->ext.ext_params); vcpu->stat.deliver_service_signal++; trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, inti->ext.ext_params, 0); - rc = put_guest_u16(vcpu, __LC_EXT_INT_CODE, 0x2401); - if (rc == -EFAULT) - exception = 1; - - rc = copy_to_guest(vcpu, __LC_EXT_OLD_PSW, - &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); - if (rc == -EFAULT) - exception = 1; - - rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, - __LC_EXT_NEW_PSW, sizeof(psw_t)); - if (rc == -EFAULT) - exception = 1; - - rc = put_guest_u32(vcpu, __LC_EXT_PARAMS, inti->ext.ext_params); - if (rc == -EFAULT) - exception = 1; + rc = put_guest(vcpu, 0x2401, (u16 __user *)__LC_EXT_INT_CODE); + rc |= copy_to_guest(vcpu, __LC_EXT_OLD_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, + __LC_EXT_NEW_PSW, sizeof(psw_t)); + rc |= put_guest(vcpu, inti->ext.ext_params, + (u32 __user *)__LC_EXT_PARAMS); break; - case KVM_S390_INT_VIRTIO: VCPU_EVENT(vcpu, 4, "interrupt: virtio parm:%x,parm64:%llx", inti->ext.ext_params, inti->ext.ext_params2); @@ -263,34 +230,17 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu, trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, inti->ext.ext_params, inti->ext.ext_params2); - rc = put_guest_u16(vcpu, __LC_EXT_INT_CODE, 0x2603); - if (rc == -EFAULT) - exception = 1; - - rc = put_guest_u16(vcpu, __LC_EXT_CPU_ADDR, 0x0d00); - if (rc == -EFAULT) - exception = 1; - - rc = copy_to_guest(vcpu, __LC_EXT_OLD_PSW, - &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); - if (rc == -EFAULT) - exception = 1; - - rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, - __LC_EXT_NEW_PSW, sizeof(psw_t)); - if (rc == -EFAULT) - exception = 1; - - rc = put_guest_u32(vcpu, __LC_EXT_PARAMS, inti->ext.ext_params); - if (rc == -EFAULT) - exception = 1; - - rc = put_guest_u64(vcpu, __LC_EXT_PARAMS2, - inti->ext.ext_params2); - if (rc == -EFAULT) - exception = 1; + rc = put_guest(vcpu, 0x2603, (u16 __user *)__LC_EXT_INT_CODE); + rc |= put_guest(vcpu, 0x0d00, (u16 __user *)__LC_EXT_CPU_ADDR); + rc |= copy_to_guest(vcpu, __LC_EXT_OLD_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, + __LC_EXT_NEW_PSW, sizeof(psw_t)); + rc |= put_guest(vcpu, inti->ext.ext_params, + (u32 __user *)__LC_EXT_PARAMS); + rc |= put_guest(vcpu, inti->ext.ext_params2, + (u64 __user *)__LC_EXT_PARAMS2); break; - case KVM_S390_SIGP_STOP: VCPU_EVENT(vcpu, 4, "%s", "interrupt: cpu stop"); vcpu->stat.deliver_stop_signal++; @@ -313,18 +263,14 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu, vcpu->stat.deliver_restart_signal++; trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, 0, 0); - rc = copy_to_guest(vcpu, offsetof(struct _lowcore, - restart_old_psw), &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); - if (rc == -EFAULT) - exception = 1; - - rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, - offsetof(struct _lowcore, restart_psw), sizeof(psw_t)); - if (rc == -EFAULT) - exception = 1; + rc = copy_to_guest(vcpu, + offsetof(struct _lowcore, restart_old_psw), + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, + offsetof(struct _lowcore, restart_psw), + sizeof(psw_t)); atomic_clear_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags); break; - case KVM_S390_PROGRAM_INT: VCPU_EVENT(vcpu, 4, "interrupt: pgm check code:%x, ilc:%x", inti->pgm.code, @@ -332,24 +278,13 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu, vcpu->stat.deliver_program_int++; trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, inti->pgm.code, 0); - rc = put_guest_u16(vcpu, __LC_PGM_INT_CODE, inti->pgm.code); - if (rc == -EFAULT) - exception = 1; - - rc = put_guest_u16(vcpu, __LC_PGM_ILC, - table[vcpu->arch.sie_block->ipa >> 14]); - if (rc == -EFAULT) - exception = 1; - - rc = copy_to_guest(vcpu, __LC_PGM_OLD_PSW, - &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); - if (rc == -EFAULT) - exception = 1; - - rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, - __LC_PGM_NEW_PSW, sizeof(psw_t)); - if (rc == -EFAULT) - exception = 1; + rc = put_guest(vcpu, inti->pgm.code, (u16 __user *)__LC_PGM_INT_CODE); + rc |= put_guest(vcpu, table[vcpu->arch.sie_block->ipa >> 14], + (u16 __user *)__LC_PGM_ILC); + rc |= copy_to_guest(vcpu, __LC_PGM_OLD_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, + __LC_PGM_NEW_PSW, sizeof(psw_t)); break; case KVM_S390_MCHK: @@ -358,24 +293,13 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu, trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, inti->mchk.cr14, inti->mchk.mcic); - rc = kvm_s390_vcpu_store_status(vcpu, - KVM_S390_STORE_STATUS_PREFIXED); - if (rc == -EFAULT) - exception = 1; - - rc = put_guest_u64(vcpu, __LC_MCCK_CODE, inti->mchk.mcic); - if (rc == -EFAULT) - exception = 1; - - rc = copy_to_guest(vcpu, __LC_MCK_OLD_PSW, - &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); - if (rc == -EFAULT) - exception = 1; - - rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, - __LC_MCK_NEW_PSW, sizeof(psw_t)); - if (rc == -EFAULT) - exception = 1; + rc = kvm_s390_vcpu_store_status(vcpu, + KVM_S390_STORE_STATUS_PREFIXED); + rc |= put_guest(vcpu, inti->mchk.mcic, (u64 __user *) __LC_MCCK_CODE); + rc |= copy_to_guest(vcpu, __LC_MCK_OLD_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, + __LC_MCK_NEW_PSW, sizeof(psw_t)); break; case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX: @@ -388,67 +312,44 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu, vcpu->stat.deliver_io_int++; trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, param0, param1); - rc = put_guest_u16(vcpu, __LC_SUBCHANNEL_ID, - inti->io.subchannel_id); - if (rc == -EFAULT) - exception = 1; - - rc = put_guest_u16(vcpu, __LC_SUBCHANNEL_NR, - inti->io.subchannel_nr); - if (rc == -EFAULT) - exception = 1; - - rc = put_guest_u32(vcpu, __LC_IO_INT_PARM, - inti->io.io_int_parm); - if (rc == -EFAULT) - exception = 1; - - rc = put_guest_u32(vcpu, __LC_IO_INT_WORD, - inti->io.io_int_word); - if (rc == -EFAULT) - exception = 1; - - rc = copy_to_guest(vcpu, __LC_IO_OLD_PSW, - &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); - if (rc == -EFAULT) - exception = 1; - - rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, - __LC_IO_NEW_PSW, sizeof(psw_t)); - if (rc == -EFAULT) - exception = 1; + rc = put_guest(vcpu, inti->io.subchannel_id, + (u16 __user *) __LC_SUBCHANNEL_ID); + rc |= put_guest(vcpu, inti->io.subchannel_nr, + (u16 __user *) __LC_SUBCHANNEL_NR); + rc |= put_guest(vcpu, inti->io.io_int_parm, + (u32 __user *) __LC_IO_INT_PARM); + rc |= put_guest(vcpu, inti->io.io_int_word, + (u32 __user *) __LC_IO_INT_WORD); + rc |= copy_to_guest(vcpu, __LC_IO_OLD_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, + __LC_IO_NEW_PSW, sizeof(psw_t)); break; } default: BUG(); } - if (exception) { + if (rc) { printk("kvm: The guest lowcore is not mapped during interrupt " - "delivery, killing userspace\n"); + "delivery, killing userspace\n"); do_exit(SIGKILL); } } static int __try_deliver_ckc_interrupt(struct kvm_vcpu *vcpu) { - int rc, exception = 0; + int rc; if (psw_extint_disabled(vcpu)) return 0; if (!(vcpu->arch.sie_block->gcr[0] & 0x800ul)) return 0; - rc = put_guest_u16(vcpu, __LC_EXT_INT_CODE, 0x1004); - if (rc == -EFAULT) - exception = 1; - rc = copy_to_guest(vcpu, __LC_EXT_OLD_PSW, - &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); - if (rc == -EFAULT) - exception = 1; - rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, - __LC_EXT_NEW_PSW, sizeof(psw_t)); - if (rc == -EFAULT) - exception = 1; - if (exception) { + rc = put_guest(vcpu, 0x1004, (u16 __user *)__LC_EXT_INT_CODE); + rc |= copy_to_guest(vcpu, __LC_EXT_OLD_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, + __LC_EXT_NEW_PSW, sizeof(psw_t)); + if (rc) { printk("kvm: The guest lowcore is not mapped during interrupt " "delivery, killing userspace\n"); do_exit(SIGKILL); diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 4cf35a0a79e7..c1c7c683fa26 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -142,12 +142,16 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_ONE_REG: case KVM_CAP_ENABLE_CAP: case KVM_CAP_S390_CSS_SUPPORT: + case KVM_CAP_IOEVENTFD: r = 1; break; case KVM_CAP_NR_VCPUS: case KVM_CAP_MAX_VCPUS: r = KVM_MAX_VCPUS; break; + case KVM_CAP_NR_MEMSLOTS: + r = KVM_USER_MEM_SLOTS; + break; case KVM_CAP_S390_COW: r = MACHINE_HAS_ESOP; break; @@ -632,8 +636,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) } else { VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction"); trace_kvm_s390_sie_fault(vcpu); - kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); - rc = 0; + rc = kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); } } VCPU_EVENT(vcpu, 6, "exit sie icptcode %d", @@ -974,22 +977,13 @@ int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) /* Section: memory related */ int kvm_arch_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, - struct kvm_memory_slot old, struct kvm_userspace_memory_region *mem, - bool user_alloc) + enum kvm_mr_change change) { - /* A few sanity checks. We can have exactly one memory slot which has - to start at guest virtual zero and which has to be located at a - page boundary in userland and which has to end at a page boundary. - The memory in userland is ok to be fragmented into various different - vmas. It is okay to mmap() and munmap() stuff in this slot after - doing this call at any time */ - - if (mem->slot) - return -EINVAL; - - if (mem->guest_phys_addr) - return -EINVAL; + /* A few sanity checks. We can have memory slots which have to be + located/ended at a segment boundary (1MB). The memory in userland is + ok to be fragmented into various different vmas. It is okay to mmap() + and munmap() stuff in this slot after doing this call at any time */ if (mem->userspace_addr & 0xffffful) return -EINVAL; @@ -997,19 +991,26 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, if (mem->memory_size & 0xffffful) return -EINVAL; - if (!user_alloc) - return -EINVAL; - return 0; } void kvm_arch_commit_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem, - struct kvm_memory_slot old, - bool user_alloc) + const struct kvm_memory_slot *old, + enum kvm_mr_change change) { int rc; + /* If the basics of the memslot do not change, we do not want + * to update the gmap. Every update causes several unnecessary + * segment translation exceptions. This is usually handled just + * fine by the normal fault handler + gmap, but it will also + * cause faults on the prefix page of running guest CPUs. + */ + if (old->userspace_addr == mem->userspace_addr && + old->base_gfn * PAGE_SIZE == mem->guest_phys_addr && + old->npages * PAGE_SIZE == mem->memory_size) + return; rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr, mem->guest_phys_addr, mem->memory_size); diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index 4d89d64a8161..efc14f687265 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -110,12 +110,12 @@ enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer); void kvm_s390_tasklet(unsigned long parm); void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu); void kvm_s390_deliver_pending_machine_checks(struct kvm_vcpu *vcpu); -int kvm_s390_inject_vm(struct kvm *kvm, - struct kvm_s390_interrupt *s390int); -int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, - struct kvm_s390_interrupt *s390int); -int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code); -int kvm_s390_inject_sigp_stop(struct kvm_vcpu *vcpu, int action); +int __must_check kvm_s390_inject_vm(struct kvm *kvm, + struct kvm_s390_interrupt *s390int); +int __must_check kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, + struct kvm_s390_interrupt *s390int); +int __must_check kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code); +int __must_check kvm_s390_inject_sigp_stop(struct kvm_vcpu *vcpu, int action); struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm, u64 cr6, u64 schid); diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index 0ef9894606e5..6bbd7b5a0bbe 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -14,6 +14,8 @@ #include <linux/kvm.h> #include <linux/gfp.h> #include <linux/errno.h> +#include <linux/compat.h> +#include <asm/asm-offsets.h> #include <asm/current.h> #include <asm/debug.h> #include <asm/ebcdic.h> @@ -35,31 +37,24 @@ static int handle_set_prefix(struct kvm_vcpu *vcpu) operand2 = kvm_s390_get_base_disp_s(vcpu); /* must be word boundary */ - if (operand2 & 3) { - kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); - goto out; - } + if (operand2 & 3) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); /* get the value */ - if (get_guest_u32(vcpu, operand2, &address)) { - kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); - goto out; - } + if (get_guest(vcpu, address, (u32 __user *) operand2)) + return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); address = address & 0x7fffe000u; /* make sure that the new value is valid memory */ if (copy_from_guest_absolute(vcpu, &tmp, address, 1) || - (copy_from_guest_absolute(vcpu, &tmp, address + PAGE_SIZE, 1))) { - kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); - goto out; - } + (copy_from_guest_absolute(vcpu, &tmp, address + PAGE_SIZE, 1))) + return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); kvm_s390_set_prefix(vcpu, address); VCPU_EVENT(vcpu, 5, "setting prefix to %x", address); trace_kvm_s390_handle_prefix(vcpu, 1, address); -out: return 0; } @@ -73,49 +68,37 @@ static int handle_store_prefix(struct kvm_vcpu *vcpu) operand2 = kvm_s390_get_base_disp_s(vcpu); /* must be word boundary */ - if (operand2 & 3) { - kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); - goto out; - } + if (operand2 & 3) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); address = vcpu->arch.sie_block->prefix; address = address & 0x7fffe000u; /* get the value */ - if (put_guest_u32(vcpu, operand2, address)) { - kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); - goto out; - } + if (put_guest(vcpu, address, (u32 __user *)operand2)) + return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); VCPU_EVENT(vcpu, 5, "storing prefix to %x", address); trace_kvm_s390_handle_prefix(vcpu, 0, address); -out: return 0; } static int handle_store_cpu_address(struct kvm_vcpu *vcpu) { u64 useraddr; - int rc; vcpu->stat.instruction_stap++; useraddr = kvm_s390_get_base_disp_s(vcpu); - if (useraddr & 1) { - kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); - goto out; - } + if (useraddr & 1) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); - rc = put_guest_u16(vcpu, useraddr, vcpu->vcpu_id); - if (rc == -EFAULT) { - kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); - goto out; - } + if (put_guest(vcpu, vcpu->vcpu_id, (u16 __user *)useraddr)) + return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); VCPU_EVENT(vcpu, 5, "storing cpu address to %llx", useraddr); trace_kvm_s390_handle_stap(vcpu, useraddr); -out: return 0; } @@ -129,36 +112,38 @@ static int handle_skey(struct kvm_vcpu *vcpu) static int handle_tpi(struct kvm_vcpu *vcpu) { - u64 addr; struct kvm_s390_interrupt_info *inti; + u64 addr; int cc; addr = kvm_s390_get_base_disp_s(vcpu); - + if (addr & 3) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + cc = 0; inti = kvm_s390_get_io_int(vcpu->kvm, vcpu->run->s.regs.crs[6], 0); - if (inti) { - if (addr) { - /* - * Store the two-word I/O interruption code into the - * provided area. - */ - put_guest_u16(vcpu, addr, inti->io.subchannel_id); - put_guest_u16(vcpu, addr + 2, inti->io.subchannel_nr); - put_guest_u32(vcpu, addr + 4, inti->io.io_int_parm); - } else { - /* - * Store the three-word I/O interruption code into - * the appropriate lowcore area. - */ - put_guest_u16(vcpu, 184, inti->io.subchannel_id); - put_guest_u16(vcpu, 186, inti->io.subchannel_nr); - put_guest_u32(vcpu, 188, inti->io.io_int_parm); - put_guest_u32(vcpu, 192, inti->io.io_int_word); - } - cc = 1; - } else - cc = 0; + if (!inti) + goto no_interrupt; + cc = 1; + if (addr) { + /* + * Store the two-word I/O interruption code into the + * provided area. + */ + put_guest(vcpu, inti->io.subchannel_id, (u16 __user *) addr); + put_guest(vcpu, inti->io.subchannel_nr, (u16 __user *) (addr + 2)); + put_guest(vcpu, inti->io.io_int_parm, (u32 __user *) (addr + 4)); + } else { + /* + * Store the three-word I/O interruption code into + * the appropriate lowcore area. + */ + put_guest(vcpu, inti->io.subchannel_id, (u16 __user *) __LC_SUBCHANNEL_ID); + put_guest(vcpu, inti->io.subchannel_nr, (u16 __user *) __LC_SUBCHANNEL_NR); + put_guest(vcpu, inti->io.io_int_parm, (u32 __user *) __LC_IO_INT_PARM); + put_guest(vcpu, inti->io.io_int_word, (u32 __user *) __LC_IO_INT_WORD); + } kfree(inti); +no_interrupt: /* Set condition code and we're done. */ vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44); vcpu->arch.sie_block->gpsw.mask |= (cc & 3ul) << 44; @@ -230,13 +215,10 @@ static int handle_stfl(struct kvm_vcpu *vcpu) rc = copy_to_guest(vcpu, offsetof(struct _lowcore, stfl_fac_list), &facility_list, sizeof(facility_list)); - if (rc == -EFAULT) - kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); - else { - VCPU_EVENT(vcpu, 5, "store facility list value %x", - facility_list); - trace_kvm_s390_handle_stfl(vcpu, facility_list); - } + if (rc) + return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + VCPU_EVENT(vcpu, 5, "store facility list value %x", facility_list); + trace_kvm_s390_handle_stfl(vcpu, facility_list); return 0; } @@ -249,112 +231,80 @@ static void handle_new_psw(struct kvm_vcpu *vcpu) #define PSW_MASK_ADDR_MODE (PSW_MASK_EA | PSW_MASK_BA) #define PSW_MASK_UNASSIGNED 0xb80800fe7fffffffUL -#define PSW_ADDR_24 0x00000000000fffffUL +#define PSW_ADDR_24 0x0000000000ffffffUL #define PSW_ADDR_31 0x000000007fffffffUL +static int is_valid_psw(psw_t *psw) { + if (psw->mask & PSW_MASK_UNASSIGNED) + return 0; + if ((psw->mask & PSW_MASK_ADDR_MODE) == PSW_MASK_BA) { + if (psw->addr & ~PSW_ADDR_31) + return 0; + } + if (!(psw->mask & PSW_MASK_ADDR_MODE) && (psw->addr & ~PSW_ADDR_24)) + return 0; + if ((psw->mask & PSW_MASK_ADDR_MODE) == PSW_MASK_EA) + return 0; + return 1; +} + int kvm_s390_handle_lpsw(struct kvm_vcpu *vcpu) { - u64 addr; + psw_t *gpsw = &vcpu->arch.sie_block->gpsw; psw_compat_t new_psw; + u64 addr; - if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) + if (gpsw->mask & PSW_MASK_PSTATE) return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OPERATION); - addr = kvm_s390_get_base_disp_s(vcpu); - - if (addr & 7) { - kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); - goto out; - } - - if (copy_from_guest(vcpu, &new_psw, addr, sizeof(new_psw))) { - kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); - goto out; - } - - if (!(new_psw.mask & PSW32_MASK_BASE)) { - kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); - goto out; - } - - vcpu->arch.sie_block->gpsw.mask = - (new_psw.mask & ~PSW32_MASK_BASE) << 32; - vcpu->arch.sie_block->gpsw.addr = new_psw.addr; - - if ((vcpu->arch.sie_block->gpsw.mask & PSW_MASK_UNASSIGNED) || - (!(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_ADDR_MODE) && - (vcpu->arch.sie_block->gpsw.addr & ~PSW_ADDR_24)) || - ((vcpu->arch.sie_block->gpsw.mask & PSW_MASK_ADDR_MODE) == - PSW_MASK_EA)) { - kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); - goto out; - } - + if (addr & 7) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + if (copy_from_guest(vcpu, &new_psw, addr, sizeof(new_psw))) + return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + if (!(new_psw.mask & PSW32_MASK_BASE)) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + gpsw->mask = (new_psw.mask & ~PSW32_MASK_BASE) << 32; + gpsw->mask |= new_psw.addr & PSW32_ADDR_AMODE; + gpsw->addr = new_psw.addr & ~PSW32_ADDR_AMODE; + if (!is_valid_psw(gpsw)) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); handle_new_psw(vcpu); -out: return 0; } static int handle_lpswe(struct kvm_vcpu *vcpu) { - u64 addr; psw_t new_psw; + u64 addr; addr = kvm_s390_get_base_disp_s(vcpu); - - if (addr & 7) { - kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); - goto out; - } - - if (copy_from_guest(vcpu, &new_psw, addr, sizeof(new_psw))) { - kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); - goto out; - } - - vcpu->arch.sie_block->gpsw.mask = new_psw.mask; - vcpu->arch.sie_block->gpsw.addr = new_psw.addr; - - if ((vcpu->arch.sie_block->gpsw.mask & PSW_MASK_UNASSIGNED) || - (((vcpu->arch.sie_block->gpsw.mask & PSW_MASK_ADDR_MODE) == - PSW_MASK_BA) && - (vcpu->arch.sie_block->gpsw.addr & ~PSW_ADDR_31)) || - (!(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_ADDR_MODE) && - (vcpu->arch.sie_block->gpsw.addr & ~PSW_ADDR_24)) || - ((vcpu->arch.sie_block->gpsw.mask & PSW_MASK_ADDR_MODE) == - PSW_MASK_EA)) { - kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); - goto out; - } - + if (addr & 7) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + if (copy_from_guest(vcpu, &new_psw, addr, sizeof(new_psw))) + return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + vcpu->arch.sie_block->gpsw = new_psw; + if (!is_valid_psw(&vcpu->arch.sie_block->gpsw)) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); handle_new_psw(vcpu); -out: return 0; } static int handle_stidp(struct kvm_vcpu *vcpu) { u64 operand2; - int rc; vcpu->stat.instruction_stidp++; operand2 = kvm_s390_get_base_disp_s(vcpu); - if (operand2 & 7) { - kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); - goto out; - } + if (operand2 & 7) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); - rc = put_guest_u64(vcpu, operand2, vcpu->arch.stidp_data); - if (rc == -EFAULT) { - kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); - goto out; - } + if (put_guest(vcpu, vcpu->arch.stidp_data, (u64 __user *)operand2)) + return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); VCPU_EVENT(vcpu, 5, "%s", "store cpu id"); -out: return 0; } @@ -394,8 +344,9 @@ static int handle_stsi(struct kvm_vcpu *vcpu) int fc = (vcpu->run->s.regs.gprs[0] & 0xf0000000) >> 28; int sel1 = vcpu->run->s.regs.gprs[0] & 0xff; int sel2 = vcpu->run->s.regs.gprs[1] & 0xffff; + unsigned long mem = 0; u64 operand2; - unsigned long mem; + int rc = 0; vcpu->stat.instruction_stsi++; VCPU_EVENT(vcpu, 4, "stsi: fc: %x sel1: %x sel2: %x", fc, sel1, sel2); @@ -414,37 +365,37 @@ static int handle_stsi(struct kvm_vcpu *vcpu) case 2: mem = get_zeroed_page(GFP_KERNEL); if (!mem) - goto out_fail; + goto out_no_data; if (stsi((void *) mem, fc, sel1, sel2)) - goto out_mem; + goto out_no_data; break; case 3: if (sel1 != 2 || sel2 != 2) - goto out_fail; + goto out_no_data; mem = get_zeroed_page(GFP_KERNEL); if (!mem) - goto out_fail; + goto out_no_data; handle_stsi_3_2_2(vcpu, (void *) mem); break; default: - goto out_fail; + goto out_no_data; } if (copy_to_guest_absolute(vcpu, operand2, (void *) mem, PAGE_SIZE)) { - kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); - goto out_mem; + rc = kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + goto out_exception; } trace_kvm_s390_handle_stsi(vcpu, fc, sel1, sel2, operand2); free_page(mem); vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44); vcpu->run->s.regs.gprs[0] = 0; return 0; -out_mem: - free_page(mem); -out_fail: +out_no_data: /* condition code 3 */ vcpu->arch.sie_block->gpsw.mask |= 3ul << 44; - return 0; +out_exception: + free_page(mem); + return rc; } static const intercept_handler_t b2_handlers[256] = { @@ -575,20 +526,13 @@ static int handle_tprot(struct kvm_vcpu *vcpu) if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_DAT) return -EOPNOTSUPP; - - /* we must resolve the address without holding the mmap semaphore. - * This is ok since the userspace hypervisor is not supposed to change - * the mapping while the guest queries the memory. Otherwise the guest - * might crash or get wrong info anyway. */ - user_address = (unsigned long) __guestaddr_to_user(vcpu, address1); - down_read(¤t->mm->mmap_sem); + user_address = __gmap_translate(address1, vcpu->arch.gmap); + if (IS_ERR_VALUE(user_address)) + goto out_inject; vma = find_vma(current->mm, user_address); - if (!vma) { - up_read(¤t->mm->mmap_sem); - return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); - } - + if (!vma) + goto out_inject; vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44); if (!(vma->vm_flags & VM_WRITE) && (vma->vm_flags & VM_READ)) vcpu->arch.sie_block->gpsw.mask |= (1ul << 44); @@ -597,6 +541,10 @@ static int handle_tprot(struct kvm_vcpu *vcpu) up_read(¤t->mm->mmap_sem); return 0; + +out_inject: + up_read(¤t->mm->mmap_sem); + return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); } int kvm_s390_handle_e5(struct kvm_vcpu *vcpu) diff --git a/arch/s390/kvm/trace.h b/arch/s390/kvm/trace.h index 2b29e62351d3..c2f582bb1cb2 100644 --- a/arch/s390/kvm/trace.h +++ b/arch/s390/kvm/trace.h @@ -67,7 +67,7 @@ TRACE_EVENT(kvm_s390_sie_fault, #define sie_intercept_code \ {0x04, "Instruction"}, \ {0x08, "Program interruption"}, \ - {0x0C, "Instruction and program interuption"}, \ + {0x0C, "Instruction and program interruption"}, \ {0x10, "External request"}, \ {0x14, "External interruption"}, \ {0x18, "I/O request"}, \ @@ -117,7 +117,7 @@ TRACE_EVENT(kvm_s390_intercept_instruction, __entry->instruction, insn_to_mnemonic((unsigned char *) &__entry->instruction, - __entry->insn) ? + __entry->insn, sizeof(__entry->insn)) ? "unknown" : __entry->insn) ); diff --git a/arch/s390/lib/Makefile b/arch/s390/lib/Makefile index 6ab0d0b5cec8..20b0e97a7df2 100644 --- a/arch/s390/lib/Makefile +++ b/arch/s390/lib/Makefile @@ -3,7 +3,6 @@ # lib-y += delay.o string.o uaccess_std.o uaccess_pt.o -obj-y += usercopy.o obj-$(CONFIG_32BIT) += div64.o qrnnd.o ucmpdi2.o mem32.o obj-$(CONFIG_64BIT) += mem64.o lib-$(CONFIG_64BIT) += uaccess_mvcos.o diff --git a/arch/s390/lib/uaccess_pt.c b/arch/s390/lib/uaccess_pt.c index dff631d34b45..50ea137a2d3c 100644 --- a/arch/s390/lib/uaccess_pt.c +++ b/arch/s390/lib/uaccess_pt.c @@ -77,42 +77,72 @@ static size_t copy_in_kernel(size_t count, void __user *to, * >= -4095 (IS_ERR_VALUE(x) returns true), a fault has occured and the address * contains the (negative) exception code. */ -static __always_inline unsigned long follow_table(struct mm_struct *mm, - unsigned long addr, int write) +#ifdef CONFIG_64BIT +static unsigned long follow_table(struct mm_struct *mm, + unsigned long address, int write) { - pgd_t *pgd; - pud_t *pud; - pmd_t *pmd; - pte_t *ptep; + unsigned long *table = (unsigned long *)__pa(mm->pgd); + + switch (mm->context.asce_bits & _ASCE_TYPE_MASK) { + case _ASCE_TYPE_REGION1: + table = table + ((address >> 53) & 0x7ff); + if (unlikely(*table & _REGION_ENTRY_INV)) + return -0x39UL; + table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); + /* fallthrough */ + case _ASCE_TYPE_REGION2: + table = table + ((address >> 42) & 0x7ff); + if (unlikely(*table & _REGION_ENTRY_INV)) + return -0x3aUL; + table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); + /* fallthrough */ + case _ASCE_TYPE_REGION3: + table = table + ((address >> 31) & 0x7ff); + if (unlikely(*table & _REGION_ENTRY_INV)) + return -0x3bUL; + table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); + /* fallthrough */ + case _ASCE_TYPE_SEGMENT: + table = table + ((address >> 20) & 0x7ff); + if (unlikely(*table & _SEGMENT_ENTRY_INV)) + return -0x10UL; + if (unlikely(*table & _SEGMENT_ENTRY_LARGE)) { + if (write && (*table & _SEGMENT_ENTRY_RO)) + return -0x04UL; + return (*table & _SEGMENT_ENTRY_ORIGIN_LARGE) + + (address & ~_SEGMENT_ENTRY_ORIGIN_LARGE); + } + table = (unsigned long *)(*table & _SEGMENT_ENTRY_ORIGIN); + } + table = table + ((address >> 12) & 0xff); + if (unlikely(*table & _PAGE_INVALID)) + return -0x11UL; + if (write && (*table & _PAGE_RO)) + return -0x04UL; + return (*table & PAGE_MASK) + (address & ~PAGE_MASK); +} - pgd = pgd_offset(mm, addr); - if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd))) - return -0x3aUL; +#else /* CONFIG_64BIT */ - pud = pud_offset(pgd, addr); - if (pud_none(*pud) || unlikely(pud_bad(*pud))) - return -0x3bUL; +static unsigned long follow_table(struct mm_struct *mm, + unsigned long address, int write) +{ + unsigned long *table = (unsigned long *)__pa(mm->pgd); - pmd = pmd_offset(pud, addr); - if (pmd_none(*pmd)) + table = table + ((address >> 20) & 0x7ff); + if (unlikely(*table & _SEGMENT_ENTRY_INV)) return -0x10UL; - if (pmd_large(*pmd)) { - if (write && (pmd_val(*pmd) & _SEGMENT_ENTRY_RO)) - return -0x04UL; - return (pmd_val(*pmd) & HPAGE_MASK) + (addr & ~HPAGE_MASK); - } - if (unlikely(pmd_bad(*pmd))) - return -0x10UL; - - ptep = pte_offset_map(pmd, addr); - if (!pte_present(*ptep)) + table = (unsigned long *)(*table & _SEGMENT_ENTRY_ORIGIN); + table = table + ((address >> 12) & 0xff); + if (unlikely(*table & _PAGE_INVALID)) return -0x11UL; - if (write && (!pte_write(*ptep) || !pte_dirty(*ptep))) + if (write && (*table & _PAGE_RO)) return -0x04UL; - - return (pte_val(*ptep) & PAGE_MASK) + (addr & ~PAGE_MASK); + return (*table & PAGE_MASK) + (address & ~PAGE_MASK); } +#endif /* CONFIG_64BIT */ + static __always_inline size_t __user_copy_pt(unsigned long uaddr, void *kptr, size_t n, int write_user) { @@ -197,7 +227,7 @@ size_t copy_to_user_pt(size_t n, void __user *to, const void *from) static size_t clear_user_pt(size_t n, void __user *to) { - void *zpage = &empty_zero_page; + void *zpage = (void *) empty_zero_page; long done, size, ret; done = 0; diff --git a/arch/s390/lib/usercopy.c b/arch/s390/lib/usercopy.c deleted file mode 100644 index 14b363fec8a2..000000000000 --- a/arch/s390/lib/usercopy.c +++ /dev/null @@ -1,8 +0,0 @@ -#include <linux/module.h> -#include <linux/bug.h> - -void copy_from_user_overflow(void) -{ - WARN(1, "Buffer overflow detected!\n"); -} -EXPORT_SYMBOL(copy_from_user_overflow); diff --git a/arch/s390/mm/Makefile b/arch/s390/mm/Makefile index 640bea12303c..839592ca265c 100644 --- a/arch/s390/mm/Makefile +++ b/arch/s390/mm/Makefile @@ -3,7 +3,7 @@ # obj-y := init.o fault.o extmem.o mmap.o vmem.o pgtable.o maccess.o -obj-y += page-states.o gup.o extable.o pageattr.o +obj-y += page-states.o gup.o extable.o pageattr.o mem_detect.o obj-$(CONFIG_CMM) += cmm.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o diff --git a/arch/s390/mm/cmm.c b/arch/s390/mm/cmm.c index 479e94282910..9d84a1feefef 100644 --- a/arch/s390/mm/cmm.c +++ b/arch/s390/mm/cmm.c @@ -458,12 +458,10 @@ static int __init cmm_init(void) if (rc) goto out_pm; cmm_thread_ptr = kthread_run(cmm_thread, NULL, "cmmthread"); - rc = IS_ERR(cmm_thread_ptr) ? PTR_ERR(cmm_thread_ptr) : 0; - if (rc) - goto out_kthread; - return 0; + if (!IS_ERR(cmm_thread_ptr)) + return 0; -out_kthread: + rc = PTR_ERR(cmm_thread_ptr); unregister_pm_notifier(&cmm_power_notifier); out_pm: unregister_oom_notifier(&cmm_oom_nb); diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 2fb9e63b8fc4..047c3e4c59a2 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -395,8 +395,13 @@ void __kprobes do_protection_exception(struct pt_regs *regs) int fault; trans_exc_code = regs->int_parm_long; - /* Protection exception is suppressing, decrement psw address. */ - regs->psw.addr = __rewind_psw(regs->psw, regs->int_code >> 16); + /* + * Protection exceptions are suppressing, decrement psw address. + * The exception to this rule are aborted transactions, for these + * the PSW already points to the correct location. + */ + if (!(regs->int_code & 0x200)) + regs->psw.addr = __rewind_psw(regs->psw, regs->int_code >> 16); /* * Check for low-address protection. This needs to be treated * as a special case because the translation exception code diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c index 532525ec88c1..121089d57802 100644 --- a/arch/s390/mm/hugetlbpage.c +++ b/arch/s390/mm/hugetlbpage.c @@ -39,7 +39,7 @@ int arch_prepare_hugepage(struct page *page) if (!ptep) return -ENOMEM; - pte = mk_pte(page, PAGE_RW); + pte_val(pte) = addr; for (i = 0; i < PTRS_PER_PTE; i++) { set_pte_at(&init_mm, addr + i * PAGE_SIZE, ptep + i, pte); pte_val(pte) += PAGE_SIZE; diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index 49ce6bb2c641..89ebae4008f2 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -21,6 +21,7 @@ #include <linux/init.h> #include <linux/pagemap.h> #include <linux/bootmem.h> +#include <linux/memory.h> #include <linux/pfn.h> #include <linux/poison.h> #include <linux/initrd.h> @@ -36,17 +37,17 @@ #include <asm/tlbflush.h> #include <asm/sections.h> #include <asm/ctl_reg.h> +#include <asm/sclp.h> pgd_t swapper_pg_dir[PTRS_PER_PGD] __attribute__((__aligned__(PAGE_SIZE))); unsigned long empty_zero_page, zero_page_mask; EXPORT_SYMBOL(empty_zero_page); -static unsigned long __init setup_zero_pages(void) +static void __init setup_zero_pages(void) { struct cpuid cpu_id; unsigned int order; - unsigned long size; struct page *page; int i; @@ -63,10 +64,18 @@ static unsigned long __init setup_zero_pages(void) break; case 0x2097: /* z10 */ case 0x2098: /* z10 */ - default: + case 0x2817: /* z196 */ + case 0x2818: /* z196 */ order = 2; break; + case 0x2827: /* zEC12 */ + default: + order = 5; + break; } + /* Limit number of empty zero pages for small memory sizes */ + if (order > 2 && totalram_pages <= 16384) + order = 2; empty_zero_page = __get_free_pages(GFP_KERNEL | __GFP_ZERO, order); if (!empty_zero_page) @@ -75,14 +84,11 @@ static unsigned long __init setup_zero_pages(void) page = virt_to_page((void *) empty_zero_page); split_page(page, order); for (i = 1 << order; i > 0; i--) { - SetPageReserved(page); + mark_page_reserved(page); page++; } - size = PAGE_SIZE << order; - zero_page_mask = (size - 1) & PAGE_MASK; - - return 1UL << order; + zero_page_mask = ((PAGE_SIZE << order) - 1) & PAGE_MASK; } /* @@ -139,7 +145,7 @@ void __init mem_init(void) /* this will put all low memory onto the freelists */ totalram_pages += free_all_bootmem(); - totalram_pages -= setup_zero_pages(); /* Setup zeroed pages. */ + setup_zero_pages(); /* Setup zeroed pages. */ reservedpages = 0; @@ -158,34 +164,15 @@ void __init mem_init(void) PFN_ALIGN((unsigned long)&_eshared) - 1); } -void free_init_pages(char *what, unsigned long begin, unsigned long end) -{ - unsigned long addr = begin; - - if (begin >= end) - return; - for (; addr < end; addr += PAGE_SIZE) { - ClearPageReserved(virt_to_page(addr)); - init_page_count(virt_to_page(addr)); - memset((void *)(addr & PAGE_MASK), POISON_FREE_INITMEM, - PAGE_SIZE); - free_page(addr); - totalram_pages++; - } - printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10); -} - void free_initmem(void) { - free_init_pages("unused kernel memory", - (unsigned long)&__init_begin, - (unsigned long)&__init_end); + free_initmem_default(0); } #ifdef CONFIG_BLK_DEV_INITRD void __init free_initrd_mem(unsigned long start, unsigned long end) { - free_init_pages("initrd memory", start, end); + free_reserved_area(start, end, POISON_FREE_INITMEM, "initrd"); } #endif @@ -229,6 +216,15 @@ int arch_add_memory(int nid, u64 start, u64 size) return rc; } +unsigned long memory_block_size_bytes(void) +{ + /* + * Make sure the memory block size is always greater + * or equal than the memory increment size. + */ + return max_t(unsigned long, MIN_MEMORY_BLOCK_SIZE, sclp_get_rzm()); +} + #ifdef CONFIG_MEMORY_HOTREMOVE int arch_remove_memory(u64 start, u64 size) { diff --git a/arch/s390/mm/mem_detect.c b/arch/s390/mm/mem_detect.c new file mode 100644 index 000000000000..3cbd3b8bf311 --- /dev/null +++ b/arch/s390/mm/mem_detect.c @@ -0,0 +1,134 @@ +/* + * Copyright IBM Corp. 2008, 2009 + * + * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com> + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <asm/ipl.h> +#include <asm/sclp.h> +#include <asm/setup.h> + +#define ADDR2G (1ULL << 31) + +static void find_memory_chunks(struct mem_chunk chunk[], unsigned long maxsize) +{ + unsigned long long memsize, rnmax, rzm; + unsigned long addr = 0, size; + int i = 0, type; + + rzm = sclp_get_rzm(); + rnmax = sclp_get_rnmax(); + memsize = rzm * rnmax; + if (!rzm) + rzm = 1ULL << 17; + if (sizeof(long) == 4) { + rzm = min(ADDR2G, rzm); + memsize = memsize ? min(ADDR2G, memsize) : ADDR2G; + } + if (maxsize) + memsize = memsize ? min((unsigned long)memsize, maxsize) : maxsize; + do { + size = 0; + type = tprot(addr); + do { + size += rzm; + if (memsize && addr + size >= memsize) + break; + } while (type == tprot(addr + size)); + if (type == CHUNK_READ_WRITE || type == CHUNK_READ_ONLY) { + if (memsize && (addr + size > memsize)) + size = memsize - addr; + chunk[i].addr = addr; + chunk[i].size = size; + chunk[i].type = type; + i++; + } + addr += size; + } while (addr < memsize && i < MEMORY_CHUNKS); +} + +/** + * detect_memory_layout - fill mem_chunk array with memory layout data + * @chunk: mem_chunk array to be filled + * @maxsize: maximum address where memory detection should stop + * + * Fills the passed in memory chunk array with the memory layout of the + * machine. The array must have a size of at least MEMORY_CHUNKS and will + * be fully initialized afterwards. + * If the maxsize paramater has a value > 0 memory detection will stop at + * that address. It is guaranteed that all chunks have an ending address + * that is smaller than maxsize. + * If maxsize is 0 all memory will be detected. + */ +void detect_memory_layout(struct mem_chunk chunk[], unsigned long maxsize) +{ + unsigned long flags, flags_dat, cr0; + + memset(chunk, 0, MEMORY_CHUNKS * sizeof(struct mem_chunk)); + /* + * Disable IRQs, DAT and low address protection so tprot does the + * right thing and we don't get scheduled away with low address + * protection disabled. + */ + local_irq_save(flags); + flags_dat = __arch_local_irq_stnsm(0xfb); + /* + * In case DAT was enabled, make sure chunk doesn't reside in vmalloc + * space. We have disabled DAT and any access to vmalloc area will + * cause an exception. + * If DAT was disabled we are called from early ipl code. + */ + if (test_bit(5, &flags_dat)) { + if (WARN_ON_ONCE(is_vmalloc_or_module_addr(chunk))) + goto out; + } + __ctl_store(cr0, 0, 0); + __ctl_clear_bit(0, 28); + find_memory_chunks(chunk, maxsize); + __ctl_load(cr0, 0, 0); +out: + __arch_local_irq_ssm(flags_dat); + local_irq_restore(flags); +} +EXPORT_SYMBOL(detect_memory_layout); + +/* + * Create memory hole with given address and size. + */ +void create_mem_hole(struct mem_chunk mem_chunk[], unsigned long addr, + unsigned long size) +{ + int i; + + for (i = 0; i < MEMORY_CHUNKS; i++) { + struct mem_chunk *chunk = &mem_chunk[i]; + + if (chunk->size == 0) + continue; + if (addr > chunk->addr + chunk->size) + continue; + if (addr + size <= chunk->addr) + continue; + /* Split */ + if ((addr > chunk->addr) && + (addr + size < chunk->addr + chunk->size)) { + struct mem_chunk *new = chunk + 1; + + memmove(new, chunk, (MEMORY_CHUNKS-i-1) * sizeof(*new)); + new->addr = addr + size; + new->size = chunk->addr + chunk->size - new->addr; + chunk->size = addr - chunk->addr; + continue; + } else if ((addr <= chunk->addr) && + (addr + size >= chunk->addr + chunk->size)) { + memset(chunk, 0 , sizeof(*chunk)); + } else if (addr + size < chunk->addr + chunk->size) { + chunk->size = chunk->addr + chunk->size - addr - size; + chunk->addr = addr + size; + } else if (addr > chunk->addr) { + chunk->size = addr - chunk->addr; + } + } +} diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c index d21040ed5e59..80adfbf75065 100644 --- a/arch/s390/mm/pageattr.c +++ b/arch/s390/mm/pageattr.c @@ -9,31 +9,25 @@ #include <asm/pgtable.h> #include <asm/page.h> +static inline unsigned long sske_frame(unsigned long addr, unsigned char skey) +{ + asm volatile(".insn rrf,0xb22b0000,%[skey],%[addr],9,0" + : [addr] "+a" (addr) : [skey] "d" (skey)); + return addr; +} + void storage_key_init_range(unsigned long start, unsigned long end) { - unsigned long boundary, function, size; + unsigned long boundary, size; while (start < end) { - if (MACHINE_HAS_EDAT2) { - /* set storage keys for a 2GB frame */ - function = 0x22000 | PAGE_DEFAULT_KEY; - size = 1UL << 31; - boundary = (start + size) & ~(size - 1); - if (boundary <= end) { - do { - start = pfmf(function, start); - } while (start < boundary); - continue; - } - } if (MACHINE_HAS_EDAT1) { /* set storage keys for a 1MB frame */ - function = 0x21000 | PAGE_DEFAULT_KEY; size = 1UL << 20; boundary = (start + size) & ~(size - 1); if (boundary <= end) { do { - start = pfmf(function, start); + start = sske_frame(start, PAGE_DEFAULT_KEY); } while (start < boundary); continue; } diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index ae44d2a34313..7805ddca833d 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -379,75 +379,184 @@ out_unmap: } EXPORT_SYMBOL_GPL(gmap_map_segment); -/* - * this function is assumed to be called with mmap_sem held - */ -unsigned long __gmap_fault(unsigned long address, struct gmap *gmap) +static unsigned long *gmap_table_walk(unsigned long address, struct gmap *gmap) { - unsigned long *table, vmaddr, segment; - struct mm_struct *mm; - struct gmap_pgtable *mp; - struct gmap_rmap *rmap; - struct vm_area_struct *vma; - struct page *page; - pgd_t *pgd; - pud_t *pud; - pmd_t *pmd; + unsigned long *table; - current->thread.gmap_addr = address; - mm = gmap->mm; - /* Walk the gmap address space page table */ table = gmap->table + ((address >> 53) & 0x7ff); if (unlikely(*table & _REGION_ENTRY_INV)) - return -EFAULT; + return ERR_PTR(-EFAULT); table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); table = table + ((address >> 42) & 0x7ff); if (unlikely(*table & _REGION_ENTRY_INV)) - return -EFAULT; + return ERR_PTR(-EFAULT); table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); table = table + ((address >> 31) & 0x7ff); if (unlikely(*table & _REGION_ENTRY_INV)) - return -EFAULT; + return ERR_PTR(-EFAULT); table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); table = table + ((address >> 20) & 0x7ff); + return table; +} +/** + * __gmap_translate - translate a guest address to a user space address + * @address: guest address + * @gmap: pointer to guest mapping meta data structure + * + * Returns user space address which corresponds to the guest address or + * -EFAULT if no such mapping exists. + * This function does not establish potentially missing page table entries. + * The mmap_sem of the mm that belongs to the address space must be held + * when this function gets called. + */ +unsigned long __gmap_translate(unsigned long address, struct gmap *gmap) +{ + unsigned long *segment_ptr, vmaddr, segment; + struct gmap_pgtable *mp; + struct page *page; + + current->thread.gmap_addr = address; + segment_ptr = gmap_table_walk(address, gmap); + if (IS_ERR(segment_ptr)) + return PTR_ERR(segment_ptr); /* Convert the gmap address to an mm address. */ - segment = *table; - if (likely(!(segment & _SEGMENT_ENTRY_INV))) { + segment = *segment_ptr; + if (!(segment & _SEGMENT_ENTRY_INV)) { page = pfn_to_page(segment >> PAGE_SHIFT); mp = (struct gmap_pgtable *) page->index; return mp->vmaddr | (address & ~PMD_MASK); } else if (segment & _SEGMENT_ENTRY_RO) { vmaddr = segment & _SEGMENT_ENTRY_ORIGIN; - vma = find_vma(mm, vmaddr); - if (!vma || vma->vm_start > vmaddr) - return -EFAULT; - - /* Walk the parent mm page table */ - pgd = pgd_offset(mm, vmaddr); - pud = pud_alloc(mm, pgd, vmaddr); - if (!pud) - return -ENOMEM; - pmd = pmd_alloc(mm, pud, vmaddr); - if (!pmd) - return -ENOMEM; - if (!pmd_present(*pmd) && - __pte_alloc(mm, vma, pmd, vmaddr)) - return -ENOMEM; - /* pmd now points to a valid segment table entry. */ - rmap = kmalloc(sizeof(*rmap), GFP_KERNEL|__GFP_REPEAT); - if (!rmap) - return -ENOMEM; - /* Link gmap segment table entry location to page table. */ - page = pmd_page(*pmd); - mp = (struct gmap_pgtable *) page->index; - rmap->entry = table; - spin_lock(&mm->page_table_lock); + return vmaddr | (address & ~PMD_MASK); + } + return -EFAULT; +} +EXPORT_SYMBOL_GPL(__gmap_translate); + +/** + * gmap_translate - translate a guest address to a user space address + * @address: guest address + * @gmap: pointer to guest mapping meta data structure + * + * Returns user space address which corresponds to the guest address or + * -EFAULT if no such mapping exists. + * This function does not establish potentially missing page table entries. + */ +unsigned long gmap_translate(unsigned long address, struct gmap *gmap) +{ + unsigned long rc; + + down_read(&gmap->mm->mmap_sem); + rc = __gmap_translate(address, gmap); + up_read(&gmap->mm->mmap_sem); + return rc; +} +EXPORT_SYMBOL_GPL(gmap_translate); + +static int gmap_connect_pgtable(unsigned long address, unsigned long segment, + unsigned long *segment_ptr, struct gmap *gmap) +{ + unsigned long vmaddr; + struct vm_area_struct *vma; + struct gmap_pgtable *mp; + struct gmap_rmap *rmap; + struct mm_struct *mm; + struct page *page; + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + + mm = gmap->mm; + vmaddr = segment & _SEGMENT_ENTRY_ORIGIN; + vma = find_vma(mm, vmaddr); + if (!vma || vma->vm_start > vmaddr) + return -EFAULT; + /* Walk the parent mm page table */ + pgd = pgd_offset(mm, vmaddr); + pud = pud_alloc(mm, pgd, vmaddr); + if (!pud) + return -ENOMEM; + pmd = pmd_alloc(mm, pud, vmaddr); + if (!pmd) + return -ENOMEM; + if (!pmd_present(*pmd) && + __pte_alloc(mm, vma, pmd, vmaddr)) + return -ENOMEM; + /* pmd now points to a valid segment table entry. */ + rmap = kmalloc(sizeof(*rmap), GFP_KERNEL|__GFP_REPEAT); + if (!rmap) + return -ENOMEM; + /* Link gmap segment table entry location to page table. */ + page = pmd_page(*pmd); + mp = (struct gmap_pgtable *) page->index; + rmap->gmap = gmap; + rmap->entry = segment_ptr; + rmap->vmaddr = address; + spin_lock(&mm->page_table_lock); + if (*segment_ptr == segment) { list_add(&rmap->list, &mp->mapper); - spin_unlock(&mm->page_table_lock); /* Set gmap segment table entry to page table. */ - *table = pmd_val(*pmd) & PAGE_MASK; - return vmaddr | (address & ~PMD_MASK); + *segment_ptr = pmd_val(*pmd) & PAGE_MASK; + rmap = NULL; + } + spin_unlock(&mm->page_table_lock); + kfree(rmap); + return 0; +} + +static void gmap_disconnect_pgtable(struct mm_struct *mm, unsigned long *table) +{ + struct gmap_rmap *rmap, *next; + struct gmap_pgtable *mp; + struct page *page; + int flush; + + flush = 0; + spin_lock(&mm->page_table_lock); + page = pfn_to_page(__pa(table) >> PAGE_SHIFT); + mp = (struct gmap_pgtable *) page->index; + list_for_each_entry_safe(rmap, next, &mp->mapper, list) { + *rmap->entry = + _SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO | mp->vmaddr; + list_del(&rmap->list); + kfree(rmap); + flush = 1; + } + spin_unlock(&mm->page_table_lock); + if (flush) + __tlb_flush_global(); +} + +/* + * this function is assumed to be called with mmap_sem held + */ +unsigned long __gmap_fault(unsigned long address, struct gmap *gmap) +{ + unsigned long *segment_ptr, segment; + struct gmap_pgtable *mp; + struct page *page; + int rc; + + current->thread.gmap_addr = address; + segment_ptr = gmap_table_walk(address, gmap); + if (IS_ERR(segment_ptr)) + return -EFAULT; + /* Convert the gmap address to an mm address. */ + while (1) { + segment = *segment_ptr; + if (!(segment & _SEGMENT_ENTRY_INV)) { + /* Page table is present */ + page = pfn_to_page(segment >> PAGE_SHIFT); + mp = (struct gmap_pgtable *) page->index; + return mp->vmaddr | (address & ~PMD_MASK); + } + if (!(segment & _SEGMENT_ENTRY_RO)) + /* Nothing mapped in the gmap address space. */ + break; + rc = gmap_connect_pgtable(address, segment, segment_ptr, gmap); + if (rc) + return rc; } return -EFAULT; } @@ -511,27 +620,116 @@ void gmap_discard(unsigned long from, unsigned long to, struct gmap *gmap) } EXPORT_SYMBOL_GPL(gmap_discard); -void gmap_unmap_notifier(struct mm_struct *mm, unsigned long *table) +static LIST_HEAD(gmap_notifier_list); +static DEFINE_SPINLOCK(gmap_notifier_lock); + +/** + * gmap_register_ipte_notifier - register a pte invalidation callback + * @nb: pointer to the gmap notifier block + */ +void gmap_register_ipte_notifier(struct gmap_notifier *nb) { - struct gmap_rmap *rmap, *next; + spin_lock(&gmap_notifier_lock); + list_add(&nb->list, &gmap_notifier_list); + spin_unlock(&gmap_notifier_lock); +} +EXPORT_SYMBOL_GPL(gmap_register_ipte_notifier); + +/** + * gmap_unregister_ipte_notifier - remove a pte invalidation callback + * @nb: pointer to the gmap notifier block + */ +void gmap_unregister_ipte_notifier(struct gmap_notifier *nb) +{ + spin_lock(&gmap_notifier_lock); + list_del_init(&nb->list); + spin_unlock(&gmap_notifier_lock); +} +EXPORT_SYMBOL_GPL(gmap_unregister_ipte_notifier); + +/** + * gmap_ipte_notify - mark a range of ptes for invalidation notification + * @gmap: pointer to guest mapping meta data structure + * @address: virtual address in the guest address space + * @len: size of area + * + * Returns 0 if for each page in the given range a gmap mapping exists and + * the invalidation notification could be set. If the gmap mapping is missing + * for one or more pages -EFAULT is returned. If no memory could be allocated + * -ENOMEM is returned. This function establishes missing page table entries. + */ +int gmap_ipte_notify(struct gmap *gmap, unsigned long start, unsigned long len) +{ + unsigned long addr; + spinlock_t *ptl; + pte_t *ptep, entry; + pgste_t pgste; + int rc = 0; + + if ((start & ~PAGE_MASK) || (len & ~PAGE_MASK)) + return -EINVAL; + down_read(&gmap->mm->mmap_sem); + while (len) { + /* Convert gmap address and connect the page tables */ + addr = __gmap_fault(start, gmap); + if (IS_ERR_VALUE(addr)) { + rc = addr; + break; + } + /* Get the page mapped */ + if (get_user_pages(current, gmap->mm, addr, 1, 1, 0, + NULL, NULL) != 1) { + rc = -EFAULT; + break; + } + /* Walk the process page table, lock and get pte pointer */ + ptep = get_locked_pte(gmap->mm, addr, &ptl); + if (unlikely(!ptep)) + continue; + /* Set notification bit in the pgste of the pte */ + entry = *ptep; + if ((pte_val(entry) & (_PAGE_INVALID | _PAGE_RO)) == 0) { + pgste = pgste_get_lock(ptep); + pgste_val(pgste) |= RCP_IN_BIT; + pgste_set_unlock(ptep, pgste); + start += PAGE_SIZE; + len -= PAGE_SIZE; + } + spin_unlock(ptl); + } + up_read(&gmap->mm->mmap_sem); + return rc; +} +EXPORT_SYMBOL_GPL(gmap_ipte_notify); + +/** + * gmap_do_ipte_notify - call all invalidation callbacks for a specific pte. + * @mm: pointer to the process mm_struct + * @addr: virtual address in the process address space + * @pte: pointer to the page table entry + * + * This function is assumed to be called with the page table lock held + * for the pte to notify. + */ +void gmap_do_ipte_notify(struct mm_struct *mm, unsigned long addr, pte_t *pte) +{ + unsigned long segment_offset; + struct gmap_notifier *nb; struct gmap_pgtable *mp; + struct gmap_rmap *rmap; struct page *page; - int flush; - flush = 0; - spin_lock(&mm->page_table_lock); - page = pfn_to_page(__pa(table) >> PAGE_SHIFT); + segment_offset = ((unsigned long) pte) & (255 * sizeof(pte_t)); + segment_offset = segment_offset * (4096 / sizeof(pte_t)); + page = pfn_to_page(__pa(pte) >> PAGE_SHIFT); mp = (struct gmap_pgtable *) page->index; - list_for_each_entry_safe(rmap, next, &mp->mapper, list) { - *rmap->entry = - _SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO | mp->vmaddr; - list_del(&rmap->list); - kfree(rmap); - flush = 1; + spin_lock(&gmap_notifier_lock); + list_for_each_entry(rmap, &mp->mapper, list) { + list_for_each_entry(nb, &gmap_notifier_list, list) + nb->notifier_call(rmap->gmap, + rmap->vmaddr + segment_offset); } - spin_unlock(&mm->page_table_lock); - if (flush) - __tlb_flush_global(); + spin_unlock(&gmap_notifier_lock); } static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm, @@ -586,8 +784,8 @@ static inline void page_table_free_pgste(unsigned long *table) { } -static inline void gmap_unmap_notifier(struct mm_struct *mm, - unsigned long *table) +static inline void gmap_disconnect_pgtable(struct mm_struct *mm, + unsigned long *table) { } @@ -653,7 +851,7 @@ void page_table_free(struct mm_struct *mm, unsigned long *table) unsigned int bit, mask; if (mm_has_pgste(mm)) { - gmap_unmap_notifier(mm, table); + gmap_disconnect_pgtable(mm, table); return page_table_free_pgste(table); } /* Free 1K/2K page table fragment of a 4K page */ @@ -696,7 +894,7 @@ void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table) mm = tlb->mm; if (mm_has_pgste(mm)) { - gmap_unmap_notifier(mm, table); + gmap_disconnect_pgtable(mm, table); table = (unsigned long *) (__pa(table) | FRAG_MASK); tlb_remove_table(tlb, table); return; diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index ffab84db6907..8b268fcc4612 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -191,19 +191,16 @@ static void vmem_remove_range(unsigned long start, unsigned long size) /* * Add a backed mem_map array to the virtual mem_map array. */ -int __meminit vmemmap_populate(struct page *start, unsigned long nr, int node) +int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node) { - unsigned long address, start_addr, end_addr; + unsigned long address = start; pgd_t *pg_dir; pud_t *pu_dir; pmd_t *pm_dir; pte_t *pt_dir; int ret = -ENOMEM; - start_addr = (unsigned long) start; - end_addr = (unsigned long) (start + nr); - - for (address = start_addr; address < end_addr;) { + for (address = start; address < end;) { pg_dir = pgd_offset_k(address); if (pgd_none(*pg_dir)) { pu_dir = vmem_pud_alloc(); @@ -262,14 +259,14 @@ int __meminit vmemmap_populate(struct page *start, unsigned long nr, int node) } address += PAGE_SIZE; } - memset(start, 0, nr * sizeof(struct page)); + memset((void *)start, 0, end - start); ret = 0; out: - flush_tlb_kernel_range(start_addr, end_addr); + flush_tlb_kernel_range(start, end); return ret; } -void vmemmap_free(struct page *memmap, unsigned long nr_pages) +void vmemmap_free(unsigned long start, unsigned long end) { } @@ -378,9 +375,8 @@ void __init vmem_map_init(void) ro_start = PFN_ALIGN((unsigned long)&_stext); ro_end = (unsigned long)&_eshared & PAGE_MASK; - for (i = 0; i < MEMORY_CHUNKS && memory_chunk[i].size > 0; i++) { - if (memory_chunk[i].type == CHUNK_CRASHK || - memory_chunk[i].type == CHUNK_OLDMEM) + for (i = 0; i < MEMORY_CHUNKS; i++) { + if (!memory_chunk[i].size) continue; start = memory_chunk[i].addr; end = memory_chunk[i].addr + memory_chunk[i].size; @@ -415,9 +411,6 @@ static int __init vmem_convert_memory_chunk(void) for (i = 0; i < MEMORY_CHUNKS; i++) { if (!memory_chunk[i].size) continue; - if (memory_chunk[i].type == CHUNK_CRASHK || - memory_chunk[i].type == CHUNK_OLDMEM) - continue; seg = kzalloc(sizeof(*seg), GFP_KERNEL); if (!seg) panic("Out of memory...\n"); diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index 0972e91cced2..82f165f8078c 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -747,10 +747,9 @@ void bpf_jit_compile(struct sk_filter *fp) if (!bpf_jit_enable) return; - addrs = kmalloc(fp->len * sizeof(*addrs), GFP_KERNEL); + addrs = kcalloc(fp->len, sizeof(*addrs), GFP_KERNEL); if (addrs == NULL) return; - memset(addrs, 0, fp->len * sizeof(*addrs)); memset(&jit, 0, sizeof(cjit)); memset(&cjit, 0, sizeof(cjit)); diff --git a/arch/s390/oprofile/init.c b/arch/s390/oprofile/init.c index 584b93674ea4..ffeb17ce7f31 100644 --- a/arch/s390/oprofile/init.c +++ b/arch/s390/oprofile/init.c @@ -440,6 +440,7 @@ static int oprofile_hwsampler_init(struct oprofile_operations *ops) switch (id.machine) { case 0x2097: case 0x2098: ops->cpu_type = "s390/z10"; break; case 0x2817: case 0x2818: ops->cpu_type = "s390/z196"; break; + case 0x2827: ops->cpu_type = "s390/zEC12"; break; default: return -ENODEV; } } diff --git a/arch/s390/pci/Makefile b/arch/s390/pci/Makefile index f0f426a113ce..086a2e37935d 100644 --- a/arch/s390/pci/Makefile +++ b/arch/s390/pci/Makefile @@ -2,5 +2,5 @@ # Makefile for the s390 PCI subsystem. # -obj-$(CONFIG_PCI) += pci.o pci_dma.o pci_clp.o pci_msi.o \ - pci_sysfs.o pci_event.o pci_debug.o +obj-$(CONFIG_PCI) += pci.o pci_dma.o pci_clp.o pci_msi.o pci_sysfs.o \ + pci_event.o pci_debug.o pci_insn.o diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index 27b4c17855b9..e6f15b5d8b7d 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -99,9 +99,6 @@ static int __read_mostly aisb_max; static struct kmem_cache *zdev_irq_cache; static struct kmem_cache *zdev_fmb_cache; -debug_info_t *pci_debug_msg_id; -debug_info_t *pci_debug_err_id; - static inline int irq_to_msi_nr(unsigned int irq) { return irq & ZPCI_MSI_MASK; @@ -179,7 +176,7 @@ static int zpci_register_airq(struct zpci_dev *zdev, unsigned int aisb, fib->aisb = (u64) bucket->aisb + aisb / 8; fib->aisbo = aisb & ZPCI_MSI_MASK; - rc = mpcifc_instr(req, fib); + rc = s390pci_mod_fc(req, fib); pr_debug("%s mpcifc returned noi: %d\n", __func__, fib->noi); free_page((unsigned long) fib); @@ -209,7 +206,7 @@ static int mod_pci(struct zpci_dev *zdev, int fn, u8 dmaas, struct mod_pci_args fib->iota = args->iota; fib->fmb_addr = args->fmb_addr; - rc = mpcifc_instr(req, fib); + rc = s390pci_mod_fc(req, fib); free_page((unsigned long) fib); return rc; } @@ -249,10 +246,9 @@ int zpci_fmb_enable_device(struct zpci_dev *zdev) if (zdev->fmb) return -EINVAL; - zdev->fmb = kmem_cache_alloc(zdev_fmb_cache, GFP_KERNEL); + zdev->fmb = kmem_cache_zalloc(zdev_fmb_cache, GFP_KERNEL); if (!zdev->fmb) return -ENOMEM; - memset(zdev->fmb, 0, sizeof(*zdev->fmb)); WARN_ON((u64) zdev->fmb & 0xf); args.fmb_addr = virt_to_phys(zdev->fmb); @@ -284,12 +280,12 @@ static int zpci_cfg_load(struct zpci_dev *zdev, int offset, u32 *val, u8 len) u64 data; int rc; - rc = pcilg_instr(&data, req, offset); - data = data << ((8 - len) * 8); - data = le64_to_cpu(data); - if (!rc) + rc = s390pci_load(&data, req, offset); + if (!rc) { + data = data << ((8 - len) * 8); + data = le64_to_cpu(data); *val = (u32) data; - else + } else *val = 0xffffffff; return rc; } @@ -302,7 +298,7 @@ static int zpci_cfg_store(struct zpci_dev *zdev, int offset, u32 val, u8 len) data = cpu_to_le64(data); data = data >> ((8 - len) * 8); - rc = pcistg_instr(data, req, offset); + rc = s390pci_store(data, req, offset); return rc; } @@ -409,20 +405,28 @@ static int pci_read(struct pci_bus *bus, unsigned int devfn, int where, int size, u32 *val) { struct zpci_dev *zdev = get_zdev_by_bus(bus); + int ret; if (!zdev || devfn != ZPCI_DEVFN) - return 0; - return zpci_cfg_load(zdev, where, val, size); + ret = -ENODEV; + else + ret = zpci_cfg_load(zdev, where, val, size); + + return ret; } static int pci_write(struct pci_bus *bus, unsigned int devfn, int where, int size, u32 val) { struct zpci_dev *zdev = get_zdev_by_bus(bus); + int ret; if (!zdev || devfn != ZPCI_DEVFN) - return 0; - return zpci_cfg_store(zdev, where, val, size); + ret = -ENODEV; + else + ret = zpci_cfg_store(zdev, where, val, size); + + return ret; } static struct pci_ops pci_root_ops = { @@ -474,7 +478,7 @@ scan: } /* enable interrupts again */ - sic_instr(SIC_IRQ_MODE_SINGLE, NULL, PCI_ISC); + set_irq_ctrl(SIC_IRQ_MODE_SINGLE, NULL, PCI_ISC); /* check again to not lose initiative */ rmb(); @@ -596,19 +600,6 @@ static void zpci_map_resources(struct zpci_dev *zdev) } }; -static void zpci_unmap_resources(struct pci_dev *pdev) -{ - resource_size_t len; - int i; - - for (i = 0; i < PCI_BAR_COUNT; i++) { - len = pci_resource_len(pdev, i); - if (!len) - continue; - pci_iounmap(pdev, (void *) pdev->resource[i].start); - } -}; - struct zpci_dev *zpci_alloc_device(void) { struct zpci_dev *zdev; @@ -636,32 +627,6 @@ void zpci_free_device(struct zpci_dev *zdev) kfree(zdev); } -/* Called on removal of pci_dev, leaves zpci and bus device */ -static void zpci_remove_device(struct pci_dev *pdev) -{ - struct zpci_dev *zdev = get_zdev(pdev); - - dev_info(&pdev->dev, "Removing device %u\n", zdev->domain); - zdev->state = ZPCI_FN_STATE_CONFIGURED; - zpci_dma_exit_device(zdev); - zpci_fmb_disable_device(zdev); - zpci_sysfs_remove_device(&pdev->dev); - zpci_unmap_resources(pdev); - list_del(&zdev->entry); /* can be called from init */ - zdev->pdev = NULL; -} - -static void zpci_scan_devices(void) -{ - struct zpci_dev *zdev; - - mutex_lock(&zpci_list_lock); - list_for_each_entry(zdev, &zpci_list, entry) - if (zdev->state == ZPCI_FN_STATE_CONFIGURED) - zpci_scan_device(zdev); - mutex_unlock(&zpci_list_lock); -} - /* * Too late for any s390 specific setup, since interrupts must be set up * already which requires DMA setup too and the pci scan will access the @@ -688,12 +653,6 @@ int pcibios_enable_device(struct pci_dev *pdev, int mask) return 0; } -void pcibios_disable_device(struct pci_dev *pdev) -{ - zpci_remove_device(pdev); - pdev->sysdata = NULL; -} - int pcibios_add_platform_entries(struct pci_dev *pdev) { return zpci_sysfs_add_device(&pdev->dev); @@ -789,7 +748,7 @@ static int __init zpci_irq_init(void) spin_lock_init(&bucket->lock); /* set summary to 1 to be called every time for the ISC */ *zpci_irq_si = 1; - sic_instr(SIC_IRQ_MODE_SINGLE, NULL, PCI_ISC); + set_irq_ctrl(SIC_IRQ_MODE_SINGLE, NULL, PCI_ISC); return 0; out_ai: @@ -872,7 +831,19 @@ static void zpci_free_iomap(struct zpci_dev *zdev, int entry) spin_unlock(&zpci_iomap_lock); } -static int zpci_create_device_bus(struct zpci_dev *zdev) +int pcibios_add_device(struct pci_dev *pdev) +{ + struct zpci_dev *zdev = get_zdev(pdev); + + zdev->pdev = pdev; + zpci_debug_init_device(zdev); + zpci_fmb_enable_device(zdev); + zpci_map_resources(zdev); + + return 0; +} + +static int zpci_scan_bus(struct zpci_dev *zdev) { struct resource *res; LIST_HEAD(resources); @@ -909,8 +880,8 @@ static int zpci_create_device_bus(struct zpci_dev *zdev) pci_add_resource(&resources, res); } - zdev->bus = pci_create_root_bus(NULL, ZPCI_BUS_NR, &pci_root_ops, - zdev, &resources); + zdev->bus = pci_scan_root_bus(NULL, ZPCI_BUS_NR, &pci_root_ops, + zdev, &resources); if (!zdev->bus) return -EIO; @@ -959,6 +930,13 @@ out: } EXPORT_SYMBOL_GPL(zpci_enable_device); +int zpci_disable_device(struct zpci_dev *zdev) +{ + zpci_dma_exit_device(zdev); + return clp_disable_fh(zdev); +} +EXPORT_SYMBOL_GPL(zpci_disable_device); + int zpci_create_device(struct zpci_dev *zdev) { int rc; @@ -967,9 +945,16 @@ int zpci_create_device(struct zpci_dev *zdev) if (rc) goto out; - rc = zpci_create_device_bus(zdev); + if (zdev->state == ZPCI_FN_STATE_CONFIGURED) { + rc = zpci_enable_device(zdev); + if (rc) + goto out_free; + + zdev->state = ZPCI_FN_STATE_ONLINE; + } + rc = zpci_scan_bus(zdev); if (rc) - goto out_bus; + goto out_disable; mutex_lock(&zpci_list_lock); list_add_tail(&zdev->entry, &zpci_list); @@ -977,21 +962,12 @@ int zpci_create_device(struct zpci_dev *zdev) hotplug_ops->create_slot(zdev); mutex_unlock(&zpci_list_lock); - if (zdev->state == ZPCI_FN_STATE_STANDBY) - return 0; - - rc = zpci_enable_device(zdev); - if (rc) - goto out_start; return 0; -out_start: - mutex_lock(&zpci_list_lock); - list_del(&zdev->entry); - if (hotplug_ops) - hotplug_ops->remove_slot(zdev); - mutex_unlock(&zpci_list_lock); -out_bus: +out_disable: + if (zdev->state == ZPCI_FN_STATE_ONLINE) + zpci_disable_device(zdev); +out_free: zpci_free_domain(zdev); out: return rc; @@ -1016,15 +992,9 @@ int zpci_scan_device(struct zpci_dev *zdev) goto out; } - zpci_debug_init_device(zdev); - zpci_fmb_enable_device(zdev); - zpci_map_resources(zdev); pci_bus_add_devices(zdev->bus); - /* now that pdev was added to the bus mark it as used */ - zdev->state = ZPCI_FN_STATE_ONLINE; return 0; - out: zpci_dma_exit_device(zdev); clp_disable_fh(zdev); @@ -1087,13 +1057,13 @@ void zpci_deregister_hp_ops(void) } EXPORT_SYMBOL_GPL(zpci_deregister_hp_ops); -unsigned int s390_pci_probe = 1; +unsigned int s390_pci_probe; EXPORT_SYMBOL_GPL(s390_pci_probe); char * __init pcibios_setup(char *str) { - if (!strcmp(str, "off")) { - s390_pci_probe = 0; + if (!strcmp(str, "on")) { + s390_pci_probe = 1; return NULL; } return str; @@ -1138,7 +1108,6 @@ static int __init pci_base_init(void) if (rc) goto out_find; - zpci_scan_devices(); return 0; out_find: diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c index f339fe2feb15..bd34359d1546 100644 --- a/arch/s390/pci/pci_clp.c +++ b/arch/s390/pci/pci_clp.c @@ -13,6 +13,7 @@ #include <linux/err.h> #include <linux/delay.h> #include <linux/pci.h> +#include <asm/pci_debug.h> #include <asm/pci_clp.h> /* @@ -144,6 +145,7 @@ int clp_add_pci_device(u32 fid, u32 fh, int configured) struct zpci_dev *zdev; int rc; + zpci_dbg(3, "add fid:%x, fh:%x, c:%d\n", fid, fh, configured); zdev = zpci_alloc_device(); if (IS_ERR(zdev)) return PTR_ERR(zdev); @@ -204,8 +206,8 @@ static int clp_set_pci_fn(u32 *fh, u8 nr_dma_as, u8 command) if (!rc && rrb->response.hdr.rsp == CLP_RC_OK) *fh = rrb->response.fh; else { - pr_err("Set PCI FN failed with response: %x cc: %d\n", - rrb->response.hdr.rsp, rc); + zpci_dbg(0, "SPF fh:%x, cc:%d, resp:%x\n", *fh, rc, + rrb->response.hdr.rsp); rc = -EIO; } clp_free_block(rrb); @@ -221,6 +223,8 @@ int clp_enable_fh(struct zpci_dev *zdev, u8 nr_dma_as) if (!rc) /* Success -> store enabled handle in zdev */ zdev->fh = fh; + + zpci_dbg(3, "ena fid:%x, fh:%x, rc:%d\n", zdev->fid, zdev->fh, rc); return rc; } @@ -237,9 +241,8 @@ int clp_disable_fh(struct zpci_dev *zdev) if (!rc) /* Success -> store disabled handle in zdev */ zdev->fh = fh; - else - dev_err(&zdev->pdev->dev, - "Failed to disable fn handle: 0x%x\n", fh); + + zpci_dbg(3, "dis fid:%x, fh:%x, rc:%d\n", zdev->fid, zdev->fh, rc); return rc; } diff --git a/arch/s390/pci/pci_debug.c b/arch/s390/pci/pci_debug.c index a5d07bc2a547..771b82359af4 100644 --- a/arch/s390/pci/pci_debug.c +++ b/arch/s390/pci/pci_debug.c @@ -11,12 +11,17 @@ #include <linux/kernel.h> #include <linux/seq_file.h> #include <linux/debugfs.h> +#include <linux/export.h> #include <linux/pci.h> #include <asm/debug.h> #include <asm/pci_dma.h> static struct dentry *debugfs_root; +debug_info_t *pci_debug_msg_id; +EXPORT_SYMBOL_GPL(pci_debug_msg_id); +debug_info_t *pci_debug_err_id; +EXPORT_SYMBOL_GPL(pci_debug_err_id); static char *pci_perf_names[] = { /* hardware counters */ @@ -168,7 +173,6 @@ int __init zpci_debug_init(void) return -EINVAL; debug_register_view(pci_debug_msg_id, &debug_sprintf_view); debug_set_level(pci_debug_msg_id, 3); - zpci_dbg("Debug view initialized\n"); /* error log */ pci_debug_err_id = debug_register("pci_error", 2, 1, 16); @@ -176,7 +180,6 @@ int __init zpci_debug_init(void) return -EINVAL; debug_register_view(pci_debug_err_id, &debug_hex_ascii_view); debug_set_level(pci_debug_err_id, 6); - zpci_err("Debug view initialized\n"); debugfs_root = debugfs_create_dir("pci", NULL); return 0; diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c index a547419907c3..f8e69d5bc0a9 100644 --- a/arch/s390/pci/pci_dma.c +++ b/arch/s390/pci/pci_dma.c @@ -169,8 +169,9 @@ static int dma_update_trans(struct zpci_dev *zdev, unsigned long pa, * needs to be redone! */ goto no_refresh; - rc = rpcit_instr((u64) zdev->fh << 32, start_dma_addr, - nr_pages * PAGE_SIZE); + + rc = s390pci_refresh_trans((u64) zdev->fh << 32, start_dma_addr, + nr_pages * PAGE_SIZE); no_refresh: spin_unlock_irqrestore(&zdev->dma_table_lock, irq_flags); @@ -268,8 +269,6 @@ static dma_addr_t s390_dma_map_pages(struct device *dev, struct page *page, int flags = ZPCI_PTE_VALID; dma_addr_t dma_addr; - WARN_ON_ONCE(offset > PAGE_SIZE); - /* This rounds up number of pages based on size and offset */ nr_pages = iommu_num_pages(pa, size, PAGE_SIZE); iommu_page_index = dma_alloc_iommu(zdev, nr_pages); @@ -291,7 +290,7 @@ static dma_addr_t s390_dma_map_pages(struct device *dev, struct page *page, if (!dma_update_trans(zdev, pa, dma_addr, size, flags)) { atomic64_add(nr_pages, (atomic64_t *) &zdev->fmb->mapped_pages); - return dma_addr + offset; + return dma_addr + (offset & ~PAGE_MASK); } out_free: diff --git a/arch/s390/pci/pci_insn.c b/arch/s390/pci/pci_insn.c new file mode 100644 index 000000000000..22eeb9d7ffeb --- /dev/null +++ b/arch/s390/pci/pci_insn.c @@ -0,0 +1,202 @@ +/* + * s390 specific pci instructions + * + * Copyright IBM Corp. 2013 + */ + +#include <linux/export.h> +#include <linux/errno.h> +#include <linux/delay.h> +#include <asm/pci_insn.h> +#include <asm/processor.h> + +#define ZPCI_INSN_BUSY_DELAY 1 /* 1 microsecond */ + +/* Modify PCI Function Controls */ +static inline u8 __mpcifc(u64 req, struct zpci_fib *fib, u8 *status) +{ + u8 cc; + + asm volatile ( + " .insn rxy,0xe300000000d0,%[req],%[fib]\n" + " ipm %[cc]\n" + " srl %[cc],28\n" + : [cc] "=d" (cc), [req] "+d" (req), [fib] "+Q" (*fib) + : : "cc"); + *status = req >> 24 & 0xff; + return cc; +} + +int s390pci_mod_fc(u64 req, struct zpci_fib *fib) +{ + u8 cc, status; + + do { + cc = __mpcifc(req, fib, &status); + if (cc == 2) + msleep(ZPCI_INSN_BUSY_DELAY); + } while (cc == 2); + + if (cc) + printk_once(KERN_ERR "%s: error cc: %d status: %d\n", + __func__, cc, status); + return (cc) ? -EIO : 0; +} + +/* Refresh PCI Translations */ +static inline u8 __rpcit(u64 fn, u64 addr, u64 range, u8 *status) +{ + register u64 __addr asm("2") = addr; + register u64 __range asm("3") = range; + u8 cc; + + asm volatile ( + " .insn rre,0xb9d30000,%[fn],%[addr]\n" + " ipm %[cc]\n" + " srl %[cc],28\n" + : [cc] "=d" (cc), [fn] "+d" (fn) + : [addr] "d" (__addr), "d" (__range) + : "cc"); + *status = fn >> 24 & 0xff; + return cc; +} + +int s390pci_refresh_trans(u64 fn, u64 addr, u64 range) +{ + u8 cc, status; + + do { + cc = __rpcit(fn, addr, range, &status); + if (cc == 2) + udelay(ZPCI_INSN_BUSY_DELAY); + } while (cc == 2); + + if (cc) + printk_once(KERN_ERR "%s: error cc: %d status: %d dma_addr: %Lx size: %Lx\n", + __func__, cc, status, addr, range); + return (cc) ? -EIO : 0; +} + +/* Set Interruption Controls */ +void set_irq_ctrl(u16 ctl, char *unused, u8 isc) +{ + asm volatile ( + " .insn rsy,0xeb00000000d1,%[ctl],%[isc],%[u]\n" + : : [ctl] "d" (ctl), [isc] "d" (isc << 27), [u] "Q" (*unused)); +} + +/* PCI Load */ +static inline int __pcilg(u64 *data, u64 req, u64 offset, u8 *status) +{ + register u64 __req asm("2") = req; + register u64 __offset asm("3") = offset; + int cc = -ENXIO; + u64 __data; + + asm volatile ( + " .insn rre,0xb9d20000,%[data],%[req]\n" + "0: ipm %[cc]\n" + " srl %[cc],28\n" + "1:\n" + EX_TABLE(0b, 1b) + : [cc] "+d" (cc), [data] "=d" (__data), [req] "+d" (__req) + : "d" (__offset) + : "cc"); + *status = __req >> 24 & 0xff; + if (!cc) + *data = __data; + + return cc; +} + +int s390pci_load(u64 *data, u64 req, u64 offset) +{ + u8 status; + int cc; + + do { + cc = __pcilg(data, req, offset, &status); + if (cc == 2) + udelay(ZPCI_INSN_BUSY_DELAY); + } while (cc == 2); + + if (cc) + printk_once(KERN_ERR "%s: error cc: %d status: %d req: %Lx offset: %Lx\n", + __func__, cc, status, req, offset); + return (cc > 0) ? -EIO : cc; +} +EXPORT_SYMBOL_GPL(s390pci_load); + +/* PCI Store */ +static inline int __pcistg(u64 data, u64 req, u64 offset, u8 *status) +{ + register u64 __req asm("2") = req; + register u64 __offset asm("3") = offset; + int cc = -ENXIO; + + asm volatile ( + " .insn rre,0xb9d00000,%[data],%[req]\n" + "0: ipm %[cc]\n" + " srl %[cc],28\n" + "1:\n" + EX_TABLE(0b, 1b) + : [cc] "+d" (cc), [req] "+d" (__req) + : "d" (__offset), [data] "d" (data) + : "cc"); + *status = __req >> 24 & 0xff; + return cc; +} + +int s390pci_store(u64 data, u64 req, u64 offset) +{ + u8 status; + int cc; + + do { + cc = __pcistg(data, req, offset, &status); + if (cc == 2) + udelay(ZPCI_INSN_BUSY_DELAY); + } while (cc == 2); + + if (cc) + printk_once(KERN_ERR "%s: error cc: %d status: %d req: %Lx offset: %Lx\n", + __func__, cc, status, req, offset); + return (cc > 0) ? -EIO : cc; +} +EXPORT_SYMBOL_GPL(s390pci_store); + +/* PCI Store Block */ +static inline int __pcistb(const u64 *data, u64 req, u64 offset, u8 *status) +{ + int cc = -ENXIO; + + asm volatile ( + " .insn rsy,0xeb00000000d0,%[req],%[offset],%[data]\n" + "0: ipm %[cc]\n" + " srl %[cc],28\n" + "1:\n" + EX_TABLE(0b, 1b) + : [cc] "+d" (cc), [req] "+d" (req) + : [offset] "d" (offset), [data] "Q" (*data) + : "cc"); + *status = req >> 24 & 0xff; + return cc; +} + +int s390pci_store_block(const u64 *data, u64 req, u64 offset) +{ + u8 status; + int cc; + + do { + cc = __pcistb(data, req, offset, &status); + if (cc == 2) + udelay(ZPCI_INSN_BUSY_DELAY); + } while (cc == 2); + + if (cc) + printk_once(KERN_ERR "%s: error cc: %d status: %d req: %Lx offset: %Lx\n", + __func__, cc, status, req, offset); + return (cc > 0) ? -EIO : cc; +} +EXPORT_SYMBOL_GPL(s390pci_store_block); diff --git a/arch/s390/pci/pci_msi.c b/arch/s390/pci/pci_msi.c index 0297931335e1..b097aed05a9b 100644 --- a/arch/s390/pci/pci_msi.c +++ b/arch/s390/pci/pci_msi.c @@ -18,8 +18,9 @@ /* mapping of irq numbers to msi_desc */ static struct hlist_head *msi_hash; -static unsigned int msihash_shift = 6; -#define msi_hashfn(nr) hash_long(nr, msihash_shift) +static const unsigned int msi_hash_bits = 8; +#define MSI_HASH_BUCKETS (1U << msi_hash_bits) +#define msi_hashfn(nr) hash_long(nr, msi_hash_bits) static DEFINE_SPINLOCK(msi_map_lock); @@ -74,6 +75,7 @@ int zpci_setup_msi_irq(struct zpci_dev *zdev, struct msi_desc *msi, map->irq = nr; map->msi = msi; zdev->msi_map[nr & ZPCI_MSI_MASK] = map; + INIT_HLIST_NODE(&map->msi_chain); pr_debug("%s hashing irq: %u to bucket nr: %llu\n", __func__, nr, msi_hashfn(nr)); @@ -125,11 +127,11 @@ int __init zpci_msihash_init(void) { unsigned int i; - msi_hash = kmalloc(256 * sizeof(*msi_hash), GFP_KERNEL); + msi_hash = kmalloc(MSI_HASH_BUCKETS * sizeof(*msi_hash), GFP_KERNEL); if (!msi_hash) return -ENOMEM; - for (i = 0; i < (1U << msihash_shift); i++) + for (i = 0; i < MSI_HASH_BUCKETS; i++) INIT_HLIST_HEAD(&msi_hash[i]); return 0; } |