From df9ceff906abf5edc284c11ac899b3b3f32b4dd3 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 17 Mar 2016 12:45:33 +0100 Subject: s390: disable postinit-readonly for now This is a temporary fix to let lkdtm run again on s390, though it'll still fail the ro_after_init tests. Until rodata and ro_after_init sections can be split on s390, disable special handling of ro_after_init. Signed-off-by: Kees Cook Reported-by: Christian Borntraeger Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/cache.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/s390/include') diff --git a/arch/s390/include/asm/cache.h b/arch/s390/include/asm/cache.h index 4d7ccac5fd1d..22da3b34c655 100644 --- a/arch/s390/include/asm/cache.h +++ b/arch/s390/include/asm/cache.h @@ -15,4 +15,7 @@ #define __read_mostly __attribute__((__section__(".data..read_mostly"))) +/* Read-only memory is marked before mark_rodata_ro() is called. */ +#define __ro_after_init __read_mostly + #endif -- cgit v1.2.3 From 3358999a8e7ae82912e5908adb7150a6558d6702 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 21 Mar 2016 13:48:43 +0100 Subject: s390: wire up preadv2/pwritev2 syscalls Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/include/uapi/asm/unistd.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch/s390/include') diff --git a/arch/s390/include/uapi/asm/unistd.h b/arch/s390/include/uapi/asm/unistd.h index ab3aa6875a59..4384bc797a54 100644 --- a/arch/s390/include/uapi/asm/unistd.h +++ b/arch/s390/include/uapi/asm/unistd.h @@ -311,7 +311,9 @@ #define __NR_shutdown 373 #define __NR_mlock2 374 #define __NR_copy_file_range 375 -#define NR_syscalls 376 +#define __NR_preadv2 376 +#define __NR_pwritev2 377 +#define NR_syscalls 378 /* * There are some system calls that are not present on 64 bit, some -- cgit v1.2.3 From 9d89d9e61d361f3adb75e1aebe4bb367faf16cfa Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Thu, 31 Mar 2016 11:48:31 +0200 Subject: s390/pci: add extra padding to function measurement block Newer machines might use a different (larger) format for function measurement blocks. To ensure that we comply with the alignment requirement on these machines and prevent memory corruption (when firmware writes more data than we expect) add 16 padding bytes at the end of the fmb. Cc: stable@vger.kernel.org # v4.1+ Signed-off-by: Sebastian Ott Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/pci.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/s390/include') diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h index b6bfa169a002..535a46d46d28 100644 --- a/arch/s390/include/asm/pci.h +++ b/arch/s390/include/asm/pci.h @@ -44,7 +44,8 @@ struct zpci_fmb { u64 rpcit_ops; u64 dma_rbytes; u64 dma_wbytes; -} __packed __aligned(64); + u64 pad[2]; +} __packed __aligned(128); enum zpci_state { ZPCI_FN_STATE_RESERVED, -- cgit v1.2.3 From 4f375903ccbff78e1da02133e51e37a097cd3313 Mon Sep 17 00:00:00 2001 From: Sudip Mukherjee Date: Fri, 1 Apr 2016 13:35:16 +0100 Subject: s390/seccomp: include generic seccomp header file Fixes this build error on linux-next: kernel/seccomp.c: In function '__secure_computing_strict': kernel/seccomp.c:526:3: error: implicit declaration of function 'get_compat_mode1_syscalls' The retrieval of compat syscall numbers were moved into inline function defined in asm-generic header but the asm-generic header is not being used by s390. [heiko.carstens@de.ibm.com]: even though the build error will trigger only in the next merge window it makes sense to include the generic header file already now. Fixes: ("seccomp: Get compat syscalls from asm-generic header") Cc: Matt Redfearn Signed-off-by: Sudip Mukherjee Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/seccomp.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/s390/include') diff --git a/arch/s390/include/asm/seccomp.h b/arch/s390/include/asm/seccomp.h index 781a9cf9b002..e10f8337367b 100644 --- a/arch/s390/include/asm/seccomp.h +++ b/arch/s390/include/asm/seccomp.h @@ -13,4 +13,6 @@ #define __NR_seccomp_exit_32 __NR_exit #define __NR_seccomp_sigreturn_32 __NR_sigreturn +#include + #endif /* _ASM_S390_SECCOMP_H */ -- cgit v1.2.3 From 723cacbd9dc79582e562c123a0bacf8bfc69e72a Mon Sep 17 00:00:00 2001 From: Gerald Schaefer Date: Fri, 15 Apr 2016 16:38:40 +0200 Subject: s390/mm: fix asce_bits handling with dynamic pagetable levels There is a race with multi-threaded applications between context switch and pagetable upgrade. In switch_mm() a new user_asce is built from mm->pgd and mm->context.asce_bits, w/o holding any locks. A concurrent mmap with a pagetable upgrade on another thread in crst_table_upgrade() could already have set new asce_bits, but not yet the new mm->pgd. This would result in a corrupt user_asce in switch_mm(), and eventually in a kernel panic from a translation exception. Fix this by storing the complete asce instead of just the asce_bits, which can then be read atomically from switch_mm(), so that it either sees the old value or the new value, but no mixture. Both cases are OK. Having the old value would result in a page fault on access to the higher level memory, but the fault handler would see the new mm->pgd, if it was a valid access after the mmap on the other thread has completed. So as worst-case scenario we would have a page fault loop for the racing thread until the next time slice. Also remove dead code and simplify the upgrade/downgrade path, there are no upgrades from 2 levels, and only downgrades from 3 levels for compat tasks. There are also no concurrent upgrades, because the mmap_sem is held with down_write() in do_mmap, so the flush and table checks during upgrade can be removed. Reported-by: Michael Munday Reviewed-by: Martin Schwidefsky Signed-off-by: Gerald Schaefer Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/mmu.h | 2 +- arch/s390/include/asm/mmu_context.h | 28 ++++++++++++++++++++++------ arch/s390/include/asm/pgalloc.h | 4 ++-- arch/s390/include/asm/processor.h | 2 +- arch/s390/include/asm/tlbflush.h | 9 +++------ 5 files changed, 29 insertions(+), 16 deletions(-) (limited to 'arch/s390/include') diff --git a/arch/s390/include/asm/mmu.h b/arch/s390/include/asm/mmu.h index d29ad9545b41..081b2ad99d73 100644 --- a/arch/s390/include/asm/mmu.h +++ b/arch/s390/include/asm/mmu.h @@ -11,7 +11,7 @@ typedef struct { spinlock_t list_lock; struct list_head pgtable_list; struct list_head gmap_list; - unsigned long asce_bits; + unsigned long asce; unsigned long asce_limit; unsigned long vdso_base; /* The mmu context allocates 4K page tables. */ diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h index d321469eeda7..c837b79b455d 100644 --- a/arch/s390/include/asm/mmu_context.h +++ b/arch/s390/include/asm/mmu_context.h @@ -26,12 +26,28 @@ static inline int init_new_context(struct task_struct *tsk, mm->context.has_pgste = 0; mm->context.use_skey = 0; #endif - if (mm->context.asce_limit == 0) { + switch (mm->context.asce_limit) { + case 1UL << 42: + /* + * forked 3-level task, fall through to set new asce with new + * mm->pgd + */ + case 0: /* context created by exec, set asce limit to 4TB */ - mm->context.asce_bits = _ASCE_TABLE_LENGTH | - _ASCE_USER_BITS | _ASCE_TYPE_REGION3; mm->context.asce_limit = STACK_TOP_MAX; - } else if (mm->context.asce_limit == (1UL << 31)) { + mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH | + _ASCE_USER_BITS | _ASCE_TYPE_REGION3; + break; + case 1UL << 53: + /* forked 4-level task, set new asce with new mm->pgd */ + mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH | + _ASCE_USER_BITS | _ASCE_TYPE_REGION2; + break; + case 1UL << 31: + /* forked 2-level compat task, set new asce with new mm->pgd */ + mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH | + _ASCE_USER_BITS | _ASCE_TYPE_SEGMENT; + /* pgd_alloc() did not increase mm->nr_pmds */ mm_inc_nr_pmds(mm); } crst_table_init((unsigned long *) mm->pgd, pgd_entry_type(mm)); @@ -42,7 +58,7 @@ static inline int init_new_context(struct task_struct *tsk, static inline void set_user_asce(struct mm_struct *mm) { - S390_lowcore.user_asce = mm->context.asce_bits | __pa(mm->pgd); + S390_lowcore.user_asce = mm->context.asce; if (current->thread.mm_segment.ar4) __ctl_load(S390_lowcore.user_asce, 7, 7); set_cpu_flag(CIF_ASCE); @@ -71,7 +87,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, { int cpu = smp_processor_id(); - S390_lowcore.user_asce = next->context.asce_bits | __pa(next->pgd); + S390_lowcore.user_asce = next->context.asce; if (prev == next) return; if (MACHINE_HAS_TLB_LC) diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h index 9b3d9b6099f2..da34cb6b1f3b 100644 --- a/arch/s390/include/asm/pgalloc.h +++ b/arch/s390/include/asm/pgalloc.h @@ -52,8 +52,8 @@ static inline unsigned long pgd_entry_type(struct mm_struct *mm) return _REGION2_ENTRY_EMPTY; } -int crst_table_upgrade(struct mm_struct *, unsigned long limit); -void crst_table_downgrade(struct mm_struct *, unsigned long limit); +int crst_table_upgrade(struct mm_struct *); +void crst_table_downgrade(struct mm_struct *); static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long address) { diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index d6fd22ea270d..18cdede1aeda 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -175,7 +175,7 @@ extern __vector128 init_task_fpu_regs[__NUM_VXRS]; regs->psw.mask = PSW_USER_BITS | PSW_MASK_BA; \ regs->psw.addr = new_psw; \ regs->gprs[15] = new_stackp; \ - crst_table_downgrade(current->mm, 1UL << 31); \ + crst_table_downgrade(current->mm); \ execve_tail(); \ } while (0) diff --git a/arch/s390/include/asm/tlbflush.h b/arch/s390/include/asm/tlbflush.h index ca148f7c3eaa..a2e6ef32e054 100644 --- a/arch/s390/include/asm/tlbflush.h +++ b/arch/s390/include/asm/tlbflush.h @@ -110,8 +110,7 @@ static inline void __tlb_flush_asce(struct mm_struct *mm, unsigned long asce) static inline void __tlb_flush_kernel(void) { if (MACHINE_HAS_IDTE) - __tlb_flush_idte((unsigned long) init_mm.pgd | - init_mm.context.asce_bits); + __tlb_flush_idte(init_mm.context.asce); else __tlb_flush_global(); } @@ -133,8 +132,7 @@ static inline void __tlb_flush_asce(struct mm_struct *mm, unsigned long asce) static inline void __tlb_flush_kernel(void) { if (MACHINE_HAS_TLB_LC) - __tlb_flush_idte_local((unsigned long) init_mm.pgd | - init_mm.context.asce_bits); + __tlb_flush_idte_local(init_mm.context.asce); else __tlb_flush_local(); } @@ -148,8 +146,7 @@ static inline void __tlb_flush_mm(struct mm_struct * mm) * only ran on the local cpu. */ if (MACHINE_HAS_IDTE && list_empty(&mm->context.gmap_list)) - __tlb_flush_asce(mm, (unsigned long) mm->pgd | - mm->context.asce_bits); + __tlb_flush_asce(mm, mm->context.asce); else __tlb_flush_full(mm); } -- cgit v1.2.3