From 2688905e6a9b3647bf7b452cb0ff2bdb166bd8fe Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Wed, 7 May 2008 09:22:52 +0200 Subject: [S390] s390: Optimize user and work TIF check On return from syscall or interrupt, we have to check if we return to userspace (likely) and if there is work todo (less likely) to decide if we handle the work. We can optimize this check: we first check for the less likely work case and then check for userspace. This patch is also a preparation for an additional patch, that fixes a bug in KVM dealing with cpu bound guests. Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/entry.S | 29 ++++++++++++++--------------- arch/s390/kernel/entry64.S | 29 ++++++++++++++--------------- 2 files changed, 28 insertions(+), 30 deletions(-) (limited to 'arch') diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index bdbb3bcd78a5..708cf9cf9a35 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -279,8 +279,6 @@ sysc_do_restart: st %r2,SP_R2(%r15) # store return value (change R2 on stack) sysc_return: - tm SP_PSW+1(%r15),0x01 # returning to user ? - bno BASED(sysc_restore) tm __TI_flags+3(%r9),_TIF_WORK_SVC bnz BASED(sysc_work) # there is work to do (signals etc.) sysc_restore: @@ -312,6 +310,8 @@ sysc_work_loop: # One of the work bits is on. Find out which one. # sysc_work: + tm SP_PSW+1(%r15),0x01 # returning to user ? + bno BASED(sysc_restore) tm __TI_flags+3(%r9),_TIF_MCCK_PENDING bo BASED(sysc_mcck_pending) tm __TI_flags+3(%r9),_TIF_NEED_RESCHED @@ -602,12 +602,6 @@ io_no_vtime: la %r2,SP_PTREGS(%r15) # address of register-save area basr %r14,%r1 # branch to standard irq handler io_return: - tm SP_PSW+1(%r15),0x01 # returning to user ? -#ifdef CONFIG_PREEMPT - bno BASED(io_preempt) # no -> check for preemptive scheduling -#else - bno BASED(io_restore) # no-> skip resched & signal -#endif tm __TI_flags+3(%r9),_TIF_WORK_INT bnz BASED(io_work) # there is work to do (signals etc.) io_restore: @@ -629,10 +623,18 @@ io_restore_trace_psw: .long 0, io_restore_trace + 0x80000000 #endif -#ifdef CONFIG_PREEMPT -io_preempt: +# +# switch to kernel stack, then check the TIF bits +# +io_work: + tm SP_PSW+1(%r15),0x01 # returning to user ? +#ifndef CONFIG_PREEMPT + bno BASED(io_restore) # no-> skip resched & signal +#else + bnz BASED(io_work_user) # no -> check for preemptive scheduling + # check for preemptive scheduling icm %r0,15,__TI_precount(%r9) - bnz BASED(io_restore) + bnz BASED(io_restore) # preemption disabled l %r1,SP_R15(%r15) s %r1,BASED(.Lc_spsize) mvc SP_PTREGS(__PT_SIZE,%r1),SP_PTREGS(%r15) @@ -646,10 +648,7 @@ io_resume_loop: br %r1 # call schedule #endif -# -# switch to kernel stack, then check the TIF bits -# -io_work: +io_work_user: l %r1,__LC_KERNEL_STACK s %r1,BASED(.Lc_spsize) mvc SP_PTREGS(__PT_SIZE,%r1),SP_PTREGS(%r15) diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S index 5a4a7bcd2bba..a57909d63149 100644 --- a/arch/s390/kernel/entry64.S +++ b/arch/s390/kernel/entry64.S @@ -271,8 +271,6 @@ sysc_noemu: stg %r2,SP_R2(%r15) # store return value (change R2 on stack) sysc_return: - tm SP_PSW+1(%r15),0x01 # returning to user ? - jno sysc_restore tm __TI_flags+7(%r9),_TIF_WORK_SVC jnz sysc_work # there is work to do (signals etc.) sysc_restore: @@ -304,6 +302,8 @@ sysc_work_loop: # One of the work bits is on. Find out which one. # sysc_work: + tm SP_PSW+1(%r15),0x01 # returning to user ? + jno sysc_restore tm __TI_flags+7(%r9),_TIF_MCCK_PENDING jo sysc_mcck_pending tm __TI_flags+7(%r9),_TIF_NEED_RESCHED @@ -585,12 +585,6 @@ io_no_vtime: la %r2,SP_PTREGS(%r15) # address of register-save area brasl %r14,do_IRQ # call standard irq handler io_return: - tm SP_PSW+1(%r15),0x01 # returning to user ? -#ifdef CONFIG_PREEMPT - jno io_preempt # no -> check for preemptive scheduling -#else - jno io_restore # no-> skip resched & signal -#endif tm __TI_flags+7(%r9),_TIF_WORK_INT jnz io_work # there is work to do (signals etc.) io_restore: @@ -612,10 +606,18 @@ io_restore_trace_psw: .quad 0, io_restore_trace #endif -#ifdef CONFIG_PREEMPT -io_preempt: +# +# switch to kernel stack, then check TIF bits +# +io_work: + tm SP_PSW+1(%r15),0x01 # returning to user ? +#ifndef CONFIG_PREEMPT + jno io_restore # no-> skip resched & signal +#else + jnz io_work_user # yes -> do resched & signal + # check for preemptive scheduling icm %r0,15,__TI_precount(%r9) - jnz io_restore + jnz io_restore # preemption is disabled # switch to kernel stack lg %r1,SP_R15(%r15) aghi %r1,-SP_SIZE @@ -629,10 +631,7 @@ io_resume_loop: jg preempt_schedule_irq #endif -# -# switch to kernel stack, then check TIF bits -# -io_work: +io_work_user: lg %r1,__LC_KERNEL_STACK aghi %r1,-SP_SIZE mvc SP_PTREGS(__PT_SIZE,%r1),SP_PTREGS(%r15) -- cgit v1.2.3 From 0eaeafa10f3b2bd027e95859a6785d4c7fcc174c Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Wed, 7 May 2008 09:22:53 +0200 Subject: [S390] s390-kvm: leave sie context on work. Removes preemption requirement From: Martin Schwidefsky This patch fixes a bug with cpu bound guest on kvm-s390. Sometimes it was impossible to deliver a signal to a spinning guest. We used preemption as a circumvention. The preemption notifiers called vcpu_load, which checked for pending signals and triggered a host intercept. But even with preemption, a sigkill was not delivered immediately. This patch changes the low level host interrupt handler to check for the SIE instruction, if TIF_WORK is set. In that case we change the instruction pointer of the return PSW to rerun the vcpu_run loop. The kvm code sees an intercept reason 0 if that happens. This patch adds accounting for these types of intercept as well. The advantages: - works with and without preemption - signals are delivered immediately - much better host latencies without preemption Acked-by: Carsten Otte Signed-off-by: Christian Borntraeger Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/entry64.S | 30 +++++++++++++++++++++++++++++- arch/s390/kvm/Kconfig | 1 - arch/s390/kvm/intercept.c | 3 +++ arch/s390/kvm/kvm-s390.c | 5 +---- 4 files changed, 33 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S index a57909d63149..fee10177dbfc 100644 --- a/arch/s390/kernel/entry64.S +++ b/arch/s390/kernel/entry64.S @@ -607,14 +607,37 @@ io_restore_trace_psw: #endif # -# switch to kernel stack, then check TIF bits +# There is work todo, we need to check if we return to userspace, then +# check, if we are in SIE, if yes leave it # io_work: tm SP_PSW+1(%r15),0x01 # returning to user ? #ifndef CONFIG_PREEMPT +#if defined(CONFIG_KVM) || defined(CONFIG_KVM_MODULE) + jnz io_work_user # yes -> no need to check for SIE + la %r1, BASED(sie_opcode) # we return to kernel here + lg %r2, SP_PSW+8(%r15) + clc 0(2,%r1), 0(%r2) # is current instruction = SIE? + jne io_restore # no-> return to kernel + lg %r1, SP_PSW+8(%r15) # yes-> add 4 bytes to leave SIE + aghi %r1, 4 + stg %r1, SP_PSW+8(%r15) + j io_restore # return to kernel +#else jno io_restore # no-> skip resched & signal +#endif #else jnz io_work_user # yes -> do resched & signal +#if defined(CONFIG_KVM) || defined(CONFIG_KVM_MODULE) + la %r1, BASED(sie_opcode) + lg %r2, SP_PSW+8(%r15) + clc 0(2,%r1), 0(%r2) # is current instruction = SIE? + jne 0f # no -> leave PSW alone + lg %r1, SP_PSW+8(%r15) # yes-> add 4 bytes to leave SIE + aghi %r1, 4 + stg %r1, SP_PSW+8(%r15) +0: +#endif # check for preemptive scheduling icm %r0,15,__TI_precount(%r9) jnz io_restore # preemption is disabled @@ -652,6 +675,11 @@ io_work_loop: j io_restore io_work_done: +#if defined(CONFIG_KVM) || defined(CONFIG_KVM_MODULE) +sie_opcode: + .long 0xb2140000 +#endif + # # _TIF_MCCK_PENDING is set, call handler # diff --git a/arch/s390/kvm/Kconfig b/arch/s390/kvm/Kconfig index 1761b74d639b..e051cad1f1e0 100644 --- a/arch/s390/kvm/Kconfig +++ b/arch/s390/kvm/Kconfig @@ -22,7 +22,6 @@ config KVM select PREEMPT_NOTIFIERS select ANON_INODES select S390_SWITCH_AMODE - select PREEMPT ---help--- Support hosting paravirtualized guest machines using the SIE virtualization capability on the mainframe. This should work diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c index 349581a26103..47a0b642174c 100644 --- a/arch/s390/kvm/intercept.c +++ b/arch/s390/kvm/intercept.c @@ -105,6 +105,9 @@ static intercept_handler_t instruction_handlers[256] = { static int handle_noop(struct kvm_vcpu *vcpu) { switch (vcpu->arch.sie_block->icptcode) { + case 0x0: + vcpu->stat.exit_null++; + break; case 0x10: vcpu->stat.exit_external_request++; break; diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 98d1e73e01f1..0ac36a649eba 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -31,6 +31,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { "userspace_handled", VCPU_STAT(exit_userspace) }, + { "exit_null", VCPU_STAT(exit_null) }, { "exit_validity", VCPU_STAT(exit_validity) }, { "exit_stop_request", VCPU_STAT(exit_stop_request) }, { "exit_external_request", VCPU_STAT(exit_external_request) }, @@ -221,10 +222,6 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) vcpu->arch.guest_fpregs.fpc &= FPC_VALID_MASK; restore_fp_regs(&vcpu->arch.guest_fpregs); restore_access_regs(vcpu->arch.guest_acrs); - - if (signal_pending(current)) - atomic_set_mask(CPUSTAT_STOP_INT, - &vcpu->arch.sie_block->cpuflags); } void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) -- cgit v1.2.3 From b499d76bfd78e900039155247e1c21bfdf807b7b Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Wed, 7 May 2008 09:22:57 +0200 Subject: [S390] compat ptrace cleanup This removes redundant arch code for generic ptrace requests already handled by ptrace_request and compat_ptrace_request. It simplifies things to just have the standard entry points, and use the generic compat_sys_ptrace. Signed-off-by: Roland McGrath Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/compat_wrapper.S | 2 +- arch/s390/kernel/ptrace.c | 100 +++----------------------------------- 2 files changed, 7 insertions(+), 95 deletions(-) (limited to 'arch') diff --git a/arch/s390/kernel/compat_wrapper.S b/arch/s390/kernel/compat_wrapper.S index 743d54f0b8db..d003a6e16afb 100644 --- a/arch/s390/kernel/compat_wrapper.S +++ b/arch/s390/kernel/compat_wrapper.S @@ -121,7 +121,7 @@ sys32_ptrace_wrapper: lgfr %r3,%r3 # long llgtr %r4,%r4 # long llgfr %r5,%r5 # long - jg sys_ptrace # branch to system call + jg compat_sys_ptrace # branch to system call .globl sys32_alarm_wrapper sys32_alarm_wrapper: diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index 7f4270163744..35827b9bd4d1 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -292,8 +292,7 @@ poke_user(struct task_struct *child, addr_t addr, addr_t data) return 0; } -static int -do_ptrace_normal(struct task_struct *child, long request, long addr, long data) +long arch_ptrace(struct task_struct *child, long request, long addr, long data) { ptrace_area parea; int copied, ret; @@ -529,35 +528,19 @@ poke_user_emu31(struct task_struct *child, addr_t addr, addr_t data) return 0; } -static int -do_ptrace_emu31(struct task_struct *child, long request, long addr, long data) +long compat_arch_ptrace(struct task_struct *child, compat_long_t request, + compat_ulong_t caddr, compat_ulong_t cdata) { - unsigned int tmp; /* 4 bytes !! */ + unsigned long addr = caddr; + unsigned long data = cdata; ptrace_area_emu31 parea; int copied, ret; switch (request) { - case PTRACE_PEEKTEXT: - case PTRACE_PEEKDATA: - /* read word at location addr. */ - copied = access_process_vm(child, addr, &tmp, sizeof(tmp), 0); - if (copied != sizeof(tmp)) - return -EIO; - return put_user(tmp, (unsigned int __force __user *) data); - case PTRACE_PEEKUSR: /* read the word at location addr in the USER area. */ return peek_user_emu31(child, addr, data); - case PTRACE_POKETEXT: - case PTRACE_POKEDATA: - /* write the word at location addr. */ - tmp = data; - copied = access_process_vm(child, addr, &tmp, sizeof(tmp), 1); - if (copied != sizeof(tmp)) - return -EIO; - return 0; - case PTRACE_POKEUSR: /* write the word at location addr in the USER area */ return poke_user_emu31(child, addr, data); @@ -587,82 +570,11 @@ do_ptrace_emu31(struct task_struct *child, long request, long addr, long data) copied += sizeof(unsigned int); } return 0; - case PTRACE_GETEVENTMSG: - return put_user((__u32) child->ptrace_message, - (unsigned int __force __user *) data); - case PTRACE_GETSIGINFO: - if (child->last_siginfo == NULL) - return -EINVAL; - return copy_siginfo_to_user32((compat_siginfo_t - __force __user *) data, - child->last_siginfo); - case PTRACE_SETSIGINFO: - if (child->last_siginfo == NULL) - return -EINVAL; - return copy_siginfo_from_user32(child->last_siginfo, - (compat_siginfo_t - __force __user *) data); } - return ptrace_request(child, request, addr, data); + return compat_ptrace_request(child, request, addr, data); } #endif -long arch_ptrace(struct task_struct *child, long request, long addr, long data) -{ - switch (request) { - case PTRACE_SYSCALL: - /* continue and stop at next (return from) syscall */ - case PTRACE_CONT: - /* restart after signal. */ - if (!valid_signal(data)) - return -EIO; - if (request == PTRACE_SYSCALL) - set_tsk_thread_flag(child, TIF_SYSCALL_TRACE); - else - clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); - child->exit_code = data; - /* make sure the single step bit is not set. */ - user_disable_single_step(child); - wake_up_process(child); - return 0; - - case PTRACE_KILL: - /* - * make the child exit. Best I can do is send it a sigkill. - * perhaps it should be put in the status that it wants to - * exit. - */ - if (child->exit_state == EXIT_ZOMBIE) /* already dead */ - return 0; - child->exit_code = SIGKILL; - /* make sure the single step bit is not set. */ - user_disable_single_step(child); - wake_up_process(child); - return 0; - - case PTRACE_SINGLESTEP: - /* set the trap flag. */ - if (!valid_signal(data)) - return -EIO; - clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); - child->exit_code = data; - user_enable_single_step(child); - /* give it a chance to run. */ - wake_up_process(child); - return 0; - - /* Do requests that differ for 31/64 bit */ - default: -#ifdef CONFIG_COMPAT - if (test_thread_flag(TIF_31BIT)) - return do_ptrace_emu31(child, request, addr, data); -#endif - return do_ptrace_normal(child, request, addr, data); - } - /* Not reached. */ - return -EIO; -} - asmlinkage void syscall_trace(struct pt_regs *regs, int entryexit) { -- cgit v1.2.3 From 45e576b1c3d0020607b8666c0247164e92c7d719 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Wed, 7 May 2008 09:22:59 +0200 Subject: [S390] guest page hinting light Use the existing arch_alloc_page/arch_free_page callbacks to do the guest page state transitions between stable and unused. Acked-by: Rik van Riel Signed-off-by: Martin Schwidefsky --- arch/s390/Kconfig | 7 ++++ arch/s390/mm/Makefile | 1 + arch/s390/mm/init.c | 3 ++ arch/s390/mm/page-states.c | 79 ++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 90 insertions(+) create mode 100644 arch/s390/mm/page-states.c (limited to 'arch') diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 29a7940f284f..1d035082e78e 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -430,6 +430,13 @@ config CMM_IUCV Select this option to enable the special message interface to the cooperative memory management. +config PAGE_STATES + bool "Unused page notification" + help + This enables the notification of unused pages to the + hypervisor. The ESSA instruction is used to do the states + changes between a page that has content and the unused state. + config VIRT_TIMER bool "Virtual CPU timer support" help diff --git a/arch/s390/mm/Makefile b/arch/s390/mm/Makefile index fb988a48a754..2a7458134544 100644 --- a/arch/s390/mm/Makefile +++ b/arch/s390/mm/Makefile @@ -5,3 +5,4 @@ obj-y := init.o fault.o extmem.o mmap.o vmem.o pgtable.o obj-$(CONFIG_CMM) += cmm.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o +obj-$(CONFIG_PAGE_STATES) += page-states.o diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index fa31de6ae97a..29f3a63806b9 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -126,6 +126,9 @@ void __init mem_init(void) /* clear the zero-page */ memset(empty_zero_page, 0, PAGE_SIZE); + /* Setup guest page hinting */ + cmma_init(); + /* this will put all low memory onto the freelists */ totalram_pages += free_all_bootmem(); diff --git a/arch/s390/mm/page-states.c b/arch/s390/mm/page-states.c new file mode 100644 index 000000000000..fc0ad73ffd90 --- /dev/null +++ b/arch/s390/mm/page-states.c @@ -0,0 +1,79 @@ +/* + * arch/s390/mm/page-states.c + * + * Copyright IBM Corp. 2008 + * + * Guest page hinting for unused pages. + * + * Author(s): Martin Schwidefsky + */ + +#include +#include +#include +#include +#include + +#define ESSA_SET_STABLE 1 +#define ESSA_SET_UNUSED 2 + +static int cmma_flag; + +static int __init cmma(char *str) +{ + char *parm; + parm = strstrip(str); + if (strcmp(parm, "yes") == 0 || strcmp(parm, "on") == 0) { + cmma_flag = 1; + return 1; + } + cmma_flag = 0; + if (strcmp(parm, "no") == 0 || strcmp(parm, "off") == 0) + return 1; + return 0; +} + +__setup("cmma=", cmma); + +void __init cmma_init(void) +{ + register unsigned long tmp asm("0") = 0; + register int rc asm("1") = -EOPNOTSUPP; + + if (!cmma_flag) + return; + asm volatile( + " .insn rrf,0xb9ab0000,%1,%1,0,0\n" + "0: la %0,0\n" + "1:\n" + EX_TABLE(0b,1b) + : "+&d" (rc), "+&d" (tmp)); + if (rc) + cmma_flag = 0; +} + +void arch_free_page(struct page *page, int order) +{ + int i, rc; + + if (!cmma_flag) + return; + for (i = 0; i < (1 << order); i++) + asm volatile(".insn rrf,0xb9ab0000,%0,%1,%2,0" + : "=&d" (rc) + : "a" ((page_to_pfn(page) + i) << PAGE_SHIFT), + "i" (ESSA_SET_UNUSED)); +} + +void arch_alloc_page(struct page *page, int order) +{ + int i, rc; + + if (!cmma_flag) + return; + for (i = 0; i < (1 << order); i++) + asm volatile(".insn rrf,0xb9ab0000,%0,%1,%2,0" + : "=&d" (rc) + : "a" ((page_to_pfn(page) + i) << PAGE_SHIFT), + "i" (ESSA_SET_STABLE)); +} -- cgit v1.2.3