From dfe52244e004f5103478966cd88351feb5c50d79 Mon Sep 17 00:00:00 2001 From: Robert Love Date: Thu, 23 Jun 2005 00:09:04 -0700 Subject: [PATCH] kstrdup: convert a few existing implementations Convert a bunch of strdup() implementations and their callers to the new kstrdup(). A few remain, for example see sound/core, and there are tons of open coded strdup()'s around. Sigh. But this is a start. Signed-off-by: Robert Love Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/um/kernel/process_kern.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'arch/um/kernel') diff --git a/arch/um/kernel/process_kern.c b/arch/um/kernel/process_kern.c index 804c6bbdf67c..157584ae4792 100644 --- a/arch/um/kernel/process_kern.c +++ b/arch/um/kernel/process_kern.c @@ -8,6 +8,7 @@ #include "linux/kernel.h" #include "linux/sched.h" #include "linux/interrupt.h" +#include "linux/string.h" #include "linux/mm.h" #include "linux/slab.h" #include "linux/utsname.h" @@ -322,12 +323,7 @@ void do_uml_exitcalls(void) char *uml_strdup(char *string) { - char *new; - - new = kmalloc(strlen(string) + 1, GFP_KERNEL); - if(new == NULL) return(NULL); - strcpy(new, string); - return(new); + return kstrdup(string, GFP_KERNEL); } int copy_to_user_proc(void __user *to, void *from, int size) -- cgit v1.2.3 From e0877f07e85a46e4fde32bd84f08551d360839fe Mon Sep 17 00:00:00 2001 From: Jeff Dike Date: Sat, 25 Jun 2005 14:55:21 -0700 Subject: [PATCH] uml: fork cleanup Fix the do_fork calling convention: normal arch pass the regs and the new sp value to do_fork instead of NULL. Currently the arch-independent code ignores these values, while the UML code (actually it's copy_thread) gets the right values by itself. With this patch, things are fixed up. Signed-off-by: Paolo 'Blaisorblade' Giarrusso Signed-off-by: Jeff Dike Cc: Paolo Giarrusso Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/um/kernel/process_kern.c | 4 ++-- arch/um/kernel/skas/process_kern.c | 14 +------------- arch/um/kernel/syscall_kern.c | 19 +++++-------------- arch/um/kernel/tt/process_kern.c | 17 +++-------------- 4 files changed, 11 insertions(+), 43 deletions(-) (limited to 'arch/um/kernel') diff --git a/arch/um/kernel/process_kern.c b/arch/um/kernel/process_kern.c index 157584ae4792..0c57bc6e3747 100644 --- a/arch/um/kernel/process_kern.c +++ b/arch/um/kernel/process_kern.c @@ -96,8 +96,8 @@ int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags) current->thread.request.u.thread.proc = fn; current->thread.request.u.thread.arg = arg; - pid = do_fork(CLONE_VM | CLONE_UNTRACED | flags, 0, NULL, 0, NULL, - NULL); + pid = do_fork(CLONE_VM | CLONE_UNTRACED | flags, 0, + ¤t->thread.regs, 0, NULL, NULL); if(pid < 0) panic("do_fork failed in kernel_thread, errno = %d", pid); return(pid); diff --git a/arch/um/kernel/skas/process_kern.c b/arch/um/kernel/skas/process_kern.c index fc71ef295782..8f0607f54b7a 100644 --- a/arch/um/kernel/skas/process_kern.c +++ b/arch/um/kernel/skas/process_kern.c @@ -111,8 +111,7 @@ int copy_thread_skas(int nr, unsigned long clone_flags, unsigned long sp, void (*handler)(int); if(current->thread.forking){ - memcpy(&p->thread.regs.regs.skas, - ¤t->thread.regs.regs.skas, + memcpy(&p->thread.regs.regs.skas, ®s->regs.skas, sizeof(p->thread.regs.regs.skas)); REGS_SET_SYSCALL_RETURN(p->thread.regs.regs.skas.regs, 0); if(sp != 0) REGS_SP(p->thread.regs.regs.skas.regs) = sp; @@ -201,14 +200,3 @@ int thread_pid_skas(struct task_struct *task) #warning Need to look up userspace_pid by cpu return(userspace_pid[0]); } - -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ diff --git a/arch/um/kernel/syscall_kern.c b/arch/um/kernel/syscall_kern.c index b7a55251e897..8e1a3501ff46 100644 --- a/arch/um/kernel/syscall_kern.c +++ b/arch/um/kernel/syscall_kern.c @@ -31,7 +31,8 @@ long sys_fork(void) long ret; current->thread.forking = 1; - ret = do_fork(SIGCHLD, 0, NULL, 0, NULL, NULL); + ret = do_fork(SIGCHLD, UPT_SP(¤t->thread.regs.regs), + ¤t->thread.regs, 0, NULL, NULL); current->thread.forking = 0; return(ret); } @@ -41,8 +42,9 @@ long sys_vfork(void) long ret; current->thread.forking = 1; - ret = do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, 0, NULL, 0, NULL, - NULL); + ret = do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, + UPT_SP(¤t->thread.regs.regs), + ¤t->thread.regs, 0, NULL, NULL); current->thread.forking = 0; return(ret); } @@ -162,14 +164,3 @@ int next_syscall_index(int limit) spin_unlock(&syscall_lock); return(ret); } - -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ diff --git a/arch/um/kernel/tt/process_kern.c b/arch/um/kernel/tt/process_kern.c index 776310fd5b8b..a189a2b92935 100644 --- a/arch/um/kernel/tt/process_kern.c +++ b/arch/um/kernel/tt/process_kern.c @@ -266,10 +266,10 @@ int copy_thread_tt(int nr, unsigned long clone_flags, unsigned long sp, } if(current->thread.forking){ - sc_to_sc(UPT_SC(&p->thread.regs.regs), - UPT_SC(¤t->thread.regs.regs)); + sc_to_sc(UPT_SC(&p->thread.regs.regs), UPT_SC(®s->regs)); SC_SET_SYSCALL_RETURN(UPT_SC(&p->thread.regs.regs), 0); - if(sp != 0) SC_SP(UPT_SC(&p->thread.regs.regs)) = sp; + if(sp != 0) + SC_SP(UPT_SC(&p->thread.regs.regs)) = sp; } p->thread.mode.tt.extern_pid = new_pid; @@ -459,14 +459,3 @@ int is_valid_pid(int pid) read_unlock(&tasklist_lock); return(0); } - -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ -- cgit v1.2.3 From a6f4e3cf75538a75ee4fab620e3c8466f2154458 Mon Sep 17 00:00:00 2001 From: Jeff Dike Date: Sat, 25 Jun 2005 14:55:22 -0700 Subject: [PATCH] uml: fix timer initialization In skas mode, the call to uml_idle_timer permanently shut off the virtual timer, resulting in no timer ticks to anything but the idle thread. This is likely the cause of the soft lockups that are seen sporadically in recent UMLs. Signed-off-by: Jeff Dike Cc: Paolo Giarrusso Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/um/kernel/process_kern.c | 2 +- arch/um/kernel/skas/process_kern.c | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'arch/um/kernel') diff --git a/arch/um/kernel/process_kern.c b/arch/um/kernel/process_kern.c index 0c57bc6e3747..d4036ed680bc 100644 --- a/arch/um/kernel/process_kern.c +++ b/arch/um/kernel/process_kern.c @@ -169,7 +169,7 @@ int current_pid(void) void default_idle(void) { - uml_idle_timer(); + CHOOSE_MODE(uml_idle_timer(), (void) 0); atomic_inc(&init_mm.mm_count); current->mm = &init_mm; diff --git a/arch/um/kernel/skas/process_kern.c b/arch/um/kernel/skas/process_kern.c index 8f0607f54b7a..0a7b8aa55db8 100644 --- a/arch/um/kernel/skas/process_kern.c +++ b/arch/um/kernel/skas/process_kern.c @@ -180,7 +180,6 @@ int start_uml_skas(void) start_userspace(0); init_new_thread_signals(1); - uml_idle_timer(); init_task.thread.request.u.thread.proc = start_kernel_proc; init_task.thread.request.u.thread.arg = NULL; -- cgit v1.2.3 From 026549d28469f7d4ca7e5a4707f0d2dc4f2c164c Mon Sep 17 00:00:00 2001 From: Jeff Dike Date: Sat, 25 Jun 2005 14:55:23 -0700 Subject: [PATCH] uml: always disable kmalloc during shutdown kmalloc wasn't being disabled during panic. This patch ensures that, no matter how UML is exiting, it is disabled. This matters because part of the cleanup is to remove the umid file, which involves readdir, which calls malloc. This must map to libc malloc, rather than kmalloc or vmalloc. Signed-off-by: Jeff Dike Cc: Paolo Giarrusso Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/um/kernel/main.c | 1 - arch/um/kernel/reboot.c | 9 ++++----- 2 files changed, 4 insertions(+), 6 deletions(-) (limited to 'arch/um/kernel') diff --git a/arch/um/kernel/main.c b/arch/um/kernel/main.c index e59f58152678..1e1a87f1c510 100644 --- a/arch/um/kernel/main.c +++ b/arch/um/kernel/main.c @@ -69,7 +69,6 @@ static __init void do_uml_initcalls(void) static void last_ditch_exit(int sig) { - kmalloc_ok = 0; signal(SIGINT, SIG_DFL); signal(SIGTERM, SIG_DFL); signal(SIGHUP, SIG_DFL); diff --git a/arch/um/kernel/reboot.c b/arch/um/kernel/reboot.c index 207f89d74908..fcec51da1d37 100644 --- a/arch/um/kernel/reboot.c +++ b/arch/um/kernel/reboot.c @@ -38,14 +38,14 @@ static void kill_off_processes(void) void uml_cleanup(void) { - kill_off_processes(); + kmalloc_ok = 0; do_uml_exitcalls(); + kill_off_processes(); } void machine_restart(char * __unused) { - do_uml_exitcalls(); - kill_off_processes(); + uml_cleanup(); CHOOSE_MODE(reboot_tt(), reboot_skas()); } @@ -53,8 +53,7 @@ EXPORT_SYMBOL(machine_restart); void machine_power_off(void) { - do_uml_exitcalls(); - kill_off_processes(); + uml_cleanup(); CHOOSE_MODE(halt_tt(), halt_skas()); } -- cgit v1.2.3 From fc47a0d18a1994b4a18d2235fcde1b75dfa72552 Mon Sep 17 00:00:00 2001 From: Jeff Dike Date: Sat, 25 Jun 2005 14:55:24 -0700 Subject: [PATCH] uml: time initialization tidying user_time_init_skas and user_time_init_tt were essentially the same. So, this merges them, deleting the mode-specific functions and declarations. Signed-off-by: Jeff Dike Cc: Paolo Giarrusso Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/um/kernel/skas/Makefile | 4 ++-- arch/um/kernel/skas/include/mode-skas.h | 1 - arch/um/kernel/skas/time.c | 30 ------------------------------ arch/um/kernel/time.c | 31 ++++++++++++++----------------- arch/um/kernel/time_kern.c | 2 +- arch/um/kernel/tt/Makefile | 4 ++-- arch/um/kernel/tt/include/mode-tt.h | 1 - arch/um/kernel/tt/time.c | 28 ---------------------------- 8 files changed, 19 insertions(+), 82 deletions(-) delete mode 100644 arch/um/kernel/skas/time.c delete mode 100644 arch/um/kernel/tt/time.c (limited to 'arch/um/kernel') diff --git a/arch/um/kernel/skas/Makefile b/arch/um/kernel/skas/Makefile index d37d1bfcd6f7..ff69c4b312c0 100644 --- a/arch/um/kernel/skas/Makefile +++ b/arch/um/kernel/skas/Makefile @@ -4,10 +4,10 @@ # obj-y := exec_kern.o mem.o mem_user.o mmu.o process.o process_kern.o \ - syscall_kern.o syscall_user.o time.o tlb.o trap_user.o uaccess.o \ + syscall_kern.o syscall_user.o tlb.o trap_user.o uaccess.o \ subdir- := util -USER_OBJS := process.o time.o +USER_OBJS := process.o include arch/um/scripts/Makefile.rules diff --git a/arch/um/kernel/skas/include/mode-skas.h b/arch/um/kernel/skas/include/mode-skas.h index c1e33bd788db..bcd26a6a3888 100644 --- a/arch/um/kernel/skas/include/mode-skas.h +++ b/arch/um/kernel/skas/include/mode-skas.h @@ -13,7 +13,6 @@ extern unsigned long exec_fp_regs[]; extern unsigned long exec_fpx_regs[]; extern int have_fpx_regs; -extern void user_time_init_skas(void); extern void sig_handler_common_skas(int sig, void *sc_ptr); extern void halt_skas(void); extern void reboot_skas(void); diff --git a/arch/um/kernel/skas/time.c b/arch/um/kernel/skas/time.c deleted file mode 100644 index 98091494b897..000000000000 --- a/arch/um/kernel/skas/time.c +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) - * Licensed under the GPL - */ - -#include -#include -#include "time_user.h" -#include "process.h" -#include "user.h" - -void user_time_init_skas(void) -{ - if(signal(SIGALRM, (__sighandler_t) alarm_handler) == SIG_ERR) - panic("Couldn't set SIGALRM handler"); - if(signal(SIGVTALRM, (__sighandler_t) alarm_handler) == SIG_ERR) - panic("Couldn't set SIGVTALRM handler"); - set_interval(ITIMER_VIRTUAL); -} - -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ diff --git a/arch/um/kernel/time.c b/arch/um/kernel/time.c index c40c86a3f918..f829b309b63c 100644 --- a/arch/um/kernel/time.c +++ b/arch/um/kernel/time.c @@ -33,7 +33,7 @@ void timer(void) timeradd(&xtime, &local_offset, &xtime); } -void set_interval(int timer_type) +static void set_interval(int timer_type) { int usec = 1000000/hz(); struct itimerval interval = ((struct itimerval) { { 0, usec }, @@ -45,12 +45,7 @@ void set_interval(int timer_type) void enable_timer(void) { - int usec = 1000000/hz(); - struct itimerval enable = ((struct itimerval) { { 0, usec }, - { 0, usec }}); - if(setitimer(ITIMER_VIRTUAL, &enable, NULL)) - printk("enable_timer - setitimer failed, errno = %d\n", - errno); + set_interval(ITIMER_VIRTUAL); } void disable_timer(void) @@ -155,13 +150,15 @@ void idle_sleep(int secs) nanosleep(&ts, NULL); } -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ +/* XXX This partly duplicates init_irq_signals */ + +void user_time_init(void) +{ + set_handler(SIGVTALRM, (__sighandler_t) alarm_handler, + SA_ONSTACK | SA_RESTART, SIGUSR1, SIGIO, SIGWINCH, + SIGALRM, SIGUSR2, -1); + set_handler(SIGALRM, (__sighandler_t) alarm_handler, + SA_ONSTACK | SA_RESTART, SIGUSR1, SIGIO, SIGWINCH, + SIGVTALRM, SIGUSR2, -1); + set_interval(ITIMER_VIRTUAL); +} diff --git a/arch/um/kernel/time_kern.c b/arch/um/kernel/time_kern.c index 6516fc52afe0..a8b4ef601f59 100644 --- a/arch/um/kernel/time_kern.c +++ b/arch/um/kernel/time_kern.c @@ -162,7 +162,7 @@ int __init timer_init(void) { int err; - CHOOSE_MODE(user_time_init_tt(), user_time_init_skas()); + user_time_init(); err = request_irq(TIMER_IRQ, um_timer, SA_INTERRUPT, "timer", NULL); if(err != 0) printk(KERN_ERR "timer_init : request_irq failed - " diff --git a/arch/um/kernel/tt/Makefile b/arch/um/kernel/tt/Makefile index 3fd2554e60b6..6939e5af8472 100644 --- a/arch/um/kernel/tt/Makefile +++ b/arch/um/kernel/tt/Makefile @@ -4,11 +4,11 @@ # obj-y = exec_kern.o exec_user.o gdb.o ksyms.o mem.o mem_user.o process_kern.o \ - syscall_kern.o syscall_user.o time.o tlb.o tracer.o trap_user.o \ + syscall_kern.o syscall_user.o tlb.o tracer.o trap_user.o \ uaccess.o uaccess_user.o obj-$(CONFIG_PT_PROXY) += gdb_kern.o ptproxy/ -USER_OBJS := gdb.o time.o tracer.o +USER_OBJS := gdb.o tracer.o include arch/um/scripts/Makefile.rules diff --git a/arch/um/kernel/tt/include/mode-tt.h b/arch/um/kernel/tt/include/mode-tt.h index efe462019069..e171e15fead5 100644 --- a/arch/um/kernel/tt/include/mode-tt.h +++ b/arch/um/kernel/tt/include/mode-tt.h @@ -13,7 +13,6 @@ enum { OP_NONE, OP_EXEC, OP_FORK, OP_TRACE_ON, OP_REBOOT, OP_HALT, OP_CB }; extern int tracing_pid; extern int tracer(int (*init_proc)(void *), void *sp); -extern void user_time_init_tt(void); extern void sig_handler_common_tt(int sig, void *sc); extern void syscall_handler_tt(int sig, union uml_pt_regs *regs); extern void reboot_tt(void); diff --git a/arch/um/kernel/tt/time.c b/arch/um/kernel/tt/time.c deleted file mode 100644 index 8565b71b07cd..000000000000 --- a/arch/um/kernel/tt/time.c +++ /dev/null @@ -1,28 +0,0 @@ -/* - * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) - * Licensed under the GPL - */ - -#include -#include -#include -#include "process.h" -#include "user.h" - -void user_time_init_tt(void) -{ - if(signal(SIGVTALRM, (__sighandler_t) alarm_handler) == SIG_ERR) - panic("Couldn't set SIGVTALRM handler"); - set_interval(ITIMER_VIRTUAL); -} - -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ -- cgit v1.2.3 From 29d56cfe3ca599ddc3ae9156e7e469c044d97b96 Mon Sep 17 00:00:00 2001 From: Jeff Dike Date: Sat, 25 Jun 2005 14:55:25 -0700 Subject: [PATCH] uml: hot-unplug code cleanup Clean up the hot-unplugging code. There is now an id procedure which is called to figure out what device we're talking to. The error messages from that are now done from mconsole_remove instead of the driver. remove is now called with the device number, after it has been checked, so doesn't need to do sanity checking on it. Signed-off-by: Jeff Dike Cc: Paolo Giarrusso Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/um/kernel/tt/gdb.c | 4 ++-- arch/um/kernel/tt/gdb_kern.c | 2 +- arch/um/kernel/tt/include/debug.h | 17 +++-------------- 3 files changed, 6 insertions(+), 17 deletions(-) (limited to 'arch/um/kernel') diff --git a/arch/um/kernel/tt/gdb.c b/arch/um/kernel/tt/gdb.c index 19a0ad7b35b3..37e22d71a0d9 100644 --- a/arch/um/kernel/tt/gdb.c +++ b/arch/um/kernel/tt/gdb.c @@ -153,10 +153,10 @@ void remove_gdb_cb(void *unused) exit_debugger_cb(NULL); } -int gdb_remove(char *unused) +int gdb_remove(int unused) { initial_thread_cb(remove_gdb_cb, NULL); - return(0); + return 0; } void signal_usr1(int sig) diff --git a/arch/um/kernel/tt/gdb_kern.c b/arch/um/kernel/tt/gdb_kern.c index 93fb121f86af..26506388a6aa 100644 --- a/arch/um/kernel/tt/gdb_kern.c +++ b/arch/um/kernel/tt/gdb_kern.c @@ -10,7 +10,7 @@ #ifdef CONFIG_MCONSOLE extern int gdb_config(char *str); -extern int gdb_remove(char *unused); +extern int gdb_remove(int n); static struct mc_device gdb_mc = { .name = "gdb", diff --git a/arch/um/kernel/tt/include/debug.h b/arch/um/kernel/tt/include/debug.h index 8eff674107ca..738435461e13 100644 --- a/arch/um/kernel/tt/include/debug.h +++ b/arch/um/kernel/tt/include/debug.h @@ -4,8 +4,8 @@ * Licensed under the GPL */ -#ifndef __DEBUG_H -#define __DEBUG_H +#ifndef __UML_TT_DEBUG_H +#define __UML_TT_DEBUG_H extern int debugger_proxy(int status, pid_t pid); extern void child_proxy(pid_t pid, int status); @@ -13,17 +13,6 @@ extern void init_proxy (pid_t pid, int waiting, int status); extern int start_debugger(char *prog, int startup, int stop, int *debugger_fd); extern void fake_child_exit(void); extern int gdb_config(char *str); -extern int gdb_remove(char *unused); +extern int gdb_remove(int unused); #endif - -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ -- cgit v1.2.3 From eda80228860641b7b0e963e6bd219b960c500af9 Mon Sep 17 00:00:00 2001 From: Jeff Dike Date: Thu, 7 Jul 2005 17:56:00 -0700 Subject: [PATCH] uml: kill some useless vmalloc tlb flushing There is absolutely no reason to flush the kernel's VM area during a tlb_flush_mm. This results in a noticable performance increase in the kernel build benchmark. Signed-off-by: Jeff Dike Cc: Paolo Giarrusso Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/um/kernel/skas/tlb.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/um/kernel') diff --git a/arch/um/kernel/skas/tlb.c b/arch/um/kernel/skas/tlb.c index b8c5e71763d1..18f9a7711de1 100644 --- a/arch/um/kernel/skas/tlb.c +++ b/arch/um/kernel/skas/tlb.c @@ -76,7 +76,6 @@ void flush_tlb_mm_skas(struct mm_struct *mm) return; fix_range(mm, 0, host_task_size, 0); - flush_tlb_kernel_range_common(start_vm, end_vm); } void force_flush_all_skas(void) -- cgit v1.2.3 From d67b569f5f620c0fb95d5212642746b7ba9d29e4 Mon Sep 17 00:00:00 2001 From: Jeff Dike Date: Thu, 7 Jul 2005 17:56:49 -0700 Subject: [PATCH] uml: skas0 - separate kernel address space on stock hosts UML has had two modes of operation - an insecure, slow mode (tt mode) in which the kernel is mapped into every process address space which requires no host kernel modifications, and a secure, faster mode (skas mode) in which the UML kernel is in a separate host address space, which requires a patch to the host kernel. This patch implements something very close to skas mode for hosts which don't support skas - I'm calling this skas0. It provides the security of the skas host patch, and some of the performance gains. The two main things that are provided by the skas patch, /proc/mm and PTRACE_FAULTINFO, are implemented in a way that require no host patch. For the remote address space changing stuff (mmap, munmap, and mprotect), we set aside two pages in the process above its stack, one of which contains a little bit of code which can call mmap et al. To update the address space, the system call information (system call number and arguments) are written to the stub page above the code. The %esp is set to the beginning of the data, the %eip is set the the start of the stub, and it repeatedly pops the information into its registers and makes the system call until it sees a system call number of zero. This is to amortize the cost of the context switch across multiple address space updates. When the updates are done, it SIGSTOPs itself, and the kernel process continues what it was doing. For a PTRACE_FAULTINFO replacement, we set up a SIGSEGV handler in the child, and let it handle segfaults rather than nullifying them. The handler is in the same page as the mmap stub. The second page is used as the stack. The handler reads cr2 and err from the sigcontext, sticks them at the base of the stack in a faultinfo struct, and SIGSTOPs itself. The kernel then reads the faultinfo and handles the fault. A complication on x86_64 is that this involves resetting the registers to the segfault values when the process is inside the kill system call. This breaks on x86_64 because %rcx will contain %rip because you tell SYSRET where to return to by putting the value in %rcx. So, this corrupts $rcx on return from the segfault. To work around this, I added an arch_finish_segv, which on x86 does nothing, but which on x86_64 ptraces the child back through the sigreturn. This causes %rcx to be restored by sigreturn and avoids the corruption. Ultimately, I think I will replace this with the trick of having it send itself a blocked signal which will be unblocked by the sigreturn. This will allow it to be stopped just after the sigreturn, and PTRACE_SYSCALLed without all the back-and-forth of PTRACE_SYSCALLing it through sigreturn. This runs on a stock host, so theoretically (and hopefully), tt mode isn't needed any more. We need to make sure that this is better in every way than tt mode, though. I'm concerned about the speed of address space updates and page fault handling, since they involve extra round-trips to the child. We can amortize the round-trip cost for large address space updates by writing all of the operations to the data page and having the child execute them all at the same time. This will help fork and exec, but not page faults, since they involve only one page. I can't think of any way to help page faults, except to add something like PTRACE_FAULTINFO to the host. There is PTRACE_SIGINFO, but UML doesn't use siginfo for SIGSEGV (or anything else) because there isn't enough information in the siginfo struct to handle page faults (the faulting operation type is missing). Adding that would make PTRACE_SIGINFO a usable equivalent to PTRACE_FAULTINFO. As for the code itself: - The system call stub is in arch/um/kernel/sys-$(SUBARCH)/stub.S. It is put in its own section of the binary along with stub_segv_handler in arch/um/kernel/skas/process.c. This is manipulated with run_syscall_stub in arch/um/kernel/skas/mem_user.c. syscall_stub will execute any system call at all, but it's only used for mmap, munmap, and mprotect. - The x86_64 stub calls sigreturn by hand rather than allowing the normal sigreturn to happen, because the normal sigreturn is a SA_RESTORER in UML's address space provided by libc. Needless to say, this is not available in the child's address space. Also, it does a couple of odd pops before that which restore the stack to the state it was in at the time the signal handler was called. - There is a new field in the arch mmu_context, which is now a union. This is the pid to be manipulated rather than the /proc/mm file descriptor. Code which deals with this now checks proc_mm to see whether it should use the usual skas code or the new code. - userspace_tramp is now used to create a new host process for every UML process, rather than one per UML processor. It checks proc_mm and ptrace_faultinfo to decide whether to map in the pages above its stack. - start_userspace now makes CLONE_VM conditional on proc_mm since we need separate address spaces now. - switch_mm_skas now just sets userspace_pid[0] to the new pid rather than PTRACE_SWITCH_MM. There is an addition to userspace which updates its idea of the pid being manipulated each time around the loop. This is important on exec, when the pid will change underneath userspace(). - The stub page has a pte, but it can't be mapped in using tlb_flush because it is part of tlb_flush. This is why it's required for it to be mapped in by userspace_tramp. Other random things: - The stub section in uml.lds.S is page aligned. This page is written out to the backing vm file in setup_physmem because it is mapped from there into user processes. - There's some confusion with TASK_SIZE now that there are a couple of extra pages that the process can't use. TASK_SIZE is considered by the elf code to be the usable process memory, which is reasonable, so it is decreased by two pages. This confuses the definition of USER_PGDS_IN_LAST_PML4, making it too small because of the rounding down of the uneven division. So we round it to the nearest PGDIR_SIZE rather than the lower one. - I added a missing PT_SYSCALL_ARG6_OFFSET macro. - um_mmu.h was made into a userspace-usable file. - proc_mm and ptrace_faultinfo are globals which say whether the host supports these features. - There is a bad interaction between the mm.nr_ptes check at the end of exit_mmap, stack randomization, and skas0. exit_mmap will stop freeing pages at the PGDIR_SIZE boundary after the last vma. If the stack isn't on the last page table page, the last pte page won't be freed, as it should be since the stub ptes are there, and exit_mmap will BUG because there is an unfreed page. To get around this, TASK_SIZE is set to the next lowest PGDIR_SIZE boundary and mm->nr_ptes is decremented after the calls to init_stub_pte. This ensures that we know the process stack (and all other process mappings) will be below the top page table page, and thus we know that mm->nr_ptes will be one too many, and can be decremented. Things that need fixing: - We may need better assurrences that the stub code is PIC. - The stub pte is set up in init_new_context_skas. - alloc_pgdir is probably the right place. Signed-off-by: Jeff Dike Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/um/kernel/dyn.lds.S | 6 + arch/um/kernel/physmem.c | 8 ++ arch/um/kernel/process.c | 28 ++-- arch/um/kernel/skas/exec_kern.c | 2 +- arch/um/kernel/skas/include/mm_id.h | 17 +++ arch/um/kernel/skas/include/mmu-skas.h | 7 +- arch/um/kernel/skas/include/skas.h | 15 ++- arch/um/kernel/skas/mem.c | 6 +- arch/um/kernel/skas/mem_user.c | 225 ++++++++++++++++++++++----------- arch/um/kernel/skas/mmu.c | 136 ++++++++++++++++---- arch/um/kernel/skas/process.c | 153 +++++++++++++++++----- arch/um/kernel/skas/process_kern.c | 33 ++++- arch/um/kernel/skas/tlb.c | 28 ++-- arch/um/kernel/tlb.c | 132 +++++++++---------- arch/um/kernel/tt/tlb.c | 4 +- arch/um/kernel/uml.lds.S | 7 + 16 files changed, 577 insertions(+), 230 deletions(-) create mode 100644 arch/um/kernel/skas/include/mm_id.h (limited to 'arch/um/kernel') diff --git a/arch/um/kernel/dyn.lds.S b/arch/um/kernel/dyn.lds.S index 715b0838a68c..3942a5f245de 100644 --- a/arch/um/kernel/dyn.lds.S +++ b/arch/um/kernel/dyn.lds.S @@ -67,6 +67,12 @@ SECTIONS *(.stub .text.* .gnu.linkonce.t.*) /* .gnu.warning sections are handled specially by elf32.em. */ *(.gnu.warning) + + . = ALIGN(4096); + __syscall_stub_start = .; + *(.__syscall_stub*) + __syscall_stub_end = .; + . = ALIGN(4096); } =0x90909090 .fini : { KEEP (*(.fini)) diff --git a/arch/um/kernel/physmem.c b/arch/um/kernel/physmem.c index 420e6d51fa0f..a24e3b7f4bf0 100644 --- a/arch/um/kernel/physmem.c +++ b/arch/um/kernel/physmem.c @@ -353,6 +353,8 @@ void map_memory(unsigned long virt, unsigned long phys, unsigned long len, #define PFN_UP(x) (((x) + PAGE_SIZE-1) >> PAGE_SHIFT) +extern int __syscall_stub_start, __binary_start; + void setup_physmem(unsigned long start, unsigned long reserve_end, unsigned long len, unsigned long highmem) { @@ -371,6 +373,12 @@ void setup_physmem(unsigned long start, unsigned long reserve_end, exit(1); } + /* Special kludge - This page will be mapped in to userspace processes + * from physmem_fd, so it needs to be written out there. + */ + os_seek_file(physmem_fd, __pa(&__syscall_stub_start)); + os_write_file(physmem_fd, &__syscall_stub_start, PAGE_SIZE); + bootmap_size = init_bootmem(pfn, pfn + delta); free_bootmem(__pa(reserve_end) + bootmap_size, len - bootmap_size - reserve); diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c index 1b5ef3e96c71..c45a60e9c92d 100644 --- a/arch/um/kernel/process.c +++ b/arch/um/kernel/process.c @@ -32,6 +32,7 @@ #include "uml-config.h" #include "choose-mode.h" #include "mode.h" +#include "tempfile.h" #ifdef UML_CONFIG_MODE_SKAS #include "skas.h" #include "skas_ptrace.h" @@ -358,11 +359,16 @@ void forward_pending_sigio(int target) kill(target, SIGIO); } +int ptrace_faultinfo = 0; +int proc_mm = 1; + +extern void *__syscall_stub_start, __syscall_stub_end; + #ifdef UML_CONFIG_MODE_SKAS -static inline int check_skas3_ptrace_support(void) +static inline void check_skas3_ptrace_support(void) { struct ptrace_faultinfo fi; - int pid, n, ret = 1; + int pid, n; printf("Checking for the skas3 patch in the host..."); pid = start_ptraced_child(); @@ -374,33 +380,31 @@ static inline int check_skas3_ptrace_support(void) else { perror("not found"); } - ret = 0; - } else { + } + else { + ptrace_faultinfo = 1; printf("found\n"); } init_registers(pid); stop_ptraced_child(pid, 1, 1); - - return(ret); } int can_do_skas(void) { - int ret = 1; - printf("Checking for /proc/mm..."); if (os_access("/proc/mm", OS_ACC_W_OK) < 0) { + proc_mm = 0; printf("not found\n"); - ret = 0; goto out; - } else { + } + else { printf("found\n"); } - ret = check_skas3_ptrace_support(); out: - return ret; + check_skas3_ptrace_support(); + return 1; } #else int can_do_skas(void) diff --git a/arch/um/kernel/skas/exec_kern.c b/arch/um/kernel/skas/exec_kern.c index c6b4d5dba789..77ed7bbab219 100644 --- a/arch/um/kernel/skas/exec_kern.c +++ b/arch/um/kernel/skas/exec_kern.c @@ -18,7 +18,7 @@ void flush_thread_skas(void) { force_flush_all(); - switch_mm_skas(current->mm->context.skas.mm_fd); + switch_mm_skas(¤t->mm->context.skas.id); } void start_thread_skas(struct pt_regs *regs, unsigned long eip, diff --git a/arch/um/kernel/skas/include/mm_id.h b/arch/um/kernel/skas/include/mm_id.h new file mode 100644 index 000000000000..48dd0989ddaa --- /dev/null +++ b/arch/um/kernel/skas/include/mm_id.h @@ -0,0 +1,17 @@ +/* + * Copyright (C) 2005 Jeff Dike (jdike@karaya.com) + * Licensed under the GPL + */ + +#ifndef __MM_ID_H +#define __MM_ID_H + +struct mm_id { + union { + int mm_fd; + int pid; + } u; + unsigned long stack; +}; + +#endif diff --git a/arch/um/kernel/skas/include/mmu-skas.h b/arch/um/kernel/skas/include/mmu-skas.h index 4cd60d7213f3..278b72f1d9ad 100644 --- a/arch/um/kernel/skas/include/mmu-skas.h +++ b/arch/um/kernel/skas/include/mmu-skas.h @@ -6,10 +6,15 @@ #ifndef __SKAS_MMU_H #define __SKAS_MMU_H +#include "mm_id.h" + struct mmu_context_skas { - int mm_fd; + struct mm_id id; + unsigned long last_page_table; }; +extern void switch_mm_skas(struct mm_id * mm_idp); + #endif /* diff --git a/arch/um/kernel/skas/include/skas.h b/arch/um/kernel/skas/include/skas.h index 96b51dba3471..d91a60f3830a 100644 --- a/arch/um/kernel/skas/include/skas.h +++ b/arch/um/kernel/skas/include/skas.h @@ -6,9 +6,11 @@ #ifndef __SKAS_H #define __SKAS_H +#include "mm_id.h" #include "sysdep/ptrace.h" extern int userspace_pid[]; +extern int proc_mm, ptrace_faultinfo; extern void switch_threads(void *me, void *next); extern void thread_wait(void *sw, void *fb); @@ -22,16 +24,17 @@ extern void new_thread_proc(void *stack, void (*handler)(int sig)); extern void remove_sigstack(void); extern void new_thread_handler(int sig); extern void handle_syscall(union uml_pt_regs *regs); -extern void map(int fd, unsigned long virt, unsigned long len, int r, int w, - int x, int phys_fd, unsigned long long offset); -extern int unmap(int fd, void *addr, unsigned long len); -extern int protect(int fd, unsigned long addr, unsigned long len, - int r, int w, int x); +extern int map(struct mm_id * mm_idp, unsigned long virt, unsigned long len, + int r, int w, int x, int phys_fd, unsigned long long offset); +extern int unmap(struct mm_id * mm_idp, void *addr, unsigned long len); +extern int protect(struct mm_id * mm_idp, unsigned long addr, + unsigned long len, int r, int w, int x); extern void user_signal(int sig, union uml_pt_regs *regs, int pid); extern int new_mm(int from); -extern void start_userspace(int cpu); +extern int start_userspace(unsigned long stub_stack); extern void get_skas_faultinfo(int pid, struct faultinfo * fi); extern long execute_syscall_skas(void *r); +extern unsigned long current_stub_stack(void); #endif diff --git a/arch/um/kernel/skas/mem.c b/arch/um/kernel/skas/mem.c index 438db2f43456..147466d7ff4f 100644 --- a/arch/um/kernel/skas/mem.c +++ b/arch/um/kernel/skas/mem.c @@ -5,7 +5,9 @@ #include "linux/config.h" #include "linux/mm.h" +#include "asm/pgtable.h" #include "mem_user.h" +#include "skas.h" unsigned long set_task_sizes_skas(int arg, unsigned long *host_size_out, unsigned long *task_size_out) @@ -18,7 +20,9 @@ unsigned long set_task_sizes_skas(int arg, unsigned long *host_size_out, *task_size_out = CONFIG_HOST_TASK_SIZE; #else *host_size_out = top; - *task_size_out = top; + if (proc_mm && ptrace_faultinfo) + *task_size_out = top; + else *task_size_out = CONFIG_STUB_START & PGDIR_MASK; #endif return(((unsigned long) set_task_sizes_skas) & ~0xffffff); } diff --git a/arch/um/kernel/skas/mem_user.c b/arch/um/kernel/skas/mem_user.c index 1310bf1e88d1..b0980ff3bd95 100644 --- a/arch/um/kernel/skas/mem_user.c +++ b/arch/um/kernel/skas/mem_user.c @@ -3,100 +3,171 @@ * Licensed under the GPL */ +#include #include #include +#include +#include +#include #include "mem_user.h" #include "mem.h" +#include "mm_id.h" #include "user.h" #include "os.h" #include "proc_mm.h" - -void map(int fd, unsigned long virt, unsigned long len, int r, int w, - int x, int phys_fd, unsigned long long offset) +#include "ptrace_user.h" +#include "user_util.h" +#include "kern_util.h" +#include "task.h" +#include "registers.h" +#include "uml-config.h" +#include "sysdep/ptrace.h" +#include "sysdep/stub.h" +#include "skas.h" + +extern unsigned long syscall_stub, __syscall_stub_start; + +extern void wait_stub_done(int pid, int sig, char * fname); + +static long run_syscall_stub(struct mm_id * mm_idp, int syscall, + unsigned long *args) { - struct proc_mm_op map; - int prot, n; - - prot = (r ? PROT_READ : 0) | (w ? PROT_WRITE : 0) | - (x ? PROT_EXEC : 0); - - map = ((struct proc_mm_op) { .op = MM_MMAP, - .u = - { .mmap = - { .addr = virt, - .len = len, - .prot = prot, - .flags = MAP_SHARED | - MAP_FIXED, - .fd = phys_fd, - .offset = offset - } } } ); - n = os_write_file(fd, &map, sizeof(map)); - if(n != sizeof(map)) - printk("map : /proc/mm map failed, err = %d\n", -n); + int n, pid = mm_idp->u.pid; + unsigned long regs[MAX_REG_NR]; + + get_safe_registers(regs); + regs[REGS_IP_INDEX] = UML_CONFIG_STUB_CODE + + ((unsigned long) &syscall_stub - + (unsigned long) &__syscall_stub_start); + /* XXX Don't have a define for starting a syscall */ + regs[REGS_SYSCALL_NR] = syscall; + regs[REGS_SYSCALL_ARG1] = args[0]; + regs[REGS_SYSCALL_ARG2] = args[1]; + regs[REGS_SYSCALL_ARG3] = args[2]; + regs[REGS_SYSCALL_ARG4] = args[3]; + regs[REGS_SYSCALL_ARG5] = args[4]; + regs[REGS_SYSCALL_ARG6] = args[5]; + n = ptrace_setregs(pid, regs); + if(n < 0){ + printk("run_syscall_stub : PTRACE_SETREGS failed, " + "errno = %d\n", n); + return(n); + } + + wait_stub_done(pid, 0, "run_syscall_stub"); + + return(*((unsigned long *) mm_idp->stack)); } -int unmap(int fd, void *addr, unsigned long len) +int map(struct mm_id *mm_idp, unsigned long virt, unsigned long len, + int r, int w, int x, int phys_fd, unsigned long long offset) { - struct proc_mm_op unmap; - int n; - - unmap = ((struct proc_mm_op) { .op = MM_MUNMAP, - .u = - { .munmap = - { .addr = (unsigned long) addr, - .len = len } } } ); - n = os_write_file(fd, &unmap, sizeof(unmap)); - if(n != sizeof(unmap)) { - if(n < 0) - return(n); - else if(n > 0) - return(-EIO); - } - - return(0); + int prot, n; + + prot = (r ? PROT_READ : 0) | (w ? PROT_WRITE : 0) | + (x ? PROT_EXEC : 0); + + if(proc_mm){ + struct proc_mm_op map; + int fd = mm_idp->u.mm_fd; + map = ((struct proc_mm_op) { .op = MM_MMAP, + .u = + { .mmap = + { .addr = virt, + .len = len, + .prot = prot, + .flags = MAP_SHARED | + MAP_FIXED, + .fd = phys_fd, + .offset= offset + } } } ); + n = os_write_file(fd, &map, sizeof(map)); + if(n != sizeof(map)) + printk("map : /proc/mm map failed, err = %d\n", -n); + } + else { + long res; + unsigned long args[] = { virt, len, prot, + MAP_SHARED | MAP_FIXED, phys_fd, + MMAP_OFFSET(offset) }; + + res = run_syscall_stub(mm_idp, STUB_MMAP_NR, args); + if((void *) res == MAP_FAILED) + printk("mmap stub failed, errno = %d\n", res); + } + + return 0; } -int protect(int fd, unsigned long addr, unsigned long len, int r, int w, - int x, int must_succeed) +int unmap(struct mm_id *mm_idp, void *addr, unsigned long len) { - struct proc_mm_op protect; - int prot, n; - - prot = (r ? PROT_READ : 0) | (w ? PROT_WRITE : 0) | - (x ? PROT_EXEC : 0); - - protect = ((struct proc_mm_op) { .op = MM_MPROTECT, - .u = - { .mprotect = - { .addr = (unsigned long) addr, - .len = len, - .prot = prot } } } ); - - n = os_write_file(fd, &protect, sizeof(protect)); - if(n != sizeof(protect)) { - if(n == 0) return(0); - - if(must_succeed) - panic("protect failed, err = %d", -n); - - return(-EIO); - } + int n; + + if(proc_mm){ + struct proc_mm_op unmap; + int fd = mm_idp->u.mm_fd; + unmap = ((struct proc_mm_op) { .op = MM_MUNMAP, + .u = + { .munmap = + { .addr = + (unsigned long) addr, + .len = len } } } ); + n = os_write_file(fd, &unmap, sizeof(unmap)); + if(n != sizeof(unmap)) { + if(n < 0) + return(n); + else if(n > 0) + return(-EIO); + } + } + else { + int res; + unsigned long args[] = { (unsigned long) addr, len, 0, 0, 0, + 0 }; + + res = run_syscall_stub(mm_idp, __NR_munmap, args); + if(res < 0) + printk("munmap stub failed, errno = %d\n", res); + } + + return(0); +} - return(0); +int protect(struct mm_id *mm_idp, unsigned long addr, unsigned long len, + int r, int w, int x) +{ + struct proc_mm_op protect; + int prot, n; + + prot = (r ? PROT_READ : 0) | (w ? PROT_WRITE : 0) | + (x ? PROT_EXEC : 0); + + if(proc_mm){ + int fd = mm_idp->u.mm_fd; + protect = ((struct proc_mm_op) { .op = MM_MPROTECT, + .u = + { .mprotect = + { .addr = + (unsigned long) addr, + .len = len, + .prot = prot } } } ); + + n = os_write_file(fd, &protect, sizeof(protect)); + if(n != sizeof(protect)) + panic("protect failed, err = %d", -n); + } + else { + int res; + unsigned long args[] = { addr, len, prot, 0, 0, 0 }; + + res = run_syscall_stub(mm_idp, __NR_mprotect, args); + if(res < 0) + panic("mprotect stub failed, errno = %d\n", res); + } + + return(0); } void before_mem_skas(unsigned long unused) { } - -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ diff --git a/arch/um/kernel/skas/mmu.c b/arch/um/kernel/skas/mmu.c index 6cb9a6d028a9..511a855c9ec0 100644 --- a/arch/um/kernel/skas/mmu.c +++ b/arch/um/kernel/skas/mmu.c @@ -3,46 +3,138 @@ * Licensed under the GPL */ +#include "linux/config.h" #include "linux/sched.h" #include "linux/list.h" #include "linux/spinlock.h" #include "linux/slab.h" +#include "linux/errno.h" +#include "linux/mm.h" #include "asm/current.h" #include "asm/segment.h" #include "asm/mmu.h" +#include "asm/pgalloc.h" +#include "asm/pgtable.h" #include "os.h" #include "skas.h" +extern int __syscall_stub_start; + +static int init_stub_pte(struct mm_struct *mm, unsigned long proc, + unsigned long kernel) +{ + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + pte_t *pte; + + spin_lock(&mm->page_table_lock); + pgd = pgd_offset(mm, proc); + pud = pud_alloc(mm, pgd, proc); + if (!pud) + goto out; + + pmd = pmd_alloc(mm, pud, proc); + if (!pmd) + goto out_pmd; + + pte = pte_alloc_map(mm, pmd, proc); + if (!pte) + goto out_pte; + + /* There's an interaction between the skas0 stub pages, stack + * randomization, and the BUG at the end of exit_mmap. exit_mmap + * checks that the number of page tables freed is the same as had + * been allocated. If the stack is on the last page table page, + * then the stack pte page will be freed, and if not, it won't. To + * avoid having to know where the stack is, or if the process mapped + * something at the top of its address space for some other reason, + * we set TASK_SIZE to end at the start of the last page table. + * This keeps exit_mmap off the last page, but introduces a leak + * of that page. So, we hang onto it here and free it in + * destroy_context_skas. + */ + + mm->context.skas.last_page_table = pmd_page_kernel(*pmd); + + *pte = mk_pte(virt_to_page(kernel), __pgprot(_PAGE_PRESENT)); + *pte = pte_mkexec(*pte); + *pte = pte_wrprotect(*pte); + spin_unlock(&mm->page_table_lock); + return(0); + + out_pmd: + pud_free(pud); + out_pte: + pmd_free(pmd); + out: + spin_unlock(&mm->page_table_lock); + return(-ENOMEM); +} + int init_new_context_skas(struct task_struct *task, struct mm_struct *mm) { - int from; + struct mm_struct *cur_mm = current->mm; + struct mm_id *mm_id = &mm->context.skas.id; + unsigned long stack; + int from, ret; - if((current->mm != NULL) && (current->mm != &init_mm)) - from = current->mm->context.skas.mm_fd; - else from = -1; + if(proc_mm){ + if((cur_mm != NULL) && (cur_mm != &init_mm)) + from = cur_mm->context.skas.id.u.mm_fd; + else from = -1; - mm->context.skas.mm_fd = new_mm(from); - if(mm->context.skas.mm_fd < 0){ - printk("init_new_context_skas - new_mm failed, errno = %d\n", - mm->context.skas.mm_fd); - return(mm->context.skas.mm_fd); + ret = new_mm(from); + if(ret < 0){ + printk("init_new_context_skas - new_mm failed, " + "errno = %d\n", ret); + return ret; + } + mm_id->u.mm_fd = ret; } + else { + /* This zeros the entry that pgd_alloc didn't, needed since + * we are about to reinitialize it, and want mm.nr_ptes to + * be accurate. + */ + mm->pgd[USER_PTRS_PER_PGD] = __pgd(0); - return(0); + ret = init_stub_pte(mm, CONFIG_STUB_CODE, + (unsigned long) &__syscall_stub_start); + if(ret) + goto out; + + ret = -ENOMEM; + stack = get_zeroed_page(GFP_KERNEL); + if(stack == 0) + goto out; + mm_id->stack = stack; + + ret = init_stub_pte(mm, CONFIG_STUB_DATA, stack); + if(ret) + goto out_free; + + mm->nr_ptes--; + mm_id->u.pid = start_userspace(stack); + } + + return 0; + + out_free: + free_page(mm_id->stack); + out: + return ret; } void destroy_context_skas(struct mm_struct *mm) { - os_close_file(mm->context.skas.mm_fd); -} + struct mmu_context_skas *mmu = &mm->context.skas; -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ + if(proc_mm) + os_close_file(mmu->id.u.mm_fd); + else { + os_kill_ptraced_process(mmu->id.u.pid, 1); + free_page(mmu->id.stack); + free_page(mmu->last_page_table); + } +} diff --git a/arch/um/kernel/skas/process.c b/arch/um/kernel/skas/process.c index 773cd2b525fc..1647abb0d1aa 100644 --- a/arch/um/kernel/skas/process.c +++ b/arch/um/kernel/skas/process.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) + * Copyright (C) 2002- 2004 Jeff Dike (jdike@addtoit.com) * Licensed under the GPL */ @@ -14,6 +14,7 @@ #include #include #include +#include #include "user.h" #include "ptrace_user.h" #include "time_user.h" @@ -21,13 +22,17 @@ #include "user_util.h" #include "kern_util.h" #include "skas.h" +#include "mm_id.h" #include "sysdep/sigcontext.h" +#include "sysdep/stub.h" #include "os.h" #include "proc_mm.h" #include "skas_ptrace.h" #include "chan_user.h" #include "signal_user.h" #include "registers.h" +#include "mem.h" +#include "uml-config.h" #include "process.h" int is_skas_winch(int pid, int fd, void *data) @@ -39,20 +44,55 @@ int is_skas_winch(int pid, int fd, void *data) return(1); } -void get_skas_faultinfo(int pid, struct faultinfo * fi) +void wait_stub_done(int pid, int sig, char * fname) { - int err; - - err = ptrace(PTRACE_FAULTINFO, pid, 0, fi); - if(err) - panic("get_skas_faultinfo - PTRACE_FAULTINFO failed, " - "errno = %d\n", errno); + int n, status, err; + + do { + if ( sig != -1 ) { + err = ptrace(PTRACE_CONT, pid, 0, sig); + if(err) + panic("%s : continue failed, errno = %d\n", + fname, errno); + } + sig = 0; + + CATCH_EINTR(n = waitpid(pid, &status, WUNTRACED)); + } while((n >= 0) && WIFSTOPPED(status) && + (WSTOPSIG(status) == SIGVTALRM)); + + if((n < 0) || !WIFSTOPPED(status) || + (WSTOPSIG(status) != SIGUSR1 && WSTOPSIG(status != SIGTRAP))){ + panic("%s : failed to wait for SIGUSR1/SIGTRAP, " + "pid = %d, n = %d, errno = %d, status = 0x%x\n", + fname, pid, n, errno, status); + } +} - /* Special handling for i386, which has different structs */ - if (sizeof(struct ptrace_faultinfo) < sizeof(struct faultinfo)) - memset((char *)fi + sizeof(struct ptrace_faultinfo), 0, - sizeof(struct faultinfo) - - sizeof(struct ptrace_faultinfo)); +void get_skas_faultinfo(int pid, struct faultinfo * fi) +{ + int err; + + if(ptrace_faultinfo){ + err = ptrace(PTRACE_FAULTINFO, pid, 0, fi); + if(err) + panic("get_skas_faultinfo - PTRACE_FAULTINFO failed, " + "errno = %d\n", errno); + + /* Special handling for i386, which has different structs */ + if (sizeof(struct ptrace_faultinfo) < sizeof(struct faultinfo)) + memset((char *)fi + sizeof(struct ptrace_faultinfo), 0, + sizeof(struct faultinfo) - + sizeof(struct ptrace_faultinfo)); + } + else { + wait_stub_done(pid, SIGSEGV, "get_skas_faultinfo"); + + /* faultinfo is prepared by the stub-segv-handler at start of + * the stub stack page. We just have to copy it. + */ + memcpy(fi, (void *)current_stub_stack(), sizeof(*fi)); + } } static void handle_segv(int pid, union uml_pt_regs * regs) @@ -91,11 +131,56 @@ static void handle_trap(int pid, union uml_pt_regs *regs, int local_using_sysemu handle_syscall(regs); } -static int userspace_tramp(void *arg) +extern int __syscall_stub_start; + +static int userspace_tramp(void *stack) { - init_new_thread_signals(0); - enable_timer(); + void *addr; + ptrace(PTRACE_TRACEME, 0, 0, 0); + + init_new_thread_signals(1); + enable_timer(); + + if(!proc_mm){ + /* This has a pte, but it can't be mapped in with the usual + * tlb_flush mechanism because this is part of that mechanism + */ + int fd; + __u64 offset; + + fd = phys_mapping(to_phys(&__syscall_stub_start), &offset); + addr = mmap64((void *) UML_CONFIG_STUB_CODE, page_size(), + PROT_EXEC, MAP_FIXED | MAP_PRIVATE, fd, offset); + if(addr == MAP_FAILED){ + printk("mapping mmap stub failed, errno = %d\n", + errno); + exit(1); + } + + if(stack != NULL){ + fd = phys_mapping(to_phys(stack), &offset); + addr = mmap((void *) UML_CONFIG_STUB_DATA, page_size(), + PROT_READ | PROT_WRITE, + MAP_FIXED | MAP_SHARED, fd, offset); + if(addr == MAP_FAILED){ + printk("mapping segfault stack failed, " + "errno = %d\n", errno); + exit(1); + } + } + } + if(!ptrace_faultinfo && (stack != NULL)){ + unsigned long v = UML_CONFIG_STUB_CODE + + (unsigned long) stub_segv_handler - + (unsigned long) &__syscall_stub_start; + + set_sigstack((void *) UML_CONFIG_STUB_DATA, page_size()); + set_handler(SIGSEGV, (void *) v, SA_ONSTACK, + SIGIO, SIGWINCH, SIGALRM, SIGVTALRM, + SIGUSR1, -1); + } + os_stop_process(os_getpid()); return(0); } @@ -105,11 +190,11 @@ static int userspace_tramp(void *arg) #define NR_CPUS 1 int userspace_pid[NR_CPUS]; -void start_userspace(int cpu) +int start_userspace(unsigned long stub_stack) { void *stack; unsigned long sp; - int pid, status, n; + int pid, status, n, flags; stack = mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); @@ -117,8 +202,9 @@ void start_userspace(int cpu) panic("start_userspace : mmap failed, errno = %d", errno); sp = (unsigned long) stack + PAGE_SIZE - sizeof(void *); - pid = clone(userspace_tramp, (void *) sp, - CLONE_FILES | CLONE_VM | SIGCHLD, NULL); + flags = CLONE_FILES | SIGCHLD; + if(proc_mm) flags |= CLONE_VM; + pid = clone(userspace_tramp, (void *) sp, flags, (void *) stub_stack); if(pid < 0) panic("start_userspace : clone failed, errno = %d", errno); @@ -140,7 +226,7 @@ void start_userspace(int cpu) if(munmap(stack, PAGE_SIZE) < 0) panic("start_userspace : munmap failed, errno = %d\n", errno); - userspace_pid[cpu] = pid; + return(pid); } void userspace(union uml_pt_regs *regs) @@ -174,7 +260,9 @@ void userspace(union uml_pt_regs *regs) if(WIFSTOPPED(status)){ switch(WSTOPSIG(status)){ case SIGSEGV: - handle_segv(pid, regs); + if(PTRACE_FULL_FAULTINFO || !ptrace_faultinfo) + user_signal(SIGSEGV, regs, pid); + else handle_segv(pid, regs); break; case SIGTRAP + 0x80: handle_trap(pid, regs, local_using_sysemu); @@ -194,6 +282,7 @@ void userspace(union uml_pt_regs *regs) printk("userspace - child stopped with signal " "%d\n", WSTOPSIG(status)); } + pid = userspace_pid[0]; interrupt_end(); /* Avoid -ERESTARTSYS handling in host */ @@ -334,21 +423,19 @@ void reboot_skas(void) siglongjmp(initial_jmpbuf, INIT_JMP_REBOOT); } -void switch_mm_skas(int mm_fd) +void switch_mm_skas(struct mm_id *mm_idp) { int err; #warning need cpu pid in switch_mm_skas - err = ptrace(PTRACE_SWITCH_MM, userspace_pid[0], 0, mm_fd); - if(err) - panic("switch_mm_skas - PTRACE_SWITCH_MM failed, errno = %d\n", - errno); -} - -void kill_off_processes_skas(void) -{ -#warning need to loop over userspace_pids in kill_off_processes_skas - os_kill_ptraced_process(userspace_pid[0], 1); + if(proc_mm){ + err = ptrace(PTRACE_SWITCH_MM, userspace_pid[0], 0, + mm_idp->u.mm_fd); + if(err) + panic("switch_mm_skas - PTRACE_SWITCH_MM failed, " + "errno = %d\n", errno); + } + else userspace_pid[0] = mm_idp->u.pid; } /* diff --git a/arch/um/kernel/skas/process_kern.c b/arch/um/kernel/skas/process_kern.c index 0a7b8aa55db8..cbabab104ac3 100644 --- a/arch/um/kernel/skas/process_kern.c +++ b/arch/um/kernel/skas/process_kern.c @@ -175,9 +175,12 @@ static int start_kernel_proc(void *unused) return(0); } +extern int userspace_pid[]; + int start_uml_skas(void) { - start_userspace(0); + if(proc_mm) + userspace_pid[0] = start_userspace(0); init_new_thread_signals(1); @@ -199,3 +202,31 @@ int thread_pid_skas(struct task_struct *task) #warning Need to look up userspace_pid by cpu return(userspace_pid[0]); } + +void kill_off_processes_skas(void) +{ + if(proc_mm) +#warning need to loop over userspace_pids in kill_off_processes_skas + os_kill_ptraced_process(userspace_pid[0], 1); + else { + struct task_struct *p; + int pid, me; + + me = os_getpid(); + for_each_process(p){ + if(p->mm == NULL) + continue; + + pid = p->mm->context.skas.id.u.pid; + os_kill_ptraced_process(pid, 1); + } + } +} + +unsigned long current_stub_stack(void) +{ + if(current->mm == NULL) + return(0); + + return(current->mm->context.skas.id.stack); +} diff --git a/arch/um/kernel/skas/tlb.c b/arch/um/kernel/skas/tlb.c index 18f9a7711de1..6230999c672c 100644 --- a/arch/um/kernel/skas/tlb.c +++ b/arch/um/kernel/skas/tlb.c @@ -6,6 +6,7 @@ #include "linux/stddef.h" #include "linux/sched.h" +#include "linux/config.h" #include "linux/mm.h" #include "asm/page.h" #include "asm/pgtable.h" @@ -17,7 +18,7 @@ #include "os.h" #include "tlb.h" -static void do_ops(int fd, struct host_vm_op *ops, int last) +static void do_ops(union mm_context *mmu, struct host_vm_op *ops, int last) { struct host_vm_op *op; int i; @@ -26,18 +27,18 @@ static void do_ops(int fd, struct host_vm_op *ops, int last) op = &ops[i]; switch(op->type){ case MMAP: - map(fd, op->u.mmap.addr, op->u.mmap.len, + map(&mmu->skas.id, op->u.mmap.addr, op->u.mmap.len, op->u.mmap.r, op->u.mmap.w, op->u.mmap.x, op->u.mmap.fd, op->u.mmap.offset); break; case MUNMAP: - unmap(fd, (void *) op->u.munmap.addr, + unmap(&mmu->skas.id, (void *) op->u.munmap.addr, op->u.munmap.len); break; case MPROTECT: - protect(fd, op->u.mprotect.addr, op->u.mprotect.len, - op->u.mprotect.r, op->u.mprotect.w, - op->u.mprotect.x); + protect(&mmu->skas.id, op->u.mprotect.addr, + op->u.mprotect.len, op->u.mprotect.r, + op->u.mprotect.w, op->u.mprotect.x); break; default: printk("Unknown op type %d in do_ops\n", op->type); @@ -46,12 +47,15 @@ static void do_ops(int fd, struct host_vm_op *ops, int last) } } +extern int proc_mm; + static void fix_range(struct mm_struct *mm, unsigned long start_addr, unsigned long end_addr, int force) { - int fd = mm->context.skas.mm_fd; + if(!proc_mm && (end_addr > CONFIG_STUB_START)) + end_addr = CONFIG_STUB_START; - fix_range_common(mm, start_addr, end_addr, force, fd, do_ops); + fix_range_common(mm, start_addr, end_addr, force, do_ops); } void __flush_tlb_one_skas(unsigned long addr) @@ -69,16 +73,20 @@ void flush_tlb_range_skas(struct vm_area_struct *vma, unsigned long start, void flush_tlb_mm_skas(struct mm_struct *mm) { + unsigned long end; + /* Don't bother flushing if this address space is about to be * destroyed. */ if(atomic_read(&mm->mm_users) == 0) return; - fix_range(mm, 0, host_task_size, 0); + end = proc_mm ? task_size : CONFIG_STUB_START; + fix_range(mm, 0, end, 0); } void force_flush_all_skas(void) { - fix_range(current->mm, 0, host_task_size, 1); + unsigned long end = proc_mm ? task_size : CONFIG_STUB_START; + fix_range(current->mm, 0, end, 1); } diff --git a/arch/um/kernel/tlb.c b/arch/um/kernel/tlb.c index eda477edfdf5..83ec8d4747fd 100644 --- a/arch/um/kernel/tlb.c +++ b/arch/um/kernel/tlb.c @@ -18,13 +18,15 @@ #define ADD_ROUND(n, inc) (((n) + (inc)) & ~((inc) - 1)) void fix_range_common(struct mm_struct *mm, unsigned long start_addr, - unsigned long end_addr, int force, int data, - void (*do_ops)(int, struct host_vm_op *, int)) + unsigned long end_addr, int force, + void (*do_ops)(union mm_context *, struct host_vm_op *, + int)) { pgd_t *npgd; pud_t *npud; pmd_t *npmd; pte_t *npte; + union mm_context *mmu = &mm->context; unsigned long addr, end; int r, w, x; struct host_vm_op ops[16]; @@ -40,7 +42,7 @@ void fix_range_common(struct mm_struct *mm, unsigned long start_addr, end = end_addr; if(force || pgd_newpage(*npgd)){ op_index = add_munmap(addr, end - addr, ops, - op_index, last_op, data, + op_index, last_op, mmu, do_ops); pgd_mkuptodate(*npgd); } @@ -55,7 +57,7 @@ void fix_range_common(struct mm_struct *mm, unsigned long start_addr, end = end_addr; if(force || pud_newpage(*npud)){ op_index = add_munmap(addr, end - addr, ops, - op_index, last_op, data, + op_index, last_op, mmu, do_ops); pud_mkuptodate(*npud); } @@ -70,7 +72,7 @@ void fix_range_common(struct mm_struct *mm, unsigned long start_addr, end = end_addr; if(force || pmd_newpage(*npmd)){ op_index = add_munmap(addr, end - addr, ops, - op_index, last_op, data, + op_index, last_op, mmu, do_ops); pmd_mkuptodate(*npmd); } @@ -93,21 +95,21 @@ void fix_range_common(struct mm_struct *mm, unsigned long start_addr, op_index = add_mmap(addr, pte_val(*npte) & PAGE_MASK, PAGE_SIZE, r, w, x, ops, - op_index, last_op, data, + op_index, last_op, mmu, do_ops); else op_index = add_munmap(addr, PAGE_SIZE, ops, - op_index, last_op, data, + op_index, last_op, mmu, do_ops); } else if(pte_newprot(*npte)) op_index = add_mprotect(addr, PAGE_SIZE, r, w, x, ops, - op_index, last_op, data, + op_index, last_op, mmu, do_ops); *npte = pte_mkuptodate(*npte); addr += PAGE_SIZE; } - (*do_ops)(data, ops, op_index); + (*do_ops)(mmu, ops, op_index); } int flush_tlb_kernel_range_common(unsigned long start, unsigned long end) @@ -195,51 +197,6 @@ int flush_tlb_kernel_range_common(unsigned long start, unsigned long end) return(updated); } -void flush_tlb_page(struct vm_area_struct *vma, unsigned long address) -{ - address &= PAGE_MASK; - flush_tlb_range(vma, address, address + PAGE_SIZE); -} - -void flush_tlb_all(void) -{ - flush_tlb_mm(current->mm); -} - -void flush_tlb_kernel_range(unsigned long start, unsigned long end) -{ - CHOOSE_MODE_PROC(flush_tlb_kernel_range_tt, - flush_tlb_kernel_range_common, start, end); -} - -void flush_tlb_kernel_vm(void) -{ - CHOOSE_MODE(flush_tlb_kernel_vm_tt(), - flush_tlb_kernel_range_common(start_vm, end_vm)); -} - -void __flush_tlb_one(unsigned long addr) -{ - CHOOSE_MODE_PROC(__flush_tlb_one_tt, __flush_tlb_one_skas, addr); -} - -void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, - unsigned long end) -{ - CHOOSE_MODE_PROC(flush_tlb_range_tt, flush_tlb_range_skas, vma, start, - end); -} - -void flush_tlb_mm(struct mm_struct *mm) -{ - CHOOSE_MODE_PROC(flush_tlb_mm_tt, flush_tlb_mm_skas, mm); -} - -void force_flush_all(void) -{ - CHOOSE_MODE(force_flush_all_tt(), force_flush_all_skas()); -} - pgd_t *pgd_offset_proc(struct mm_struct *mm, unsigned long address) { return(pgd_offset(mm, address)); @@ -270,9 +227,9 @@ pte_t *addr_pte(struct task_struct *task, unsigned long addr) } int add_mmap(unsigned long virt, unsigned long phys, unsigned long len, - int r, int w, int x, struct host_vm_op *ops, int index, - int last_filled, int data, - void (*do_ops)(int, struct host_vm_op *, int)) + int r, int w, int x, struct host_vm_op *ops, int index, + int last_filled, union mm_context *mmu, + void (*do_ops)(union mm_context *, struct host_vm_op *, int)) { __u64 offset; struct host_vm_op *last; @@ -292,7 +249,7 @@ int add_mmap(unsigned long virt, unsigned long phys, unsigned long len, } if(index == last_filled){ - (*do_ops)(data, ops, last_filled); + (*do_ops)(mmu, ops, last_filled); index = -1; } @@ -310,8 +267,8 @@ int add_mmap(unsigned long virt, unsigned long phys, unsigned long len, } int add_munmap(unsigned long addr, unsigned long len, struct host_vm_op *ops, - int index, int last_filled, int data, - void (*do_ops)(int, struct host_vm_op *, int)) + int index, int last_filled, union mm_context *mmu, + void (*do_ops)(union mm_context *, struct host_vm_op *, int)) { struct host_vm_op *last; @@ -325,7 +282,7 @@ int add_munmap(unsigned long addr, unsigned long len, struct host_vm_op *ops, } if(index == last_filled){ - (*do_ops)(data, ops, last_filled); + (*do_ops)(mmu, ops, last_filled); index = -1; } @@ -337,8 +294,9 @@ int add_munmap(unsigned long addr, unsigned long len, struct host_vm_op *ops, } int add_mprotect(unsigned long addr, unsigned long len, int r, int w, int x, - struct host_vm_op *ops, int index, int last_filled, int data, - void (*do_ops)(int, struct host_vm_op *, int)) + struct host_vm_op *ops, int index, int last_filled, + union mm_context *mmu, + void (*do_ops)(union mm_context *, struct host_vm_op *, int)) { struct host_vm_op *last; @@ -354,7 +312,7 @@ int add_mprotect(unsigned long addr, unsigned long len, int r, int w, int x, } if(index == last_filled){ - (*do_ops)(data, ops, last_filled); + (*do_ops)(mmu, ops, last_filled); index = -1; } @@ -367,3 +325,49 @@ int add_mprotect(unsigned long addr, unsigned long len, int r, int w, int x, .x = x } } }); return(index); } + +void flush_tlb_page(struct vm_area_struct *vma, unsigned long address) +{ + address &= PAGE_MASK; + flush_tlb_range(vma, address, address + PAGE_SIZE); +} + +void flush_tlb_all(void) +{ + flush_tlb_mm(current->mm); +} + +void flush_tlb_kernel_range(unsigned long start, unsigned long end) +{ + CHOOSE_MODE_PROC(flush_tlb_kernel_range_tt, + flush_tlb_kernel_range_common, start, end); +} + +void flush_tlb_kernel_vm(void) +{ + CHOOSE_MODE(flush_tlb_kernel_vm_tt(), + flush_tlb_kernel_range_common(start_vm, end_vm)); +} + +void __flush_tlb_one(unsigned long addr) +{ + CHOOSE_MODE_PROC(__flush_tlb_one_tt, __flush_tlb_one_skas, addr); +} + +void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, + unsigned long end) +{ + CHOOSE_MODE_PROC(flush_tlb_range_tt, flush_tlb_range_skas, vma, start, + end); +} + +void flush_tlb_mm(struct mm_struct *mm) +{ + CHOOSE_MODE_PROC(flush_tlb_mm_tt, flush_tlb_mm_skas, mm); +} + +void force_flush_all(void) +{ + CHOOSE_MODE(force_flush_all_tt(), force_flush_all_skas()); +} + diff --git a/arch/um/kernel/tt/tlb.c b/arch/um/kernel/tt/tlb.c index 203216ad86f1..2eefb43bc9c2 100644 --- a/arch/um/kernel/tt/tlb.c +++ b/arch/um/kernel/tt/tlb.c @@ -17,7 +17,7 @@ #include "os.h" #include "tlb.h" -static void do_ops(int unused, struct host_vm_op *ops, int last) +static void do_ops(union mm_context *mmu, struct host_vm_op *ops, int last) { struct host_vm_op *op; int i; @@ -55,7 +55,7 @@ static void fix_range(struct mm_struct *mm, unsigned long start_addr, panic("fix_range fixing wrong address space, current = 0x%p", current); - fix_range_common(mm, start_addr, end_addr, force, 0, do_ops); + fix_range_common(mm, start_addr, end_addr, force, do_ops); } atomic_t vmchange_seq = ATOMIC_INIT(1); diff --git a/arch/um/kernel/uml.lds.S b/arch/um/kernel/uml.lds.S index 61dfd4fef752..163476a8cb1b 100644 --- a/arch/um/kernel/uml.lds.S +++ b/arch/um/kernel/uml.lds.S @@ -30,6 +30,7 @@ SECTIONS _einittext = .; } . = ALIGN(4096); + .text : { *(.text) @@ -39,6 +40,12 @@ SECTIONS /* .gnu.warning sections are handled specially by elf32.em. */ *(.gnu.warning) *(.gnu.linkonce.t*) + + . = ALIGN(4096); + __syscall_stub_start = .; + *(.__syscall_stub*) + __syscall_stub_end = .; + . = ALIGN(4096); } #include "asm/common.lds.S" -- cgit v1.2.3 From 9786a8f3cbc61f990266e23ffdb338ee3118b03d Mon Sep 17 00:00:00 2001 From: Bodo Stroesser Date: Thu, 7 Jul 2005 17:56:50 -0700 Subject: [PATCH] uml: Proper clone support for skas0 This patch implements the clone-stub mechanism, which allows skas0 to run with proc_mm==0, even if the clib in UML uses modify_ldt. Note: There is a bug in skas3.v7 host patch, that avoids UML-skas from running properly on a SMP-box. In full skas3, I never really saw problems, but in skas0 they showed up. More commentary by jdike - What this patch does is makes sure that the host parent of each new host process matches the UML parent of the corresponding UML process. This ensures that any changed LDTs are inherited. This is done by having clone actually called by the UML process from its stub, rather than by the kernel. We have special syscall stubs that are loaded onto the stub code page because that code must be completely self-contained. These stubs are given C interfaces, and used like normal C functions, but there are subtleties. Principally, we have to be careful about stack variables in stub_clone_handler after the clone. The code is written so that there aren't any - everything boils down to a fixed address. If there were any locals, references to them after the clone would be wrong because the stack just changed. Signed-off-by: Bodo Stroesser Signed-off-by: Jeff Dike Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/um/kernel/skas/Makefile | 7 ++-- arch/um/kernel/skas/clone.c | 44 +++++++++++++++++++++++ arch/um/kernel/skas/include/skas.h | 1 + arch/um/kernel/skas/include/stub-data.h | 18 ++++++++++ arch/um/kernel/skas/mmu.c | 7 +++- arch/um/kernel/skas/process.c | 63 +++++++++++++++++++++++++++++++++ arch/um/kernel/time.c | 7 ++++ 7 files changed, 144 insertions(+), 3 deletions(-) create mode 100644 arch/um/kernel/skas/clone.c create mode 100644 arch/um/kernel/skas/include/stub-data.h (limited to 'arch/um/kernel') diff --git a/arch/um/kernel/skas/Makefile b/arch/um/kernel/skas/Makefile index ff69c4b312c0..d296d55ade4b 100644 --- a/arch/um/kernel/skas/Makefile +++ b/arch/um/kernel/skas/Makefile @@ -3,11 +3,14 @@ # Licensed under the GPL # -obj-y := exec_kern.o mem.o mem_user.o mmu.o process.o process_kern.o \ +obj-y := clone.o exec_kern.o mem.o mem_user.o mmu.o process.o process_kern.o \ syscall_kern.o syscall_user.o tlb.o trap_user.o uaccess.o \ subdir- := util -USER_OBJS := process.o +USER_OBJS := process.o clone.o include arch/um/scripts/Makefile.rules + +# clone.o is in the stub, so it can't be built with profiling +$(obj)/clone.o : c_flags = -Wp,-MD,$(depfile) $(call unprofile,$(USER_CFLAGS)) diff --git a/arch/um/kernel/skas/clone.c b/arch/um/kernel/skas/clone.c new file mode 100644 index 000000000000..4dc55f10cd18 --- /dev/null +++ b/arch/um/kernel/skas/clone.c @@ -0,0 +1,44 @@ +#include +#include +#include +#include +#include +#include +#include "ptrace_user.h" +#include "skas.h" +#include "stub-data.h" +#include "uml-config.h" +#include "sysdep/stub.h" + +/* This is in a separate file because it needs to be compiled with any + * extraneous gcc flags (-pg, -fprofile-arcs, -ftest-coverage) disabled + */ +void __attribute__ ((__section__ (".__syscall_stub"))) +stub_clone_handler(void) +{ + long err; + struct stub_data *from = (struct stub_data *) UML_CONFIG_STUB_DATA; + + err = stub_syscall2(__NR_clone, CLONE_PARENT | CLONE_FILES | SIGCHLD, + UML_CONFIG_STUB_DATA + PAGE_SIZE / 2 - + sizeof(void *)); + if(err != 0) + goto out; + + err = stub_syscall4(__NR_ptrace, PTRACE_TRACEME, 0, 0, 0); + if(err) + goto out; + + err = stub_syscall3(__NR_setitimer, ITIMER_VIRTUAL, + (long) &from->timer, 0); + if(err) + goto out; + + err = stub_syscall6(STUB_MMAP_NR, UML_CONFIG_STUB_DATA, PAGE_SIZE, + PROT_READ | PROT_WRITE, MAP_FIXED | MAP_SHARED, + from->fd, from->offset); + out: + /* save current result. Parent: pid; child: retcode of mmap */ + from->err = err; + trap_myself(); +} diff --git a/arch/um/kernel/skas/include/skas.h b/arch/um/kernel/skas/include/skas.h index d91a60f3830a..d983ea842547 100644 --- a/arch/um/kernel/skas/include/skas.h +++ b/arch/um/kernel/skas/include/skas.h @@ -32,6 +32,7 @@ extern int protect(struct mm_id * mm_idp, unsigned long addr, extern void user_signal(int sig, union uml_pt_regs *regs, int pid); extern int new_mm(int from); extern int start_userspace(unsigned long stub_stack); +extern int copy_context_skas0(unsigned long stack, int pid); extern void get_skas_faultinfo(int pid, struct faultinfo * fi); extern long execute_syscall_skas(void *r); extern unsigned long current_stub_stack(void); diff --git a/arch/um/kernel/skas/include/stub-data.h b/arch/um/kernel/skas/include/stub-data.h new file mode 100644 index 000000000000..f6ed92c3727d --- /dev/null +++ b/arch/um/kernel/skas/include/stub-data.h @@ -0,0 +1,18 @@ +/* + * Copyright (C) 2005 Jeff Dike (jdike@karaya.com) + * Licensed under the GPL + */ + +#ifndef __STUB_DATA_H +#define __STUB_DATA_H + +#include + +struct stub_data { + long offset; + int fd; + struct itimerval timer; + long err; +}; + +#endif diff --git a/arch/um/kernel/skas/mmu.c b/arch/um/kernel/skas/mmu.c index 511a855c9ec0..d232daa42c31 100644 --- a/arch/um/kernel/skas/mmu.c +++ b/arch/um/kernel/skas/mmu.c @@ -75,6 +75,7 @@ static int init_stub_pte(struct mm_struct *mm, unsigned long proc, int init_new_context_skas(struct task_struct *task, struct mm_struct *mm) { struct mm_struct *cur_mm = current->mm; + struct mm_id *cur_mm_id = &cur_mm->context.skas.id; struct mm_id *mm_id = &mm->context.skas.id; unsigned long stack; int from, ret; @@ -115,7 +116,11 @@ int init_new_context_skas(struct task_struct *task, struct mm_struct *mm) goto out_free; mm->nr_ptes--; - mm_id->u.pid = start_userspace(stack); + + if((cur_mm != NULL) && (cur_mm != &init_mm)) + mm_id->u.pid = copy_context_skas0(stack, + cur_mm_id->u.pid); + else mm_id->u.pid = start_userspace(stack); } return 0; diff --git a/arch/um/kernel/skas/process.c b/arch/um/kernel/skas/process.c index 1647abb0d1aa..ba671dab8878 100644 --- a/arch/um/kernel/skas/process.c +++ b/arch/um/kernel/skas/process.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include "user.h" @@ -22,6 +23,7 @@ #include "user_util.h" #include "kern_util.h" #include "skas.h" +#include "stub-data.h" #include "mm_id.h" #include "sysdep/sigcontext.h" #include "sysdep/stub.h" @@ -296,6 +298,67 @@ void userspace(union uml_pt_regs *regs) #define INIT_JMP_HALT 3 #define INIT_JMP_REBOOT 4 + +int copy_context_skas0(unsigned long new_stack, int pid) +{ + int err; + unsigned long regs[MAX_REG_NR]; + unsigned long current_stack = current_stub_stack(); + struct stub_data *data = (struct stub_data *) current_stack; + struct stub_data *child_data = (struct stub_data *) new_stack; + __u64 new_offset; + int new_fd = phys_mapping(to_phys((void *)new_stack), &new_offset); + + /* prepare offset and fd of child's stack as argument for parent's + * and child's mmap2 calls + */ + *data = ((struct stub_data) { .offset = MMAP_OFFSET(new_offset), + .fd = new_fd, + .timer = ((struct itimerval) + { { 0, 1000000 / hz() }, + { 0, 1000000 / hz() }})}); + get_safe_registers(regs); + + /* Set parent's instruction pointer to start of clone-stub */ + regs[REGS_IP_INDEX] = UML_CONFIG_STUB_CODE + + (unsigned long) stub_clone_handler - + (unsigned long) &__syscall_stub_start; + regs[REGS_SP_INDEX] = UML_CONFIG_STUB_DATA + PAGE_SIZE - + sizeof(void *); + err = ptrace_setregs(pid, regs); + if(err < 0) + panic("copy_context_skas0 : PTRACE_SETREGS failed, " + "pid = %d, errno = %d\n", pid, errno); + + /* set a well known return code for detection of child write failure */ + child_data->err = 12345678; + + /* Wait, until parent has finished its work: read child's pid from + * parent's stack, and check, if bad result. + */ + wait_stub_done(pid, 0, "copy_context_skas0"); + + pid = data->err; + if(pid < 0) + panic("copy_context_skas0 - stub-parent reports error %d\n", + pid); + + /* Wait, until child has finished too: read child's result from + * child's stack and check it. + */ + wait_stub_done(pid, -1, "copy_context_skas0"); + if (child_data->err != UML_CONFIG_STUB_DATA) + panic("copy_context_skas0 - stub-child reports error %d\n", + child_data->err); + + if (ptrace(PTRACE_OLDSETOPTIONS, pid, NULL, + (void *)PTRACE_O_TRACESYSGOOD) < 0) + panic("copy_context_skas0 : PTRACE_SETOPTIONS failed, " + "errno = %d\n", errno); + + return pid; +} + void new_thread(void *stack, void **switch_buf_ptr, void **fork_buf_ptr, void (*handler)(int)) { diff --git a/arch/um/kernel/time.c b/arch/um/kernel/time.c index f829b309b63c..c40b611e3d93 100644 --- a/arch/um/kernel/time.c +++ b/arch/um/kernel/time.c @@ -48,6 +48,13 @@ void enable_timer(void) set_interval(ITIMER_VIRTUAL); } +void prepare_timer(void * ptr) +{ + int usec = 1000000/hz(); + *(struct itimerval *)ptr = ((struct itimerval) { { 0, usec }, + { 0, usec }}); +} + void disable_timer(void) { struct itimerval disable = ((struct itimerval) { { 0, 0 }, { 0, 0 }}); -- cgit v1.2.3 From 1c30385ae479ec4774bdc1048726aeb15cde0d21 Mon Sep 17 00:00:00 2001 From: Paolo 'Blaisorblade' Giarrusso Date: Thu, 14 Jul 2005 00:33:38 -0700 Subject: [PATCH] uml: gcc 2.95 fix and Makefile cleanup 1) Cleanup an ugly hyper-nested code in Makefile (now only the arith. expression is passed through the host bash). 2) Fix a problem with GCC 2.95: according to a report from Raphael Bossek, .remap_data : { arch/um/sys-SUBARCH/unmap_fin.o (.data .bss) } is expanded into: .remap_data : { arch/um/sys-i386 /unmap_fin.o (.data .bss) } (because I didn't use ## to join the two tokens), thus stopping linking. Pass the whole path from the Makefile as a simple and nice fix. Signed-off-by: Paolo 'Blaisorblade' Giarrusso Cc: Raphael Bossek Cc: Jeff Dike Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/um/kernel/uml.lds.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/um/kernel') diff --git a/arch/um/kernel/uml.lds.S b/arch/um/kernel/uml.lds.S index 163476a8cb1b..b03326d391c9 100644 --- a/arch/um/kernel/uml.lds.S +++ b/arch/um/kernel/uml.lds.S @@ -16,8 +16,8 @@ SECTIONS __binary_start = .; #ifdef MODE_TT - .remap_data : { arch/um/sys-SUBARCH/unmap_fin.o (.data .bss) } - .remap : { arch/um/sys-SUBARCH/unmap_fin.o (.text) } + .remap_data : { UNMAP_PATH (.data .bss) } + .remap : { UNMAP_PATH (.text) } . = ALIGN(4096); /* Init code and data */ #endif -- cgit v1.2.3 From 59586e5a262a29361c45c929ea3253d4aec830b0 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 26 Jul 2005 11:36:01 -0600 Subject: [PATCH] Don't export machine_restart, machine_halt, or machine_power_off. machine_restart, machine_halt and machine_power_off are machine specific hooks deep into the reboot logic, that modules have no business messing with. Usually code should be calling kernel_restart, kernel_halt, kernel_power_off, or emergency_restart. So don't export machine_restart, machine_halt, and machine_power_off so we can catch buggy users. Signed-off-by: Eric W. Biederman Signed-off-by: Linus Torvalds --- arch/um/kernel/reboot.c | 6 ------ 1 file changed, 6 deletions(-) (limited to 'arch/um/kernel') diff --git a/arch/um/kernel/reboot.c b/arch/um/kernel/reboot.c index fcec51da1d37..a637e885c583 100644 --- a/arch/um/kernel/reboot.c +++ b/arch/um/kernel/reboot.c @@ -49,23 +49,17 @@ void machine_restart(char * __unused) CHOOSE_MODE(reboot_tt(), reboot_skas()); } -EXPORT_SYMBOL(machine_restart); - void machine_power_off(void) { uml_cleanup(); CHOOSE_MODE(halt_tt(), halt_skas()); } -EXPORT_SYMBOL(machine_power_off); - void machine_halt(void) { machine_power_off(); } -EXPORT_SYMBOL(machine_halt); - /* * Overrides for Emacs so that we follow Linus's tabbing style. * Emacs will notice this stuff at the end of the file and automatically -- cgit v1.2.3 From cb66504d65e54210d7dc55a0027d309ca5f51f83 Mon Sep 17 00:00:00 2001 From: Paolo 'Blaisorblade' Giarrusso Date: Wed, 27 Jul 2005 11:43:31 -0700 Subject: [PATCH] uml: add skas0 command-line option This adds the "skas0" parameter to force skas0 operation on SKAS3 host and shows which operating mode has been selected. Signed-off-by: Paolo 'Blaisorblade' Giarrusso Signed-off-by: Jeff Dike Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/um/kernel/process.c | 35 +++++++++++++++++++++++++---------- arch/um/kernel/um_arch.c | 19 +++++++++++++++++++ 2 files changed, 44 insertions(+), 10 deletions(-) (limited to 'arch/um/kernel') diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c index c45a60e9c92d..8b01a5584e80 100644 --- a/arch/um/kernel/process.c +++ b/arch/um/kernel/process.c @@ -212,12 +212,26 @@ static int stop_ptraced_child(int pid, int exitcode, int mustexit) static int force_sysemu_disabled = 0; +int ptrace_faultinfo = 1; +int proc_mm = 1; + +static int __init skas0_cmd_param(char *str, int* add) +{ + ptrace_faultinfo = proc_mm = 0; + return 0; +} + static int __init nosysemu_cmd_param(char *str, int* add) { force_sysemu_disabled = 1; return 0; } +__uml_setup("skas0", skas0_cmd_param, + "skas0\n" + " Disables SKAS3 usage, so that SKAS0 is used, unless you \n" + " specify mode=tt.\n\n"); + __uml_setup("nosysemu", nosysemu_cmd_param, "nosysemu\n" " Turns off syscall emulation patch for ptrace (SYSEMU) on.\n" @@ -359,12 +373,10 @@ void forward_pending_sigio(int target) kill(target, SIGIO); } -int ptrace_faultinfo = 0; -int proc_mm = 1; - extern void *__syscall_stub_start, __syscall_stub_end; #ifdef UML_CONFIG_MODE_SKAS + static inline void check_skas3_ptrace_support(void) { struct ptrace_faultinfo fi; @@ -375,6 +387,7 @@ static inline void check_skas3_ptrace_support(void) n = ptrace(PTRACE_FAULTINFO, pid, 0, &fi); if (n < 0) { + ptrace_faultinfo = 0; if(errno == EIO) printf("not found\n"); else { @@ -382,8 +395,10 @@ static inline void check_skas3_ptrace_support(void) } } else { - ptrace_faultinfo = 1; - printf("found\n"); + if (!ptrace_faultinfo) + printf("found but disabled on command line\n"); + else + printf("found\n"); } init_registers(pid); @@ -396,13 +411,13 @@ int can_do_skas(void) if (os_access("/proc/mm", OS_ACC_W_OK) < 0) { proc_mm = 0; printf("not found\n"); - goto out; - } - else { - printf("found\n"); + } else { + if (!proc_mm) + printf("found but disabled on command line\n"); + else + printf("found\n"); } -out: check_skas3_ptrace_support(); return 1; } diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c index 8736d098f0ee..ca2bb6f09a7d 100644 --- a/arch/um/kernel/um_arch.c +++ b/arch/um/kernel/um_arch.c @@ -38,6 +38,9 @@ #include "choose-mode.h" #include "mode_kern.h" #include "mode.h" +#ifdef UML_CONFIG_MODE_SKAS +#include "skas.h" +#endif #define DEFAULT_COMMAND_LINE "root=98:0" @@ -318,6 +321,7 @@ int linux_main(int argc, char **argv) unsigned long avail, diff; unsigned long virtmem_size, max_physmem; unsigned int i, add; + char * mode; for (i = 1; i < argc; i++){ if((i == 1) && (argv[i][0] == ' ')) continue; @@ -338,6 +342,21 @@ int linux_main(int argc, char **argv) exit(1); } #endif + +#ifndef CONFIG_MODE_SKAS + mode = "TT"; +#else + /* Show to the user the result of selection */ + if (mode_tt) + mode = "TT"; + else if (proc_mm && ptrace_faultinfo) + mode = "SKAS3"; + else + mode = "SKAS0"; +#endif + + printf("UML running in %s mode\n", mode); + uml_start = CHOOSE_MODE_PROC(set_task_sizes_tt, set_task_sizes_skas, 0, &host_task_size, &task_size); -- cgit v1.2.3 From 0d4579ed553e8bb29d580c08bfcabcb0826a89c3 Mon Sep 17 00:00:00 2001 From: Jeff Dike Date: Wed, 27 Jul 2005 11:43:33 -0700 Subject: [PATCH] uml: fix misdeclared function This fixes an interface which differed from its declaration, and includes the relevant header so that this doesn't happen again. Signed-off-by: Jeff Dike Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/um/kernel/helper.c | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) (limited to 'arch/um/kernel') diff --git a/arch/um/kernel/helper.c b/arch/um/kernel/helper.c index 13b1f5c2f7ee..f83e1e8e2392 100644 --- a/arch/um/kernel/helper.c +++ b/arch/um/kernel/helper.c @@ -13,6 +13,7 @@ #include "user.h" #include "kern_util.h" #include "user_util.h" +#include "helper.h" #include "os.h" struct helper_data { @@ -149,7 +150,7 @@ int run_helper_thread(int (*proc)(void *), void *arg, unsigned int flags, return(pid); } -int helper_wait(int pid, int block) +int helper_wait(int pid) { int ret; @@ -160,14 +161,3 @@ int helper_wait(int pid, int block) } return(ret); } - -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ -- cgit v1.2.3 From 44456d37b59d8e541936ed26d8b6e08d27e88ac1 Mon Sep 17 00:00:00 2001 From: Olaf Hering Date: Wed, 27 Jul 2005 11:45:17 -0700 Subject: [PATCH] turn many #if $undefined_string into #ifdef $undefined_string turn many #if $undefined_string into #ifdef $undefined_string to fix some warnings after -Wno-def was added to global CFLAGS Signed-off-by: Olaf Hering Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/um/kernel/skas/syscall_user.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/um/kernel') diff --git a/arch/um/kernel/skas/syscall_user.c b/arch/um/kernel/skas/syscall_user.c index 2828e6e37721..6b0664970147 100644 --- a/arch/um/kernel/skas/syscall_user.c +++ b/arch/um/kernel/skas/syscall_user.c @@ -15,7 +15,7 @@ void handle_syscall(union uml_pt_regs *regs) { long result; -#if UML_CONFIG_SYSCALL_DEBUG +#ifdef UML_CONFIG_SYSCALL_DEBUG int index; index = record_syscall_start(UPT_SYSCALL_NR(regs)); @@ -27,7 +27,7 @@ void handle_syscall(union uml_pt_regs *regs) REGS_SET_SYSCALL_RETURN(regs->skas.regs, result); syscall_trace(regs, 1); -#if UML_CONFIG_SYSCALL_DEBUG +#ifdef UML_CONFIG_SYSCALL_DEBUG record_syscall_end(index, result); #endif } -- cgit v1.2.3 From b85e9680a33ae2df04bd493f220a76dcf183ce80 Mon Sep 17 00:00:00 2001 From: Jeff Dike Date: Thu, 28 Jul 2005 21:16:01 -0700 Subject: [PATCH] uml: fix TT mode by reverting "use fork instead of clone" With Paolo 'Blaisorblade' Giarrusso Revert the following patch, because of miscompilation problems in different environments leading to UML not working *at all* in TT mode; it was merged lately in 2.6 development cycle, a little after being written, and has caused problems to lots of people; I know it's a bit too long, but it shouldn't have been merged in first place, so I still apply for inclusion in the -stable tree. Anyone using this feature currently is either using some older kernel (some reports even used 2.6.12-rc4-mm2) or using this patch, as included in my -bs patchset. For now there's not yet a fix for this patch, so for now the best thing is to drop it (which was widely reported to give a working kernel, and as such was even merged in -stable tree). "Convert the boot-time host ptrace testing from clone to fork. They were essentially doing fork anyway. This cleans up the code a bit, and makes valgrind a bit happier about grinding it." URL: http://www.kernel.org/git/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commit;h=98fdffccea6cc3fe9dba32c0fcc310bcb5d71529 Signed-off-by: Jeff Dike Signed-off-by: Paolo 'Blaisorblade' Giarrusso Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/um/kernel/process.c | 48 +++++++++++++++++++++++++++++------------------- 1 file changed, 29 insertions(+), 19 deletions(-) (limited to 'arch/um/kernel') diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c index 8b01a5584e80..67acd92c5322 100644 --- a/arch/um/kernel/process.c +++ b/arch/um/kernel/process.c @@ -131,7 +131,7 @@ int start_fork_tramp(void *thread_arg, unsigned long temp_stack, return(arg.pid); } -static int ptrace_child(void) +static int ptrace_child(void *arg) { int ret; int pid = os_getpid(), ppid = getppid(); @@ -160,16 +160,20 @@ static int ptrace_child(void) _exit(ret); } -static int start_ptraced_child(void) +static int start_ptraced_child(void **stack_out) { + void *stack; + unsigned long sp; int pid, n, status; - pid = fork(); - if(pid == 0) - ptrace_child(); - + stack = mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE | PROT_EXEC, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if(stack == MAP_FAILED) + panic("check_ptrace : mmap failed, errno = %d", errno); + sp = (unsigned long) stack + PAGE_SIZE - sizeof(void *); + pid = clone(ptrace_child, (void *) sp, SIGCHLD, NULL); if(pid < 0) - panic("check_ptrace : fork failed, errno = %d", errno); + panic("check_ptrace : clone failed, errno = %d", errno); CATCH_EINTR(n = waitpid(pid, &status, WUNTRACED)); if(n < 0) panic("check_ptrace : wait failed, errno = %d", errno); @@ -177,6 +181,7 @@ static int start_ptraced_child(void) panic("check_ptrace : expected SIGSTOP, got status = %d", status); + *stack_out = stack; return(pid); } @@ -184,12 +189,12 @@ static int start_ptraced_child(void) * just avoid using sysemu, not panic, but only if SYSEMU features are broken. * So only for SYSEMU features we test mustpanic, while normal host features * must work anyway!*/ -static int stop_ptraced_child(int pid, int exitcode, int mustexit) +static int stop_ptraced_child(int pid, void *stack, int exitcode, int mustpanic) { int status, n, ret = 0; if(ptrace(PTRACE_CONT, pid, 0, 0) < 0) - panic("stop_ptraced_child : ptrace failed, errno = %d", errno); + panic("check_ptrace : ptrace failed, errno = %d", errno); CATCH_EINTR(n = waitpid(pid, &status, 0)); if(!WIFEXITED(status) || (WEXITSTATUS(status) != exitcode)) { int exit_with = WEXITSTATUS(status); @@ -200,13 +205,15 @@ static int stop_ptraced_child(int pid, int exitcode, int mustexit) printk("check_ptrace : child exited with exitcode %d, while " "expecting %d; status 0x%x", exit_with, exitcode, status); - if (mustexit) + if (mustpanic) panic("\n"); else printk("\n"); ret = -1; } + if(munmap(stack, PAGE_SIZE) < 0) + panic("check_ptrace : munmap failed, errno = %d", errno); return ret; } @@ -242,11 +249,12 @@ __uml_setup("nosysemu", nosysemu_cmd_param, static void __init check_sysemu(void) { + void *stack; int pid, syscall, n, status, count=0; printk("Checking syscall emulation patch for ptrace..."); sysemu_supported = 0; - pid = start_ptraced_child(); + pid = start_ptraced_child(&stack); if(ptrace(PTRACE_SYSEMU, pid, 0, 0) < 0) goto fail; @@ -264,7 +272,7 @@ static void __init check_sysemu(void) panic("check_sysemu : failed to modify system " "call return, errno = %d", errno); - if (stop_ptraced_child(pid, 0, 0) < 0) + if (stop_ptraced_child(pid, stack, 0, 0) < 0) goto fail_stopped; sysemu_supported = 1; @@ -272,7 +280,7 @@ static void __init check_sysemu(void) set_using_sysemu(!force_sysemu_disabled); printk("Checking advanced syscall emulation patch for ptrace..."); - pid = start_ptraced_child(); + pid = start_ptraced_child(&stack); while(1){ count++; if(ptrace(PTRACE_SYSEMU_SINGLESTEP, pid, 0, 0) < 0) @@ -297,7 +305,7 @@ static void __init check_sysemu(void) break; } } - if (stop_ptraced_child(pid, 0, 0) < 0) + if (stop_ptraced_child(pid, stack, 0, 0) < 0) goto fail_stopped; sysemu_supported = 2; @@ -308,17 +316,18 @@ static void __init check_sysemu(void) return; fail: - stop_ptraced_child(pid, 1, 0); + stop_ptraced_child(pid, stack, 1, 0); fail_stopped: printk("missing\n"); } void __init check_ptrace(void) { + void *stack; int pid, syscall, n, status; printk("Checking that ptrace can change system call numbers..."); - pid = start_ptraced_child(); + pid = start_ptraced_child(&stack); if (ptrace(PTRACE_OLDSETOPTIONS, pid, 0, (void *)PTRACE_O_TRACESYSGOOD) < 0) panic("check_ptrace: PTRACE_SETOPTIONS failed, errno = %d", errno); @@ -345,7 +354,7 @@ void __init check_ptrace(void) break; } } - stop_ptraced_child(pid, 0, 1); + stop_ptraced_child(pid, stack, 0, 1); printk("OK\n"); check_sysemu(); } @@ -380,10 +389,11 @@ extern void *__syscall_stub_start, __syscall_stub_end; static inline void check_skas3_ptrace_support(void) { struct ptrace_faultinfo fi; + void *stack; int pid, n; printf("Checking for the skas3 patch in the host..."); - pid = start_ptraced_child(); + pid = start_ptraced_child(&stack); n = ptrace(PTRACE_FAULTINFO, pid, 0, &fi); if (n < 0) { @@ -402,7 +412,7 @@ static inline void check_skas3_ptrace_support(void) } init_registers(pid); - stop_ptraced_child(pid, 1, 1); + stop_ptraced_child(pid, stack, 1, 1); } int can_do_skas(void) -- cgit v1.2.3 From d9b7cc84afc7f4ebcef27691279d557e13128818 Mon Sep 17 00:00:00 2001 From: Jeff Dike Date: Thu, 28 Jul 2005 21:16:08 -0700 Subject: [PATCH] uml: Fix redundant assignment By this point, .is_user has already been set, so this assignment is useless. Signed-off-by: Jeff Dike Cc: Paolo 'Blaisorblade' Giarrusso Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/um/kernel/skas/trap_user.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/um/kernel') diff --git a/arch/um/kernel/skas/trap_user.c b/arch/um/kernel/skas/trap_user.c index 0dee1d95c806..9950a6716fe5 100644 --- a/arch/um/kernel/skas/trap_user.c +++ b/arch/um/kernel/skas/trap_user.c @@ -58,7 +58,6 @@ void user_signal(int sig, union uml_pt_regs *regs, int pid) int segv = ((sig == SIGFPE) || (sig == SIGSEGV) || (sig == SIGBUS) || (sig == SIGILL) || (sig == SIGTRAP)); - regs->skas.is_user = 1; if (segv) get_skas_faultinfo(pid, ®s->skas.faultinfo); info = &sig_info[sig]; -- cgit v1.2.3 From 7e1f49da6881bbf938e502d99335ad5488eb93b4 Mon Sep 17 00:00:00 2001 From: Jeff Dike Date: Thu, 28 Jul 2005 21:16:09 -0700 Subject: [PATCH] uml: Fix load average >=1 update_process_times was missing its irq_enter/irq_exit wrapper. This caused ksoftirqd to be scheduled on every clock tick. Signed-off-by: Jeff Dike Cc: Paolo 'Blaisorblade' Giarrusso Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/um/kernel/time_kern.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'arch/um/kernel') diff --git a/arch/um/kernel/time_kern.c b/arch/um/kernel/time_kern.c index a8b4ef601f59..4e08f7545d63 100644 --- a/arch/um/kernel/time_kern.c +++ b/arch/um/kernel/time_kern.c @@ -137,7 +137,10 @@ long um_stime(int __user *tptr) void timer_handler(int sig, union uml_pt_regs *regs) { local_irq_disable(); - update_process_times(CHOOSE_MODE(user_context(UPT_SP(regs)), (regs)->skas.is_user)); + irq_enter(); + update_process_times(CHOOSE_MODE(user_context(UPT_SP(regs)), + (regs)->skas.is_user)); + irq_exit(); local_irq_enable(); if(current_thread->cpu == 0) timer_irq(regs); -- cgit v1.2.3 From 201134ca168c27c1f895ecfd74f9ebf1129483d0 Mon Sep 17 00:00:00 2001 From: Bodo Stroesser Date: Thu, 28 Jul 2005 21:16:11 -0700 Subject: [PATCH] uml: Fix typo Fix a typo in wait_stub_done. Signed-off-by: Bodo Stroesser Signed-off-by: Jeff Dike Cc: Paolo 'Blaisorblade' Giarrusso Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/um/kernel/skas/process.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/um/kernel') diff --git a/arch/um/kernel/skas/process.c b/arch/um/kernel/skas/process.c index ba671dab8878..6dd9e5bf18ed 100644 --- a/arch/um/kernel/skas/process.c +++ b/arch/um/kernel/skas/process.c @@ -64,7 +64,7 @@ void wait_stub_done(int pid, int sig, char * fname) (WSTOPSIG(status) == SIGVTALRM)); if((n < 0) || !WIFSTOPPED(status) || - (WSTOPSIG(status) != SIGUSR1 && WSTOPSIG(status != SIGTRAP))){ + (WSTOPSIG(status) != SIGUSR1 && WSTOPSIG(status) != SIGTRAP)){ panic("%s : failed to wait for SIGUSR1/SIGTRAP, " "pid = %d, n = %d, errno = %d, status = 0x%x\n", fname, pid, n, errno, status); -- cgit v1.2.3 From 30f417c65e151dc96998a8ef721149a43998bc65 Mon Sep 17 00:00:00 2001 From: Christophe Lucas Date: Thu, 28 Jul 2005 21:16:12 -0700 Subject: [PATCH] uml: Clean up prink calls printk() calls should include appropriate KERN_* constant. Signed-off-by: Christophe Lucas Signed-off-by: Domen Puncer Signed-off-by: Jeff Dike Cc: Paolo 'Blaisorblade' Giarrusso Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/um/kernel/exitcode.c | 2 +- arch/um/kernel/process_kern.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/um/kernel') diff --git a/arch/um/kernel/exitcode.c b/arch/um/kernel/exitcode.c index 0ea87f24b36f..d21ebad666b4 100644 --- a/arch/um/kernel/exitcode.c +++ b/arch/um/kernel/exitcode.c @@ -48,7 +48,7 @@ static int make_proc_exitcode(void) ent = create_proc_entry("exitcode", 0600, &proc_root); if(ent == NULL){ - printk("make_proc_exitcode : Failed to register " + printk(KERN_WARNING "make_proc_exitcode : Failed to register " "/proc/exitcode\n"); return(0); } diff --git a/arch/um/kernel/process_kern.c b/arch/um/kernel/process_kern.c index d4036ed680bc..c23d8a08d0ff 100644 --- a/arch/um/kernel/process_kern.c +++ b/arch/um/kernel/process_kern.c @@ -412,7 +412,7 @@ int __init make_proc_sysemu(void) if (ent == NULL) { - printk("Failed to register /proc/sysemu\n"); + printk(KERN_WARNING "Failed to register /proc/sysemu\n"); return(0); } -- cgit v1.2.3 From 2eaa297ca234eb518673b28dd6f3715d4b292e09 Mon Sep 17 00:00:00 2001 From: Jeff Dike Date: Thu, 18 Aug 2005 11:24:25 -0700 Subject: [PATCH] uml: fix a crash under screen Running UML inside a detached screen delivers SIGWINCH when UML is not expecting it. This patch ignores them. Signed-off-by: Jeff Dike Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/um/kernel/skas/process.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'arch/um/kernel') diff --git a/arch/um/kernel/skas/process.c b/arch/um/kernel/skas/process.c index 6dd9e5bf18ed..f228f8b54194 100644 --- a/arch/um/kernel/skas/process.c +++ b/arch/um/kernel/skas/process.c @@ -61,7 +61,11 @@ void wait_stub_done(int pid, int sig, char * fname) CATCH_EINTR(n = waitpid(pid, &status, WUNTRACED)); } while((n >= 0) && WIFSTOPPED(status) && - (WSTOPSIG(status) == SIGVTALRM)); + ((WSTOPSIG(status) == SIGVTALRM) || + /* running UML inside a detached screen can cause + * SIGWINCHes + */ + (WSTOPSIG(status) == SIGWINCH))); if((n < 0) || !WIFSTOPPED(status) || (WSTOPSIG(status) != SIGUSR1 && WSTOPSIG(status) != SIGTRAP)){ -- cgit v1.2.3 From 69be8f189653cd81aae5a74e26615b12871bb72e Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 29 Aug 2005 11:44:09 -0400 Subject: [PATCH] convert signal handling of NODEFER to act like other Unix boxes. It has been reported that the way Linux handles NODEFER for signals is not consistent with the way other Unix boxes handle it. I've written a program to test the behavior of how this flag affects signals and had several reports from people who ran this on various Unix boxes, confirming that Linux seems to be unique on the way this is handled. The way NODEFER affects signals on other Unix boxes is as follows: 1) If NODEFER is set, other signals in sa_mask are still blocked. 2) If NODEFER is set and the signal is in sa_mask, then the signal is still blocked. (Note: this is the behavior of all tested but Linux _and_ NetBSD 2.0 *). The way NODEFER affects signals on Linux: 1) If NODEFER is set, other signals are _not_ blocked regardless of sa_mask (Even NetBSD doesn't do this). 2) If NODEFER is set and the signal is in sa_mask, then the signal being handled is not blocked. The patch converts signal handling in all current Linux architectures to the way most Unix boxes work. Unix boxes that were tested: DU4, AIX 5.2, Irix 6.5, NetBSD 2.0, SFU 3.5 on WinXP, AIX 5.3, Mac OSX, and of course Linux 2.6.13-rcX. * NetBSD was the only other Unix to behave like Linux on point #2. The main concern was brought up by point #1 which even NetBSD isn't like Linux. So with this patch, we leave NetBSD as the lonely one that behaves differently here with #2. Signed-off-by: Linus Torvalds --- arch/um/kernel/signal_kern.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/um/kernel') diff --git a/arch/um/kernel/signal_kern.c b/arch/um/kernel/signal_kern.c index 7807a3e8c426..03618bd13d55 100644 --- a/arch/um/kernel/signal_kern.c +++ b/arch/um/kernel/signal_kern.c @@ -87,12 +87,12 @@ static int handle_signal(struct pt_regs *regs, unsigned long signr, recalc_sigpending(); spin_unlock_irq(¤t->sighand->siglock); force_sigsegv(signr, current); - } - else if(!(ka->sa.sa_flags & SA_NODEFER)){ + } else { spin_lock_irq(¤t->sighand->siglock); sigorsets(¤t->blocked, ¤t->blocked, &ka->sa.sa_mask); - sigaddset(¤t->blocked, signr); + if(!(ka->sa.sa_flags & SA_NODEFER)) + sigaddset(¤t->blocked, signr); recalc_sigpending(); spin_unlock_irq(¤t->sighand->siglock); } -- cgit v1.2.3