diff options
author | Jeff Dike <jdike@addtoit.com> | 2008-02-08 04:22:07 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@woody.linux-foundation.org> | 2008-02-08 09:22:42 -0800 |
commit | 536788fe2d28e11db6aeda74207d95d750fb761f (patch) | |
tree | 73df2d3a46c542c71d3a84c20c8fd1ce617386a3 /arch/um/os-Linux | |
parent | 2f569afd9ced9ebec9a6eb3dbf6f83429be0a7b4 (diff) |
uml: runtime host VMSPLIT detection
Calculate TASK_SIZE at run-time by figuring out the host's VMSPLIT - this is
needed on i386 if UML is to run on hosts with varying VMSPLITs without
recompilation.
TASK_SIZE is now defined in terms of a variable, task_size. This gets rid of
an include of pgtable.h from processor.h, which can cause include loops.
On i386, task_size is calculated early in boot by probing the address space in
a binary search to figure out where the boundary between usable and non-usable
memory is. This tries to make sure that a page that is considered to be in
userspace is, or can be made, read-write. I'm concerned about a system-global
VDSO page in kernel memory being hit and considered to be a userspace page.
On x86_64, task_size is just the old value of CONFIG_TOP_ADDR.
A bunch of config variable are gone now. CONFIG_TOP_ADDR is directly replaced
by TASK_SIZE. NEST_LEVEL is gone since the relocation of the stubs makes it
irrelevant. All the HOST_VMSPLIT stuff is gone. All references to these in
arch/um/Makefile are also gone.
I noticed and fixed a missing extern in os.h when adding os_get_task_size.
Note: This has been revised to fix the 32-bit UML on 64-bit host bug that
Miklos ran into.
Signed-off-by: Jeff Dike <jdike@linux.intel.com>
Cc: Miklos Szeredi <miklos@szeredi.hu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'arch/um/os-Linux')
-rw-r--r-- | arch/um/os-Linux/sys-i386/Makefile | 2 | ||||
-rw-r--r-- | arch/um/os-Linux/sys-i386/task_size.c | 120 | ||||
-rw-r--r-- | arch/um/os-Linux/sys-x86_64/Makefile | 2 | ||||
-rw-r--r-- | arch/um/os-Linux/sys-x86_64/task_size.c | 5 |
4 files changed, 127 insertions, 2 deletions
diff --git a/arch/um/os-Linux/sys-i386/Makefile b/arch/um/os-Linux/sys-i386/Makefile index a841262c594a..b4bc6ac4f30b 100644 --- a/arch/um/os-Linux/sys-i386/Makefile +++ b/arch/um/os-Linux/sys-i386/Makefile @@ -3,7 +3,7 @@ # Licensed under the GPL # -obj-y = registers.o signal.o tls.o +obj-y = registers.o signal.o task_size.o tls.o USER_OBJS := $(obj-y) diff --git a/arch/um/os-Linux/sys-i386/task_size.c b/arch/um/os-Linux/sys-i386/task_size.c new file mode 100644 index 000000000000..48d211b3d9a1 --- /dev/null +++ b/arch/um/os-Linux/sys-i386/task_size.c @@ -0,0 +1,120 @@ +#include <stdio.h> +#include <stdlib.h> +#include <signal.h> +#include <sys/mman.h> +#include "longjmp.h" +#include "kern_constants.h" + +static jmp_buf buf; + +static void segfault(int sig) +{ + longjmp(buf, 1); +} + +static int page_ok(unsigned long page) +{ + unsigned long *address = (unsigned long *) (page << UM_KERN_PAGE_SHIFT); + unsigned long n = ~0UL; + void *mapped = NULL; + int ok = 0; + + /* + * First see if the page is readable. If it is, it may still + * be a VDSO, so we go on to see if it's writable. If not + * then try mapping memory there. If that fails, then we're + * still in the kernel area. As a sanity check, we'll fail if + * the mmap succeeds, but gives us an address different from + * what we wanted. + */ + if (setjmp(buf) == 0) + n = *address; + else { + mapped = mmap(address, UM_KERN_PAGE_SIZE, + PROT_READ | PROT_WRITE, + MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (mapped == MAP_FAILED) + return 0; + if (mapped != address) + goto out; + } + + /* + * Now, is it writeable? If so, then we're in user address + * space. If not, then try mprotecting it and try the write + * again. + */ + if (setjmp(buf) == 0) { + *address = n; + ok = 1; + goto out; + } else if (mprotect(address, UM_KERN_PAGE_SIZE, + PROT_READ | PROT_WRITE) != 0) + goto out; + + if (setjmp(buf) == 0) { + *address = n; + ok = 1; + } + + out: + if (mapped != NULL) + munmap(mapped, UM_KERN_PAGE_SIZE); + return ok; +} + +unsigned long os_get_task_size(void) +{ + struct sigaction sa, old; + unsigned long bottom = 0; + /* + * A 32-bit UML on a 64-bit host gets confused about the VDSO at + * 0xffffe000. It is mapped, is readable, can be reprotected writeable + * and written. However, exec discovers later that it can't be + * unmapped. So, just set the highest address to be checked to just + * below it. This might waste some address space on 4G/4G 32-bit + * hosts, but shouldn't hurt otherwise. + */ + unsigned long top = 0xffffd000 >> UM_KERN_PAGE_SHIFT; + unsigned long test; + + printf("Locating the top of the address space ... "); + fflush(stdout); + + /* + * We're going to be longjmping out of the signal handler, so + * SA_DEFER needs to be set. + */ + sa.sa_handler = segfault; + sigemptyset(&sa.sa_mask); + sa.sa_flags = SA_NODEFER; + sigaction(SIGSEGV, &sa, &old); + + if (!page_ok(bottom)) { + fprintf(stderr, "Address 0x%x no good?\n", + bottom << UM_KERN_PAGE_SHIFT); + exit(1); + } + + /* This could happen with a 4G/4G split */ + if (page_ok(top)) + goto out; + + do { + test = bottom + (top - bottom) / 2; + if (page_ok(test)) + bottom = test; + else + top = test; + } while (top - bottom > 1); + +out: + /* Restore the old SIGSEGV handling */ + sigaction(SIGSEGV, &old, NULL); + + top <<= UM_KERN_PAGE_SHIFT; + printf("0x%x\n", top); + fflush(stdout); + + return top; +} diff --git a/arch/um/os-Linux/sys-x86_64/Makefile b/arch/um/os-Linux/sys-x86_64/Makefile index a42a4ef02e1e..a44a47f8f57b 100644 --- a/arch/um/os-Linux/sys-x86_64/Makefile +++ b/arch/um/os-Linux/sys-x86_64/Makefile @@ -3,7 +3,7 @@ # Licensed under the GPL # -obj-y = registers.o prctl.o signal.o +obj-y = registers.o prctl.o signal.o task_size.o USER_OBJS := $(obj-y) diff --git a/arch/um/os-Linux/sys-x86_64/task_size.c b/arch/um/os-Linux/sys-x86_64/task_size.c new file mode 100644 index 000000000000..fad6f57f8ee3 --- /dev/null +++ b/arch/um/os-Linux/sys-x86_64/task_size.c @@ -0,0 +1,5 @@ +unsigned long os_get_task_size(unsigned long shift) +{ + /* The old value of CONFIG_TOP_ADDR */ + return 0x7fc0000000; +} |