summaryrefslogtreecommitdiff
path: root/arch/um/os-Linux
diff options
context:
space:
mode:
authorJeff Dike <jdike@addtoit.com>2008-02-08 04:22:07 -0800
committerLinus Torvalds <torvalds@woody.linux-foundation.org>2008-02-08 09:22:42 -0800
commit536788fe2d28e11db6aeda74207d95d750fb761f (patch)
tree73df2d3a46c542c71d3a84c20c8fd1ce617386a3 /arch/um/os-Linux
parent2f569afd9ced9ebec9a6eb3dbf6f83429be0a7b4 (diff)
uml: runtime host VMSPLIT detection
Calculate TASK_SIZE at run-time by figuring out the host's VMSPLIT - this is needed on i386 if UML is to run on hosts with varying VMSPLITs without recompilation. TASK_SIZE is now defined in terms of a variable, task_size. This gets rid of an include of pgtable.h from processor.h, which can cause include loops. On i386, task_size is calculated early in boot by probing the address space in a binary search to figure out where the boundary between usable and non-usable memory is. This tries to make sure that a page that is considered to be in userspace is, or can be made, read-write. I'm concerned about a system-global VDSO page in kernel memory being hit and considered to be a userspace page. On x86_64, task_size is just the old value of CONFIG_TOP_ADDR. A bunch of config variable are gone now. CONFIG_TOP_ADDR is directly replaced by TASK_SIZE. NEST_LEVEL is gone since the relocation of the stubs makes it irrelevant. All the HOST_VMSPLIT stuff is gone. All references to these in arch/um/Makefile are also gone. I noticed and fixed a missing extern in os.h when adding os_get_task_size. Note: This has been revised to fix the 32-bit UML on 64-bit host bug that Miklos ran into. Signed-off-by: Jeff Dike <jdike@linux.intel.com> Cc: Miklos Szeredi <miklos@szeredi.hu> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'arch/um/os-Linux')
-rw-r--r--arch/um/os-Linux/sys-i386/Makefile2
-rw-r--r--arch/um/os-Linux/sys-i386/task_size.c120
-rw-r--r--arch/um/os-Linux/sys-x86_64/Makefile2
-rw-r--r--arch/um/os-Linux/sys-x86_64/task_size.c5
4 files changed, 127 insertions, 2 deletions
diff --git a/arch/um/os-Linux/sys-i386/Makefile b/arch/um/os-Linux/sys-i386/Makefile
index a841262c594a..b4bc6ac4f30b 100644
--- a/arch/um/os-Linux/sys-i386/Makefile
+++ b/arch/um/os-Linux/sys-i386/Makefile
@@ -3,7 +3,7 @@
# Licensed under the GPL
#
-obj-y = registers.o signal.o tls.o
+obj-y = registers.o signal.o task_size.o tls.o
USER_OBJS := $(obj-y)
diff --git a/arch/um/os-Linux/sys-i386/task_size.c b/arch/um/os-Linux/sys-i386/task_size.c
new file mode 100644
index 000000000000..48d211b3d9a1
--- /dev/null
+++ b/arch/um/os-Linux/sys-i386/task_size.c
@@ -0,0 +1,120 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <sys/mman.h>
+#include "longjmp.h"
+#include "kern_constants.h"
+
+static jmp_buf buf;
+
+static void segfault(int sig)
+{
+ longjmp(buf, 1);
+}
+
+static int page_ok(unsigned long page)
+{
+ unsigned long *address = (unsigned long *) (page << UM_KERN_PAGE_SHIFT);
+ unsigned long n = ~0UL;
+ void *mapped = NULL;
+ int ok = 0;
+
+ /*
+ * First see if the page is readable. If it is, it may still
+ * be a VDSO, so we go on to see if it's writable. If not
+ * then try mapping memory there. If that fails, then we're
+ * still in the kernel area. As a sanity check, we'll fail if
+ * the mmap succeeds, but gives us an address different from
+ * what we wanted.
+ */
+ if (setjmp(buf) == 0)
+ n = *address;
+ else {
+ mapped = mmap(address, UM_KERN_PAGE_SIZE,
+ PROT_READ | PROT_WRITE,
+ MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ if (mapped == MAP_FAILED)
+ return 0;
+ if (mapped != address)
+ goto out;
+ }
+
+ /*
+ * Now, is it writeable? If so, then we're in user address
+ * space. If not, then try mprotecting it and try the write
+ * again.
+ */
+ if (setjmp(buf) == 0) {
+ *address = n;
+ ok = 1;
+ goto out;
+ } else if (mprotect(address, UM_KERN_PAGE_SIZE,
+ PROT_READ | PROT_WRITE) != 0)
+ goto out;
+
+ if (setjmp(buf) == 0) {
+ *address = n;
+ ok = 1;
+ }
+
+ out:
+ if (mapped != NULL)
+ munmap(mapped, UM_KERN_PAGE_SIZE);
+ return ok;
+}
+
+unsigned long os_get_task_size(void)
+{
+ struct sigaction sa, old;
+ unsigned long bottom = 0;
+ /*
+ * A 32-bit UML on a 64-bit host gets confused about the VDSO at
+ * 0xffffe000. It is mapped, is readable, can be reprotected writeable
+ * and written. However, exec discovers later that it can't be
+ * unmapped. So, just set the highest address to be checked to just
+ * below it. This might waste some address space on 4G/4G 32-bit
+ * hosts, but shouldn't hurt otherwise.
+ */
+ unsigned long top = 0xffffd000 >> UM_KERN_PAGE_SHIFT;
+ unsigned long test;
+
+ printf("Locating the top of the address space ... ");
+ fflush(stdout);
+
+ /*
+ * We're going to be longjmping out of the signal handler, so
+ * SA_DEFER needs to be set.
+ */
+ sa.sa_handler = segfault;
+ sigemptyset(&sa.sa_mask);
+ sa.sa_flags = SA_NODEFER;
+ sigaction(SIGSEGV, &sa, &old);
+
+ if (!page_ok(bottom)) {
+ fprintf(stderr, "Address 0x%x no good?\n",
+ bottom << UM_KERN_PAGE_SHIFT);
+ exit(1);
+ }
+
+ /* This could happen with a 4G/4G split */
+ if (page_ok(top))
+ goto out;
+
+ do {
+ test = bottom + (top - bottom) / 2;
+ if (page_ok(test))
+ bottom = test;
+ else
+ top = test;
+ } while (top - bottom > 1);
+
+out:
+ /* Restore the old SIGSEGV handling */
+ sigaction(SIGSEGV, &old, NULL);
+
+ top <<= UM_KERN_PAGE_SHIFT;
+ printf("0x%x\n", top);
+ fflush(stdout);
+
+ return top;
+}
diff --git a/arch/um/os-Linux/sys-x86_64/Makefile b/arch/um/os-Linux/sys-x86_64/Makefile
index a42a4ef02e1e..a44a47f8f57b 100644
--- a/arch/um/os-Linux/sys-x86_64/Makefile
+++ b/arch/um/os-Linux/sys-x86_64/Makefile
@@ -3,7 +3,7 @@
# Licensed under the GPL
#
-obj-y = registers.o prctl.o signal.o
+obj-y = registers.o prctl.o signal.o task_size.o
USER_OBJS := $(obj-y)
diff --git a/arch/um/os-Linux/sys-x86_64/task_size.c b/arch/um/os-Linux/sys-x86_64/task_size.c
new file mode 100644
index 000000000000..fad6f57f8ee3
--- /dev/null
+++ b/arch/um/os-Linux/sys-x86_64/task_size.c
@@ -0,0 +1,5 @@
+unsigned long os_get_task_size(unsigned long shift)
+{
+ /* The old value of CONFIG_TOP_ADDR */
+ return 0x7fc0000000;
+}