diff options
Diffstat (limited to 'arch/x86')
90 files changed, 6760 insertions, 0 deletions
diff --git a/arch/x86/Makefile.um b/arch/x86/Makefile.um new file mode 100644 index 000000000000..36ddec6a41c9 --- /dev/null +++ b/arch/x86/Makefile.um @@ -0,0 +1,61 @@ +core-y += arch/x86/crypto/ + +ifeq ($(CONFIG_X86_32),y) +START := 0x8048000 + +LDFLAGS += -m elf_i386 +ELF_ARCH := i386 +ELF_FORMAT := elf32-i386 +CHECKFLAGS += -D__i386__ + +ifeq ("$(origin SUBARCH)", "command line") +ifneq ("$(shell uname -m | sed -e s/i.86/i386/)", "$(SUBARCH)") +KBUILD_CFLAGS += $(call cc-option,-m32) +KBUILD_AFLAGS += $(call cc-option,-m32) +LINK-y += $(call cc-option,-m32) + +export LDFLAGS +endif +endif + +# First of all, tune CFLAGS for the specific CPU. This actually sets cflags-y. +include $(srctree)/arch/x86/Makefile_32.cpu + +# prevent gcc from keeping the stack 16 byte aligned. Taken from i386. +cflags-y += $(call cc-option,-mpreferred-stack-boundary=2) + +# Prevent sprintf in nfsd from being converted to strcpy and resulting in +# an unresolved reference. +cflags-y += -ffreestanding + +# Disable unit-at-a-time mode on pre-gcc-4.0 compilers, it makes gcc use +# a lot more stack due to the lack of sharing of stacklots. Also, gcc +# 4.3.0 needs -funit-at-a-time for extern inline functions. +KBUILD_CFLAGS += $(shell if [ $(call cc-version) -lt 0400 ] ; then \ + echo $(call cc-option,-fno-unit-at-a-time); \ + else echo $(call cc-option,-funit-at-a-time); fi ;) + +KBUILD_CFLAGS += $(cflags-y) + +else + +START := 0x60000000 + +KBUILD_CFLAGS += -fno-builtin -m64 + +CHECKFLAGS += -m64 -D__x86_64__ +KBUILD_AFLAGS += -m64 +LDFLAGS += -m elf_x86_64 +KBUILD_CPPFLAGS += -m64 + +ELF_ARCH := i386:x86-64 +ELF_FORMAT := elf64-x86-64 + +# Not on all 64-bit distros /lib is a symlink to /lib64. PLD is an example. + +LINK-$(CONFIG_LD_SCRIPT_DYN) += -Wl,-rpath,/lib64 +LINK-y += -m64 + +# Do unit-at-a-time unconditionally on x86_64, following the host +KBUILD_CFLAGS += $(call cc-option,-funit-at-a-time) +endif diff --git a/arch/x86/um/Kconfig b/arch/x86/um/Kconfig new file mode 100644 index 000000000000..21bebe63df66 --- /dev/null +++ b/arch/x86/um/Kconfig @@ -0,0 +1,70 @@ +mainmenu "User Mode Linux/$SUBARCH $KERNELVERSION Kernel Configuration" + +source "arch/um/Kconfig.common" + +menu "UML-specific options" + +menu "Host processor type and features" + +config CMPXCHG_LOCAL + bool + default n + +config CMPXCHG_DOUBLE + bool + default n + +source "arch/x86/Kconfig.cpu" + +endmenu + +config UML_X86 + def_bool y + select GENERIC_FIND_FIRST_BIT + +config 64BIT + bool + default SUBARCH = "x86_64" + +config X86_32 + def_bool !64BIT + select HAVE_AOUT + +config X86_64 + def_bool 64BIT + +config RWSEM_XCHGADD_ALGORITHM + def_bool X86_XADD && 64BIT + +config RWSEM_GENERIC_SPINLOCK + def_bool !RWSEM_XCHGADD_ALGORITHM + +config 3_LEVEL_PGTABLES + bool "Three-level pagetables (EXPERIMENTAL)" if !64BIT + default 64BIT + depends on EXPERIMENTAL + help + Three-level pagetables will let UML have more than 4G of physical + memory. All the memory that can't be mapped directly will be treated + as high memory. + + However, this it experimental on 32-bit architectures, so if unsure say + N (on x86-64 it's automatically enabled, instead, as it's safe there). + +config ARCH_HAS_SC_SIGNALS + def_bool !64BIT + +config ARCH_REUSE_HOST_VSYSCALL_AREA + def_bool !64BIT + +config SMP_BROKEN + def_bool 64BIT + +config GENERIC_HWEIGHT + def_bool y + +source "arch/um/Kconfig.um" + +endmenu + +source "arch/um/Kconfig.rest" diff --git a/arch/x86/um/Makefile b/arch/x86/um/Makefile new file mode 100644 index 000000000000..df419896693f --- /dev/null +++ b/arch/x86/um/Makefile @@ -0,0 +1,45 @@ +# +# Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) +# + +ifeq ($(CONFIG_X86_32),y) + BITS := 32 +else + BITS := 64 +endif + +obj-y = bug.o bugs_$(BITS).o delay_$(BITS).o fault.o ksyms.o ldt.o \ + ptrace_$(BITS).o ptrace_user.o setjmp_$(BITS).o signal_$(BITS).o \ + stub_$(BITS).o stub_segv.o syscalls_$(BITS).o \ + sys_call_table_$(BITS).o sysrq_$(BITS).o tls_$(BITS).o \ + mem_$(BITS).o subarch.o os-$(OS)/ + +ifeq ($(CONFIG_X86_32),y) + +obj-y += checksum_32.o +obj-$(CONFIG_BINFMT_ELF) += elfcore.o + +subarch-y = ../lib/string_32.o ../lib/atomic64_32.o ../lib/atomic64_cx8_32.o +subarch-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += ../lib/rwsem.o +subarch-$(CONFIG_HIGHMEM) += ../mm/highmem_32.o + +else + +obj-y += vdso/ + +subarch-y = ../lib/csum-partial_64.o ../lib/memcpy_64.o ../lib/thunk_64.o \ + ../lib/rwsem.o + +endif + +subarch-$(CONFIG_MODULES) += ../kernel/module.o + +USER_OBJS := bugs_$(BITS).o ptrace_user.o fault.o + +extra-y += user-offsets.s +$(obj)/user-offsets.s: c_flags = -Wp,-MD,$(depfile) $(USER_CFLAGS) + +UNPROFILE_OBJS := stub_segv.o +CFLAGS_stub_segv.o := $(CFLAGS_NO_HARDENING) + +include arch/um/scripts/Makefile.rules diff --git a/arch/x86/um/asm/arch_hweight.h b/arch/x86/um/asm/arch_hweight.h new file mode 100644 index 000000000000..c656cf443f4a --- /dev/null +++ b/arch/x86/um/asm/arch_hweight.h @@ -0,0 +1,6 @@ +#ifndef _ASM_UM_HWEIGHT_H +#define _ASM_UM_HWEIGHT_H + +#include <asm-generic/bitops/arch_hweight.h> + +#endif diff --git a/arch/x86/um/asm/archparam.h b/arch/x86/um/asm/archparam.h new file mode 100644 index 000000000000..c17cf68dda0f --- /dev/null +++ b/arch/x86/um/asm/archparam.h @@ -0,0 +1,20 @@ +/* + * Copyright (C) 2000 - 2003 Jeff Dike (jdike@addtoit.com) + * Copyright 2003 PathScale, Inc. + * Licensed under the GPL + */ + +#ifndef __UM_ARCHPARAM_H +#define __UM_ARCHPARAM_H + +#ifdef CONFIG_X86_32 + +#ifdef CONFIG_X86_PAE +#define LAST_PKMAP 512 +#else +#define LAST_PKMAP 1024 +#endif + +#endif + +#endif diff --git a/arch/x86/um/asm/checksum.h b/arch/x86/um/asm/checksum.h new file mode 100644 index 000000000000..b6efe2381b5d --- /dev/null +++ b/arch/x86/um/asm/checksum.h @@ -0,0 +1,10 @@ +#ifndef __UM_CHECKSUM_H +#define __UM_CHECKSUM_H + +#ifdef CONFIG_X86_32 +# include "checksum_32.h" +#else +# include "checksum_64.h" +#endif + +#endif diff --git a/arch/x86/um/asm/checksum_32.h b/arch/x86/um/asm/checksum_32.h new file mode 100644 index 000000000000..caab74252e27 --- /dev/null +++ b/arch/x86/um/asm/checksum_32.h @@ -0,0 +1,201 @@ +/* + * Licensed under the GPL + */ + +#ifndef __UM_SYSDEP_CHECKSUM_H +#define __UM_SYSDEP_CHECKSUM_H + +#include "linux/in6.h" +#include "linux/string.h" + +/* + * computes the checksum of a memory block at buff, length len, + * and adds in "sum" (32-bit) + * + * returns a 32-bit number suitable for feeding into itself + * or csum_tcpudp_magic + * + * this function must be called with even lengths, except + * for the last fragment, which may be odd + * + * it's best to have buff aligned on a 32-bit boundary + */ +__wsum csum_partial(const void *buff, int len, __wsum sum); + +/* + * Note: when you get a NULL pointer exception here this means someone + * passed in an incorrect kernel address to one of these functions. + * + * If you use these functions directly please don't forget the + * access_ok(). + */ + +static __inline__ +__wsum csum_partial_copy_nocheck(const void *src, void *dst, + int len, __wsum sum) +{ + memcpy(dst, src, len); + return csum_partial(dst, len, sum); +} + +/* + * the same as csum_partial, but copies from src while it + * checksums, and handles user-space pointer exceptions correctly, when needed. + * + * here even more important to align src and dst on a 32-bit (or even + * better 64-bit) boundary + */ + +static __inline__ +__wsum csum_partial_copy_from_user(const void __user *src, void *dst, + int len, __wsum sum, int *err_ptr) +{ + if (copy_from_user(dst, src, len)) { + *err_ptr = -EFAULT; + return (__force __wsum)-1; + } + + return csum_partial(dst, len, sum); +} + +/* + * This is a version of ip_compute_csum() optimized for IP headers, + * which always checksum on 4 octet boundaries. + * + * By Jorge Cwik <jorge@laser.satlink.net>, adapted for linux by + * Arnt Gulbrandsen. + */ +static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl) +{ + unsigned int sum; + + __asm__ __volatile__( + "movl (%1), %0 ;\n" + "subl $4, %2 ;\n" + "jbe 2f ;\n" + "addl 4(%1), %0 ;\n" + "adcl 8(%1), %0 ;\n" + "adcl 12(%1), %0 ;\n" +"1: adcl 16(%1), %0 ;\n" + "lea 4(%1), %1 ;\n" + "decl %2 ;\n" + "jne 1b ;\n" + "adcl $0, %0 ;\n" + "movl %0, %2 ;\n" + "shrl $16, %0 ;\n" + "addw %w2, %w0 ;\n" + "adcl $0, %0 ;\n" + "notl %0 ;\n" +"2: ;\n" + /* Since the input registers which are loaded with iph and ipl + are modified, we must also specify them as outputs, or gcc + will assume they contain their original values. */ + : "=r" (sum), "=r" (iph), "=r" (ihl) + : "1" (iph), "2" (ihl) + : "memory"); + return (__force __sum16)sum; +} + +/* + * Fold a partial checksum + */ + +static inline __sum16 csum_fold(__wsum sum) +{ + __asm__( + "addl %1, %0 ;\n" + "adcl $0xffff, %0 ;\n" + : "=r" (sum) + : "r" ((__force u32)sum << 16), + "0" ((__force u32)sum & 0xffff0000) + ); + return (__force __sum16)(~(__force u32)sum >> 16); +} + +static inline __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr, + unsigned short len, + unsigned short proto, + __wsum sum) +{ + __asm__( + "addl %1, %0 ;\n" + "adcl %2, %0 ;\n" + "adcl %3, %0 ;\n" + "adcl $0, %0 ;\n" + : "=r" (sum) + : "g" (daddr), "g"(saddr), "g"((len + proto) << 8), "0"(sum)); + return sum; +} + +/* + * computes the checksum of the TCP/UDP pseudo-header + * returns a 16-bit checksum, already complemented + */ +static inline __sum16 csum_tcpudp_magic(__be32 saddr, __be32 daddr, + unsigned short len, + unsigned short proto, + __wsum sum) +{ + return csum_fold(csum_tcpudp_nofold(saddr,daddr,len,proto,sum)); +} + +/* + * this routine is used for miscellaneous IP-like checksums, mainly + * in icmp.c + */ + +static inline __sum16 ip_compute_csum(const void *buff, int len) +{ + return csum_fold (csum_partial(buff, len, 0)); +} + +#define _HAVE_ARCH_IPV6_CSUM +static __inline__ __sum16 csum_ipv6_magic(const struct in6_addr *saddr, + const struct in6_addr *daddr, + __u32 len, unsigned short proto, + __wsum sum) +{ + __asm__( + "addl 0(%1), %0 ;\n" + "adcl 4(%1), %0 ;\n" + "adcl 8(%1), %0 ;\n" + "adcl 12(%1), %0 ;\n" + "adcl 0(%2), %0 ;\n" + "adcl 4(%2), %0 ;\n" + "adcl 8(%2), %0 ;\n" + "adcl 12(%2), %0 ;\n" + "adcl %3, %0 ;\n" + "adcl %4, %0 ;\n" + "adcl $0, %0 ;\n" + : "=&r" (sum) + : "r" (saddr), "r" (daddr), + "r"(htonl(len)), "r"(htonl(proto)), "0"(sum)); + + return csum_fold(sum); +} + +/* + * Copy and checksum to user + */ +#define HAVE_CSUM_COPY_USER +static __inline__ __wsum csum_and_copy_to_user(const void *src, + void __user *dst, + int len, __wsum sum, int *err_ptr) +{ + if (access_ok(VERIFY_WRITE, dst, len)) { + if (copy_to_user(dst, src, len)) { + *err_ptr = -EFAULT; + return (__force __wsum)-1; + } + + return csum_partial(src, len, sum); + } + + if (len) + *err_ptr = -EFAULT; + + return (__force __wsum)-1; /* invalid checksum */ +} + +#endif + diff --git a/arch/x86/um/asm/checksum_64.h b/arch/x86/um/asm/checksum_64.h new file mode 100644 index 000000000000..a5be9031ea85 --- /dev/null +++ b/arch/x86/um/asm/checksum_64.h @@ -0,0 +1,144 @@ +/* + * Licensed under the GPL + */ + +#ifndef __UM_SYSDEP_CHECKSUM_H +#define __UM_SYSDEP_CHECKSUM_H + +#include "linux/string.h" +#include "linux/in6.h" +#include "asm/uaccess.h" + +extern __wsum csum_partial(const void *buff, int len, __wsum sum); + +/* + * Note: when you get a NULL pointer exception here this means someone + * passed in an incorrect kernel address to one of these functions. + * + * If you use these functions directly please don't forget the + * access_ok(). + */ + +static __inline__ +__wsum csum_partial_copy_nocheck(const void *src, void *dst, + int len, __wsum sum) +{ + memcpy(dst, src, len); + return(csum_partial(dst, len, sum)); +} + +static __inline__ +__wsum csum_partial_copy_from_user(const void __user *src, + void *dst, int len, __wsum sum, + int *err_ptr) +{ + if (copy_from_user(dst, src, len)) { + *err_ptr = -EFAULT; + return (__force __wsum)-1; + } + return csum_partial(dst, len, sum); +} + +/** + * csum_fold - Fold and invert a 32bit checksum. + * sum: 32bit unfolded sum + * + * Fold a 32bit running checksum to 16bit and invert it. This is usually + * the last step before putting a checksum into a packet. + * Make sure not to mix with 64bit checksums. + */ +static inline __sum16 csum_fold(__wsum sum) +{ + __asm__( + " addl %1,%0\n" + " adcl $0xffff,%0" + : "=r" (sum) + : "r" ((__force u32)sum << 16), + "0" ((__force u32)sum & 0xffff0000) + ); + return (__force __sum16)(~(__force u32)sum >> 16); +} + +/** + * csum_tcpup_nofold - Compute an IPv4 pseudo header checksum. + * @saddr: source address + * @daddr: destination address + * @len: length of packet + * @proto: ip protocol of packet + * @sum: initial sum to be added in (32bit unfolded) + * + * Returns the pseudo header checksum the input data. Result is + * 32bit unfolded. + */ +static inline __wsum +csum_tcpudp_nofold(__be32 saddr, __be32 daddr, unsigned short len, + unsigned short proto, __wsum sum) +{ + asm(" addl %1, %0\n" + " adcl %2, %0\n" + " adcl %3, %0\n" + " adcl $0, %0\n" + : "=r" (sum) + : "g" (daddr), "g" (saddr), "g" ((len + proto) << 8), "0" (sum)); + return sum; +} + +/* + * computes the checksum of the TCP/UDP pseudo-header + * returns a 16-bit checksum, already complemented + */ +static inline __sum16 csum_tcpudp_magic(__be32 saddr, __be32 daddr, + unsigned short len, + unsigned short proto, + __wsum sum) +{ + return csum_fold(csum_tcpudp_nofold(saddr,daddr,len,proto,sum)); +} + +/** + * ip_fast_csum - Compute the IPv4 header checksum efficiently. + * iph: ipv4 header + * ihl: length of header / 4 + */ +static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl) +{ + unsigned int sum; + + asm( " movl (%1), %0\n" + " subl $4, %2\n" + " jbe 2f\n" + " addl 4(%1), %0\n" + " adcl 8(%1), %0\n" + " adcl 12(%1), %0\n" + "1: adcl 16(%1), %0\n" + " lea 4(%1), %1\n" + " decl %2\n" + " jne 1b\n" + " adcl $0, %0\n" + " movl %0, %2\n" + " shrl $16, %0\n" + " addw %w2, %w0\n" + " adcl $0, %0\n" + " notl %0\n" + "2:" + /* Since the input registers which are loaded with iph and ipl + are modified, we must also specify them as outputs, or gcc + will assume they contain their original values. */ + : "=r" (sum), "=r" (iph), "=r" (ihl) + : "1" (iph), "2" (ihl) + : "memory"); + return (__force __sum16)sum; +} + +static inline unsigned add32_with_carry(unsigned a, unsigned b) +{ + asm("addl %2,%0\n\t" + "adcl $0,%0" + : "=r" (a) + : "0" (a), "r" (b)); + return a; +} + +extern __sum16 ip_compute_csum(const void *buff, int len); + +#endif diff --git a/arch/x86/um/asm/elf.h b/arch/x86/um/asm/elf.h new file mode 100644 index 000000000000..f3b0633b69a1 --- /dev/null +++ b/arch/x86/um/asm/elf.h @@ -0,0 +1,221 @@ +/* + * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ +#ifndef __UM_ELF_X86_H +#define __UM_ELF_X86_H + +#include <asm/user.h> +#include "skas.h" + +#ifdef CONFIG_X86_32 + +#define R_386_NONE 0 +#define R_386_32 1 +#define R_386_PC32 2 +#define R_386_GOT32 3 +#define R_386_PLT32 4 +#define R_386_COPY 5 +#define R_386_GLOB_DAT 6 +#define R_386_JMP_SLOT 7 +#define R_386_RELATIVE 8 +#define R_386_GOTOFF 9 +#define R_386_GOTPC 10 +#define R_386_NUM 11 + +/* + * This is used to ensure we don't load something for the wrong architecture. + */ +#define elf_check_arch(x) \ + (((x)->e_machine == EM_386) || ((x)->e_machine == EM_486)) + +#define ELF_CLASS ELFCLASS32 +#define ELF_DATA ELFDATA2LSB +#define ELF_ARCH EM_386 + +#define ELF_PLAT_INIT(regs, load_addr) do { \ + PT_REGS_EBX(regs) = 0; \ + PT_REGS_ECX(regs) = 0; \ + PT_REGS_EDX(regs) = 0; \ + PT_REGS_ESI(regs) = 0; \ + PT_REGS_EDI(regs) = 0; \ + PT_REGS_EBP(regs) = 0; \ + PT_REGS_EAX(regs) = 0; \ +} while (0) + +/* Shamelessly stolen from include/asm-i386/elf.h */ + +#define ELF_CORE_COPY_REGS(pr_reg, regs) do { \ + pr_reg[0] = PT_REGS_EBX(regs); \ + pr_reg[1] = PT_REGS_ECX(regs); \ + pr_reg[2] = PT_REGS_EDX(regs); \ + pr_reg[3] = PT_REGS_ESI(regs); \ + pr_reg[4] = PT_REGS_EDI(regs); \ + pr_reg[5] = PT_REGS_EBP(regs); \ + pr_reg[6] = PT_REGS_EAX(regs); \ + pr_reg[7] = PT_REGS_DS(regs); \ + pr_reg[8] = PT_REGS_ES(regs); \ + /* fake once used fs and gs selectors? */ \ + pr_reg[9] = PT_REGS_DS(regs); \ + pr_reg[10] = PT_REGS_DS(regs); \ + pr_reg[11] = PT_REGS_SYSCALL_NR(regs); \ + pr_reg[12] = PT_REGS_IP(regs); \ + pr_reg[13] = PT_REGS_CS(regs); \ + pr_reg[14] = PT_REGS_EFLAGS(regs); \ + pr_reg[15] = PT_REGS_SP(regs); \ + pr_reg[16] = PT_REGS_SS(regs); \ +} while (0); + +extern char * elf_aux_platform; +#define ELF_PLATFORM (elf_aux_platform) + +extern unsigned long vsyscall_ehdr; +extern unsigned long vsyscall_end; +extern unsigned long __kernel_vsyscall; + +/* + * This is the range that is readable by user mode, and things + * acting like user mode such as get_user_pages. + */ +#define FIXADDR_USER_START vsyscall_ehdr +#define FIXADDR_USER_END vsyscall_end + + +/* + * Architecture-neutral AT_ values in 0-17, leave some room + * for more of them, start the x86-specific ones at 32. + */ +#define AT_SYSINFO 32 +#define AT_SYSINFO_EHDR 33 + +#define ARCH_DLINFO \ +do { \ + if ( vsyscall_ehdr ) { \ + NEW_AUX_ENT(AT_SYSINFO, __kernel_vsyscall); \ + NEW_AUX_ENT(AT_SYSINFO_EHDR, vsyscall_ehdr); \ + } \ +} while (0) + +#else + +/* x86-64 relocation types, taken from asm-x86_64/elf.h */ +#define R_X86_64_NONE 0 /* No reloc */ +#define R_X86_64_64 1 /* Direct 64 bit */ +#define R_X86_64_PC32 2 /* PC relative 32 bit signed */ +#define R_X86_64_GOT32 3 /* 32 bit GOT entry */ +#define R_X86_64_PLT32 4 /* 32 bit PLT address */ +#define R_X86_64_COPY 5 /* Copy symbol at runtime */ +#define R_X86_64_GLOB_DAT 6 /* Create GOT entry */ +#define R_X86_64_JUMP_SLOT 7 /* Create PLT entry */ +#define R_X86_64_RELATIVE 8 /* Adjust by program base */ +#define R_X86_64_GOTPCREL 9 /* 32 bit signed pc relative + offset to GOT */ +#define R_X86_64_32 10 /* Direct 32 bit zero extended */ +#define R_X86_64_32S 11 /* Direct 32 bit sign extended */ +#define R_X86_64_16 12 /* Direct 16 bit zero extended */ +#define R_X86_64_PC16 13 /* 16 bit sign extended pc relative */ +#define R_X86_64_8 14 /* Direct 8 bit sign extended */ +#define R_X86_64_PC8 15 /* 8 bit sign extended pc relative */ + +#define R_X86_64_NUM 16 + +/* + * This is used to ensure we don't load something for the wrong architecture. + */ +#define elf_check_arch(x) \ + ((x)->e_machine == EM_X86_64) + +#define ELF_CLASS ELFCLASS64 +#define ELF_DATA ELFDATA2LSB +#define ELF_ARCH EM_X86_64 + +#define ELF_PLAT_INIT(regs, load_addr) do { \ + PT_REGS_RBX(regs) = 0; \ + PT_REGS_RCX(regs) = 0; \ + PT_REGS_RDX(regs) = 0; \ + PT_REGS_RSI(regs) = 0; \ + PT_REGS_RDI(regs) = 0; \ + PT_REGS_RBP(regs) = 0; \ + PT_REGS_RAX(regs) = 0; \ + PT_REGS_R8(regs) = 0; \ + PT_REGS_R9(regs) = 0; \ + PT_REGS_R10(regs) = 0; \ + PT_REGS_R11(regs) = 0; \ + PT_REGS_R12(regs) = 0; \ + PT_REGS_R13(regs) = 0; \ + PT_REGS_R14(regs) = 0; \ + PT_REGS_R15(regs) = 0; \ +} while (0) + +#define ELF_CORE_COPY_REGS(pr_reg, _regs) \ + (pr_reg)[0] = (_regs)->regs.gp[0]; \ + (pr_reg)[1] = (_regs)->regs.gp[1]; \ + (pr_reg)[2] = (_regs)->regs.gp[2]; \ + (pr_reg)[3] = (_regs)->regs.gp[3]; \ + (pr_reg)[4] = (_regs)->regs.gp[4]; \ + (pr_reg)[5] = (_regs)->regs.gp[5]; \ + (pr_reg)[6] = (_regs)->regs.gp[6]; \ + (pr_reg)[7] = (_regs)->regs.gp[7]; \ + (pr_reg)[8] = (_regs)->regs.gp[8]; \ + (pr_reg)[9] = (_regs)->regs.gp[9]; \ + (pr_reg)[10] = (_regs)->regs.gp[10]; \ + (pr_reg)[11] = (_regs)->regs.gp[11]; \ + (pr_reg)[12] = (_regs)->regs.gp[12]; \ + (pr_reg)[13] = (_regs)->regs.gp[13]; \ + (pr_reg)[14] = (_regs)->regs.gp[14]; \ + (pr_reg)[15] = (_regs)->regs.gp[15]; \ + (pr_reg)[16] = (_regs)->regs.gp[16]; \ + (pr_reg)[17] = (_regs)->regs.gp[17]; \ + (pr_reg)[18] = (_regs)->regs.gp[18]; \ + (pr_reg)[19] = (_regs)->regs.gp[19]; \ + (pr_reg)[20] = (_regs)->regs.gp[20]; \ + (pr_reg)[21] = current->thread.arch.fs; \ + (pr_reg)[22] = 0; \ + (pr_reg)[23] = 0; \ + (pr_reg)[24] = 0; \ + (pr_reg)[25] = 0; \ + (pr_reg)[26] = 0; + +#define ELF_PLATFORM "x86_64" + +/* No user-accessible fixmap addresses, i.e. vsyscall */ +#define FIXADDR_USER_START 0 +#define FIXADDR_USER_END 0 + +#define ARCH_HAS_SETUP_ADDITIONAL_PAGES 1 +struct linux_binprm; +extern int arch_setup_additional_pages(struct linux_binprm *bprm, + int uses_interp); + +extern unsigned long um_vdso_addr; +#define AT_SYSINFO_EHDR 33 +#define ARCH_DLINFO NEW_AUX_ENT(AT_SYSINFO_EHDR, um_vdso_addr) + +#endif + +typedef unsigned long elf_greg_t; + +#define ELF_NGREG (sizeof (struct user_regs_struct) / sizeof(elf_greg_t)) +typedef elf_greg_t elf_gregset_t[ELF_NGREG]; + +typedef struct user_i387_struct elf_fpregset_t; + +#define task_pt_regs(t) (&(t)->thread.regs) + +struct task_struct; + +extern int elf_core_copy_fpregs(struct task_struct *t, elf_fpregset_t *fpu); + +#define ELF_CORE_COPY_FPREGS(t, fpu) elf_core_copy_fpregs(t, fpu) + +#define ELF_EXEC_PAGESIZE 4096 + +#define ELF_ET_DYN_BASE (2 * TASK_SIZE / 3) + +extern long elf_aux_hwcap; +#define ELF_HWCAP (elf_aux_hwcap) + +#define SET_PERSONALITY(ex) do ; while(0) +#define __HAVE_ARCH_GATE_AREA 1 + +#endif diff --git a/arch/x86/um/asm/module.h b/arch/x86/um/asm/module.h new file mode 100644 index 000000000000..61af80e932eb --- /dev/null +++ b/arch/x86/um/asm/module.h @@ -0,0 +1,23 @@ +#ifndef __UM_MODULE_H +#define __UM_MODULE_H + +/* UML is simple */ +struct mod_arch_specific +{ +}; + +#ifdef CONFIG_X86_32 + +#define Elf_Shdr Elf32_Shdr +#define Elf_Sym Elf32_Sym +#define Elf_Ehdr Elf32_Ehdr + +#else + +#define Elf_Shdr Elf64_Shdr +#define Elf_Sym Elf64_Sym +#define Elf_Ehdr Elf64_Ehdr + +#endif + +#endif diff --git a/arch/x86/um/asm/processor.h b/arch/x86/um/asm/processor.h new file mode 100644 index 000000000000..d3ac1cecf0f4 --- /dev/null +++ b/arch/x86/um/asm/processor.h @@ -0,0 +1,15 @@ +#ifndef __UM_PROCESSOR_H +#define __UM_PROCESSOR_H + +/* include faultinfo structure */ +#include <sysdep/faultinfo.h> + +#ifdef CONFIG_X86_32 +# include "processor_32.h" +#else +# include "processor_64.h" +#endif + +#include <asm/processor-generic.h> + +#endif diff --git a/arch/x86/um/asm/processor_32.h b/arch/x86/um/asm/processor_32.h new file mode 100644 index 000000000000..ae0d189aafcf --- /dev/null +++ b/arch/x86/um/asm/processor_32.h @@ -0,0 +1,73 @@ +/* + * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) + * Licensed under the GPL + */ + +#ifndef __UM_PROCESSOR_I386_H +#define __UM_PROCESSOR_I386_H + +#include <linux/string.h> +#include <sysdep/host_ldt.h> +#include <asm/segment.h> + +extern int host_has_cmov; + +struct uml_tls_struct { + struct user_desc tls; + unsigned flushed:1; + unsigned present:1; +}; + +struct arch_thread { + struct uml_tls_struct tls_array[GDT_ENTRY_TLS_ENTRIES]; + unsigned long debugregs[8]; + int debugregs_seq; + struct faultinfo faultinfo; +}; + +#define INIT_ARCH_THREAD { \ + .tls_array = { [ 0 ... GDT_ENTRY_TLS_ENTRIES - 1 ] = \ + { .present = 0, .flushed = 0 } }, \ + .debugregs = { [ 0 ... 7 ] = 0 }, \ + .debugregs_seq = 0, \ + .faultinfo = { 0, 0, 0 } \ +} + +static inline void arch_flush_thread(struct arch_thread *thread) +{ + /* Clear any TLS still hanging */ + memset(&thread->tls_array, 0, sizeof(thread->tls_array)); +} + +static inline void arch_copy_thread(struct arch_thread *from, + struct arch_thread *to) +{ + memcpy(&to->tls_array, &from->tls_array, sizeof(from->tls_array)); +} + +#include <asm/user.h> + +/* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. */ +static inline void rep_nop(void) +{ + __asm__ __volatile__("rep;nop": : :"memory"); +} + +#define cpu_relax() rep_nop() + +/* + * Default implementation of macro that returns current + * instruction pointer ("program counter"). Stolen + * from asm-i386/processor.h + */ +#define current_text_addr() \ + ({ void *pc; __asm__("movl $1f,%0\n1:":"=g" (pc)); pc; }) + +#define ARCH_IS_STACKGROW(address) \ + (address + 32 >= UPT_SP(¤t->thread.regs.regs)) + +#define KSTK_EIP(tsk) KSTK_REG(tsk, EIP) +#define KSTK_ESP(tsk) KSTK_REG(tsk, UESP) +#define KSTK_EBP(tsk) KSTK_REG(tsk, EBP) + +#endif diff --git a/arch/x86/um/asm/processor_64.h b/arch/x86/um/asm/processor_64.h new file mode 100644 index 000000000000..6db812b24f48 --- /dev/null +++ b/arch/x86/um/asm/processor_64.h @@ -0,0 +1,51 @@ +/* + * Copyright 2003 PathScale, Inc. + * + * Licensed under the GPL + */ + +#ifndef __UM_PROCESSOR_X86_64_H +#define __UM_PROCESSOR_X86_64_H + +struct arch_thread { + unsigned long debugregs[8]; + int debugregs_seq; + unsigned long fs; + struct faultinfo faultinfo; +}; + +/* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. */ +static inline void rep_nop(void) +{ + __asm__ __volatile__("rep;nop": : :"memory"); +} + +#define cpu_relax() rep_nop() + +#define INIT_ARCH_THREAD { .debugregs = { [ 0 ... 7 ] = 0 }, \ + .debugregs_seq = 0, \ + .fs = 0, \ + .faultinfo = { 0, 0, 0 } } + +static inline void arch_flush_thread(struct arch_thread *thread) +{ +} + +static inline void arch_copy_thread(struct arch_thread *from, + struct arch_thread *to) +{ + to->fs = from->fs; +} + +#include <asm/user.h> + +#define current_text_addr() \ + ({ void *pc; __asm__("movq $1f,%0\n1:":"=g" (pc)); pc; }) + +#define ARCH_IS_STACKGROW(address) \ + (address + 128 >= UPT_SP(¤t->thread.regs.regs)) + +#define KSTK_EIP(tsk) KSTK_REG(tsk, RIP) +#define KSTK_ESP(tsk) KSTK_REG(tsk, RSP) + +#endif diff --git a/arch/x86/um/asm/ptrace.h b/arch/x86/um/asm/ptrace.h new file mode 100644 index 000000000000..c8aca8c501b0 --- /dev/null +++ b/arch/x86/um/asm/ptrace.h @@ -0,0 +1,5 @@ +#ifdef CONFIG_X86_32 +# include "ptrace_32.h" +#else +# include "ptrace_64.h" +#endif diff --git a/arch/x86/um/asm/ptrace_32.h b/arch/x86/um/asm/ptrace_32.h new file mode 100644 index 000000000000..5d2a59112537 --- /dev/null +++ b/arch/x86/um/asm/ptrace_32.h @@ -0,0 +1,51 @@ +/* + * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#ifndef __UM_PTRACE_I386_H +#define __UM_PTRACE_I386_H + +#define HOST_AUDIT_ARCH AUDIT_ARCH_I386 + +#include "linux/compiler.h" +#include "asm/ptrace-generic.h" + +#define PT_REGS_EAX(r) UPT_EAX(&(r)->regs) +#define PT_REGS_EBX(r) UPT_EBX(&(r)->regs) +#define PT_REGS_ECX(r) UPT_ECX(&(r)->regs) +#define PT_REGS_EDX(r) UPT_EDX(&(r)->regs) +#define PT_REGS_ESI(r) UPT_ESI(&(r)->regs) +#define PT_REGS_EDI(r) UPT_EDI(&(r)->regs) +#define PT_REGS_EBP(r) UPT_EBP(&(r)->regs) + +#define PT_REGS_CS(r) UPT_CS(&(r)->regs) +#define PT_REGS_SS(r) UPT_SS(&(r)->regs) +#define PT_REGS_DS(r) UPT_DS(&(r)->regs) +#define PT_REGS_ES(r) UPT_ES(&(r)->regs) +#define PT_REGS_FS(r) UPT_FS(&(r)->regs) +#define PT_REGS_GS(r) UPT_GS(&(r)->regs) + +#define PT_REGS_EFLAGS(r) UPT_EFLAGS(&(r)->regs) + +#define PT_REGS_ORIG_SYSCALL(r) PT_REGS_EAX(r) +#define PT_REGS_SYSCALL_RET(r) PT_REGS_EAX(r) +#define PT_FIX_EXEC_STACK(sp) do ; while(0) + +#define profile_pc(regs) PT_REGS_IP(regs) + +#define user_mode(r) UPT_IS_USER(&(r)->regs) + +/* + * Forward declaration to avoid including sysdep/tls.h, which causes a + * circular include, and compilation failures. + */ +struct user_desc; + +extern int ptrace_get_thread_area(struct task_struct *child, int idx, + struct user_desc __user *user_desc); + +extern int ptrace_set_thread_area(struct task_struct *child, int idx, + struct user_desc __user *user_desc); + +#endif diff --git a/arch/x86/um/asm/ptrace_64.h b/arch/x86/um/asm/ptrace_64.h new file mode 100644 index 000000000000..83d8c473b905 --- /dev/null +++ b/arch/x86/um/asm/ptrace_64.h @@ -0,0 +1,72 @@ +/* + * Copyright 2003 PathScale, Inc. + * + * Licensed under the GPL + */ + +#ifndef __UM_PTRACE_X86_64_H +#define __UM_PTRACE_X86_64_H + +#include "linux/compiler.h" +#include "asm/errno.h" + +#define __FRAME_OFFSETS /* Needed to get the R* macros */ +#include "asm/ptrace-generic.h" + +#define HOST_AUDIT_ARCH AUDIT_ARCH_X86_64 + +#define PT_REGS_RBX(r) UPT_RBX(&(r)->regs) +#define PT_REGS_RCX(r) UPT_RCX(&(r)->regs) +#define PT_REGS_RDX(r) UPT_RDX(&(r)->regs) +#define PT_REGS_RSI(r) UPT_RSI(&(r)->regs) +#define PT_REGS_RDI(r) UPT_RDI(&(r)->regs) +#define PT_REGS_RBP(r) UPT_RBP(&(r)->regs) +#define PT_REGS_RAX(r) UPT_RAX(&(r)->regs) +#define PT_REGS_R8(r) UPT_R8(&(r)->regs) +#define PT_REGS_R9(r) UPT_R9(&(r)->regs) +#define PT_REGS_R10(r) UPT_R10(&(r)->regs) +#define PT_REGS_R11(r) UPT_R11(&(r)->regs) +#define PT_REGS_R12(r) UPT_R12(&(r)->regs) +#define PT_REGS_R13(r) UPT_R13(&(r)->regs) +#define PT_REGS_R14(r) UPT_R14(&(r)->regs) +#define PT_REGS_R15(r) UPT_R15(&(r)->regs) + +#define PT_REGS_FS(r) UPT_FS(&(r)->regs) +#define PT_REGS_GS(r) UPT_GS(&(r)->regs) +#define PT_REGS_DS(r) UPT_DS(&(r)->regs) +#define PT_REGS_ES(r) UPT_ES(&(r)->regs) +#define PT_REGS_SS(r) UPT_SS(&(r)->regs) +#define PT_REGS_CS(r) UPT_CS(&(r)->regs) + +#define PT_REGS_ORIG_RAX(r) UPT_ORIG_RAX(&(r)->regs) +#define PT_REGS_RIP(r) UPT_IP(&(r)->regs) +#define PT_REGS_RSP(r) UPT_SP(&(r)->regs) + +#define PT_REGS_EFLAGS(r) UPT_EFLAGS(&(r)->regs) + +/* XXX */ +#define user_mode(r) UPT_IS_USER(&(r)->regs) +#define PT_REGS_ORIG_SYSCALL(r) PT_REGS_RAX(r) +#define PT_REGS_SYSCALL_RET(r) PT_REGS_RAX(r) + +#define PT_FIX_EXEC_STACK(sp) do ; while(0) + +#define profile_pc(regs) PT_REGS_IP(regs) + +struct user_desc; + +static inline int ptrace_get_thread_area(struct task_struct *child, int idx, + struct user_desc __user *user_desc) +{ + return -ENOSYS; +} + +static inline int ptrace_set_thread_area(struct task_struct *child, int idx, + struct user_desc __user *user_desc) +{ + return -ENOSYS; +} + +extern long arch_prctl(struct task_struct *task, int code, + unsigned long __user *addr); +#endif diff --git a/arch/x86/um/asm/system.h b/arch/x86/um/asm/system.h new file mode 100644 index 000000000000..a89113bc74f2 --- /dev/null +++ b/arch/x86/um/asm/system.h @@ -0,0 +1,133 @@ +#ifndef _ASM_X86_SYSTEM_H_ +#define _ASM_X86_SYSTEM_H_ + +#include <asm/asm.h> +#include <asm/segment.h> +#include <asm/cpufeature.h> +#include <asm/cmpxchg.h> +#include <asm/nops.h> +#include <asm/system-um.h> + +#include <linux/kernel.h> +#include <linux/irqflags.h> + +/* entries in ARCH_DLINFO: */ +#ifdef CONFIG_IA32_EMULATION +# define AT_VECTOR_SIZE_ARCH 2 +#else +# define AT_VECTOR_SIZE_ARCH 1 +#endif + +extern unsigned long arch_align_stack(unsigned long sp); + +void default_idle(void); + +/* + * Force strict CPU ordering. + * And yes, this is required on UP too when we're talking + * to devices. + */ +#ifdef CONFIG_X86_32 +/* + * Some non-Intel clones support out of order store. wmb() ceases to be a + * nop for these. + */ +#define mb() alternative("lock; addl $0,0(%%esp)", "mfence", X86_FEATURE_XMM2) +#define rmb() alternative("lock; addl $0,0(%%esp)", "lfence", X86_FEATURE_XMM2) +#define wmb() alternative("lock; addl $0,0(%%esp)", "sfence", X86_FEATURE_XMM) +#else +#define mb() asm volatile("mfence":::"memory") +#define rmb() asm volatile("lfence":::"memory") +#define wmb() asm volatile("sfence" ::: "memory") +#endif + +/** + * read_barrier_depends - Flush all pending reads that subsequents reads + * depend on. + * + * No data-dependent reads from memory-like regions are ever reordered + * over this barrier. All reads preceding this primitive are guaranteed + * to access memory (but not necessarily other CPUs' caches) before any + * reads following this primitive that depend on the data return by + * any of the preceding reads. This primitive is much lighter weight than + * rmb() on most CPUs, and is never heavier weight than is + * rmb(). + * + * These ordering constraints are respected by both the local CPU + * and the compiler. + * + * Ordering is not guaranteed by anything other than these primitives, + * not even by data dependencies. See the documentation for + * memory_barrier() for examples and URLs to more information. + * + * For example, the following code would force ordering (the initial + * value of "a" is zero, "b" is one, and "p" is "&a"): + * + * <programlisting> + * CPU 0 CPU 1 + * + * b = 2; + * memory_barrier(); + * p = &b; q = p; + * read_barrier_depends(); + * d = *q; + * </programlisting> + * + * because the read of "*q" depends on the read of "p" and these + * two reads are separated by a read_barrier_depends(). However, + * the following code, with the same initial values for "a" and "b": + * + * <programlisting> + * CPU 0 CPU 1 + * + * a = 2; + * memory_barrier(); + * b = 3; y = b; + * read_barrier_depends(); + * x = a; + * </programlisting> + * + * does not enforce ordering, since there is no data dependency between + * the read of "a" and the read of "b". Therefore, on some CPUs, such + * as Alpha, "y" could be set to 3 and "x" to 0. Use rmb() + * in cases like this where there are no data dependencies. + **/ + +#define read_barrier_depends() do { } while (0) + +#ifdef CONFIG_SMP +#define smp_mb() mb() +#ifdef CONFIG_X86_PPRO_FENCE +# define smp_rmb() rmb() +#else +# define smp_rmb() barrier() +#endif +#ifdef CONFIG_X86_OOSTORE +# define smp_wmb() wmb() +#else +# define smp_wmb() barrier() +#endif +#define smp_read_barrier_depends() read_barrier_depends() +#define set_mb(var, value) do { (void)xchg(&var, value); } while (0) +#else +#define smp_mb() barrier() +#define smp_rmb() barrier() +#define smp_wmb() barrier() +#define smp_read_barrier_depends() do { } while (0) +#define set_mb(var, value) do { var = value; barrier(); } while (0) +#endif + +/* + * Stop RDTSC speculation. This is needed when you need to use RDTSC + * (or get_cycles or vread that possibly accesses the TSC) in a defined + * code region. + * + * (Could use an alternative three way for this if there was one.) + */ +static inline void rdtsc_barrier(void) +{ + alternative(ASM_NOP3, "mfence", X86_FEATURE_MFENCE_RDTSC); + alternative(ASM_NOP3, "lfence", X86_FEATURE_LFENCE_RDTSC); +} + +#endif diff --git a/arch/x86/um/asm/vm-flags.h b/arch/x86/um/asm/vm-flags.h new file mode 100644 index 000000000000..7c297e9e2413 --- /dev/null +++ b/arch/x86/um/asm/vm-flags.h @@ -0,0 +1,25 @@ +/* + * Copyright (C) 2004 Jeff Dike (jdike@addtoit.com) + * Copyright 2003 PathScale, Inc. + * Licensed under the GPL + */ + +#ifndef __VM_FLAGS_X86_H +#define __VM_FLAGS_X86_H + +#ifdef CONFIG_X86_32 + +#define VM_DATA_DEFAULT_FLAGS \ + (VM_READ | VM_WRITE | \ + ((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0 ) | \ + VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) + +#else + +#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_EXEC | \ + VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) +#define VM_STACK_DEFAULT_FLAGS (VM_GROWSDOWN | VM_READ | VM_WRITE | \ + VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) + +#endif +#endif diff --git a/arch/x86/um/bug.c b/arch/x86/um/bug.c new file mode 100644 index 000000000000..e8034e363d83 --- /dev/null +++ b/arch/x86/um/bug.c @@ -0,0 +1,21 @@ +/* + * Copyright (C) 2006 Jeff Dike (jdike@addtoit.com) + * Licensed under the GPL V2 + */ + +#include <linux/uaccess.h> + +/* + * Mostly copied from i386/x86_86 - eliminated the eip < PAGE_OFFSET because + * that's not relevant in skas mode. + */ + +int is_valid_bugaddr(unsigned long eip) +{ + unsigned short ud2; + + if (probe_kernel_address((unsigned short __user *)eip, ud2)) + return 0; + + return ud2 == 0x0b0f; +} diff --git a/arch/x86/um/bugs_32.c b/arch/x86/um/bugs_32.c new file mode 100644 index 000000000000..7058e1fa903b --- /dev/null +++ b/arch/x86/um/bugs_32.c @@ -0,0 +1,74 @@ +/* + * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#include <signal.h> +#include "kern_util.h" +#include "longjmp.h" +#include "task.h" +#include "sysdep/ptrace.h" + +/* Set during early boot */ +static int host_has_cmov = 1; +static jmp_buf cmov_test_return; + +static void cmov_sigill_test_handler(int sig) +{ + host_has_cmov = 0; + longjmp(cmov_test_return, 1); +} + +void arch_check_bugs(void) +{ + struct sigaction old, new; + + printk(UM_KERN_INFO "Checking for host processor cmov support..."); + new.sa_handler = cmov_sigill_test_handler; + + /* Make sure that SIGILL is enabled after the handler longjmps back */ + new.sa_flags = SA_NODEFER; + sigemptyset(&new.sa_mask); + sigaction(SIGILL, &new, &old); + + if (setjmp(cmov_test_return) == 0) { + unsigned long foo = 0; + __asm__ __volatile__("cmovz %0, %1" : "=r" (foo) : "0" (foo)); + printk(UM_KERN_CONT "Yes\n"); + } else + printk(UM_KERN_CONT "No\n"); + + sigaction(SIGILL, &old, &new); +} + +void arch_examine_signal(int sig, struct uml_pt_regs *regs) +{ + unsigned char tmp[2]; + + /* + * This is testing for a cmov (0x0f 0x4x) instruction causing a + * SIGILL in init. + */ + if ((sig != SIGILL) || (TASK_PID(get_current()) != 1)) + return; + + if (copy_from_user_proc(tmp, (void *) UPT_IP(regs), 2)) { + printk(UM_KERN_ERR "SIGILL in init, could not read " + "instructions!\n"); + return; + } + + if ((tmp[0] != 0x0f) || ((tmp[1] & 0xf0) != 0x40)) + return; + + if (host_has_cmov == 0) + printk(UM_KERN_ERR "SIGILL caused by cmov, which this " + "processor doesn't implement. Boot a filesystem " + "compiled for older processors"); + else if (host_has_cmov == 1) + printk(UM_KERN_ERR "SIGILL caused by cmov, which this " + "processor claims to implement"); + else + printk(UM_KERN_ERR "Bad value for host_has_cmov (%d)", + host_has_cmov); +} diff --git a/arch/x86/um/bugs_64.c b/arch/x86/um/bugs_64.c new file mode 100644 index 000000000000..44e02ba2a265 --- /dev/null +++ b/arch/x86/um/bugs_64.c @@ -0,0 +1,15 @@ +/* + * Copyright 2003 PathScale, Inc. + * + * Licensed under the GPL + */ + +#include "sysdep/ptrace.h" + +void arch_check_bugs(void) +{ +} + +void arch_examine_signal(int sig, struct uml_pt_regs *regs) +{ +} diff --git a/arch/x86/um/checksum_32.S b/arch/x86/um/checksum_32.S new file mode 100644 index 000000000000..f058d2f82e18 --- /dev/null +++ b/arch/x86/um/checksum_32.S @@ -0,0 +1,458 @@ +/* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * IP/TCP/UDP checksumming routines + * + * Authors: Jorge Cwik, <jorge@laser.satlink.net> + * Arnt Gulbrandsen, <agulbra@nvg.unit.no> + * Tom May, <ftom@netcom.com> + * Pentium Pro/II routines: + * Alexander Kjeldaas <astor@guardian.no> + * Finn Arne Gangstad <finnag@guardian.no> + * Lots of code moved from tcp.c and ip.c; see those files + * for more names. + * + * Changes: Ingo Molnar, converted csum_partial_copy() to 2.1 exception + * handling. + * Andi Kleen, add zeroing on error + * converted to pure assembler + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <asm/errno.h> + +/* + * computes a partial checksum, e.g. for TCP/UDP fragments + */ + +/* +unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum) + */ + +.text +.align 4 +.globl csum_partial + +#ifndef CONFIG_X86_USE_PPRO_CHECKSUM + + /* + * Experiments with Ethernet and SLIP connections show that buff + * is aligned on either a 2-byte or 4-byte boundary. We get at + * least a twofold speedup on 486 and Pentium if it is 4-byte aligned. + * Fortunately, it is easy to convert 2-byte alignment to 4-byte + * alignment for the unrolled loop. + */ +csum_partial: + pushl %esi + pushl %ebx + movl 20(%esp),%eax # Function arg: unsigned int sum + movl 16(%esp),%ecx # Function arg: int len + movl 12(%esp),%esi # Function arg: unsigned char *buff + testl $2, %esi # Check alignment. + jz 2f # Jump if alignment is ok. + subl $2, %ecx # Alignment uses up two bytes. + jae 1f # Jump if we had at least two bytes. + addl $2, %ecx # ecx was < 2. Deal with it. + jmp 4f +1: movw (%esi), %bx + addl $2, %esi + addw %bx, %ax + adcl $0, %eax +2: + movl %ecx, %edx + shrl $5, %ecx + jz 2f + testl %esi, %esi +1: movl (%esi), %ebx + adcl %ebx, %eax + movl 4(%esi), %ebx + adcl %ebx, %eax + movl 8(%esi), %ebx + adcl %ebx, %eax + movl 12(%esi), %ebx + adcl %ebx, %eax + movl 16(%esi), %ebx + adcl %ebx, %eax + movl 20(%esi), %ebx + adcl %ebx, %eax + movl 24(%esi), %ebx + adcl %ebx, %eax + movl 28(%esi), %ebx + adcl %ebx, %eax + lea 32(%esi), %esi + dec %ecx + jne 1b + adcl $0, %eax +2: movl %edx, %ecx + andl $0x1c, %edx + je 4f + shrl $2, %edx # This clears CF +3: adcl (%esi), %eax + lea 4(%esi), %esi + dec %edx + jne 3b + adcl $0, %eax +4: andl $3, %ecx + jz 7f + cmpl $2, %ecx + jb 5f + movw (%esi),%cx + leal 2(%esi),%esi + je 6f + shll $16,%ecx +5: movb (%esi),%cl +6: addl %ecx,%eax + adcl $0, %eax +7: + popl %ebx + popl %esi + ret + +#else + +/* Version for PentiumII/PPro */ + +csum_partial: + pushl %esi + pushl %ebx + movl 20(%esp),%eax # Function arg: unsigned int sum + movl 16(%esp),%ecx # Function arg: int len + movl 12(%esp),%esi # Function arg: const unsigned char *buf + + testl $2, %esi + jnz 30f +10: + movl %ecx, %edx + movl %ecx, %ebx + andl $0x7c, %ebx + shrl $7, %ecx + addl %ebx,%esi + shrl $2, %ebx + negl %ebx + lea 45f(%ebx,%ebx,2), %ebx + testl %esi, %esi + jmp *%ebx + + # Handle 2-byte-aligned regions +20: addw (%esi), %ax + lea 2(%esi), %esi + adcl $0, %eax + jmp 10b + +30: subl $2, %ecx + ja 20b + je 32f + movzbl (%esi),%ebx # csumming 1 byte, 2-aligned + addl %ebx, %eax + adcl $0, %eax + jmp 80f +32: + addw (%esi), %ax # csumming 2 bytes, 2-aligned + adcl $0, %eax + jmp 80f + +40: + addl -128(%esi), %eax + adcl -124(%esi), %eax + adcl -120(%esi), %eax + adcl -116(%esi), %eax + adcl -112(%esi), %eax + adcl -108(%esi), %eax + adcl -104(%esi), %eax + adcl -100(%esi), %eax + adcl -96(%esi), %eax + adcl -92(%esi), %eax + adcl -88(%esi), %eax + adcl -84(%esi), %eax + adcl -80(%esi), %eax + adcl -76(%esi), %eax + adcl -72(%esi), %eax + adcl -68(%esi), %eax + adcl -64(%esi), %eax + adcl -60(%esi), %eax + adcl -56(%esi), %eax + adcl -52(%esi), %eax + adcl -48(%esi), %eax + adcl -44(%esi), %eax + adcl -40(%esi), %eax + adcl -36(%esi), %eax + adcl -32(%esi), %eax + adcl -28(%esi), %eax + adcl -24(%esi), %eax + adcl -20(%esi), %eax + adcl -16(%esi), %eax + adcl -12(%esi), %eax + adcl -8(%esi), %eax + adcl -4(%esi), %eax +45: + lea 128(%esi), %esi + adcl $0, %eax + dec %ecx + jge 40b + movl %edx, %ecx +50: andl $3, %ecx + jz 80f + + # Handle the last 1-3 bytes without jumping + notl %ecx # 1->2, 2->1, 3->0, higher bits are masked + movl $0xffffff,%ebx # by the shll and shrl instructions + shll $3,%ecx + shrl %cl,%ebx + andl -128(%esi),%ebx # esi is 4-aligned so should be ok + addl %ebx,%eax + adcl $0,%eax +80: + popl %ebx + popl %esi + ret + +#endif + +/* +unsigned int csum_partial_copy_generic (const char *src, char *dst, + int len, int sum, int *src_err_ptr, int *dst_err_ptr) + */ + +/* + * Copy from ds while checksumming, otherwise like csum_partial + * + * The macros SRC and DST specify the type of access for the instruction. + * thus we can call a custom exception handler for all access types. + * + * FIXME: could someone double-check whether I haven't mixed up some SRC and + * DST definitions? It's damn hard to trigger all cases. I hope I got + * them all but there's no guarantee. + */ + +#define SRC(y...) \ + 9999: y; \ + .section __ex_table, "a"; \ + .long 9999b, 6001f ; \ + .previous + +#define DST(y...) \ + 9999: y; \ + .section __ex_table, "a"; \ + .long 9999b, 6002f ; \ + .previous + +.align 4 + +#ifndef CONFIG_X86_USE_PPRO_CHECKSUM + +#define ARGBASE 16 +#define FP 12 + +csum_partial_copy_generic_i386: + subl $4,%esp + pushl %edi + pushl %esi + pushl %ebx + movl ARGBASE+16(%esp),%eax # sum + movl ARGBASE+12(%esp),%ecx # len + movl ARGBASE+4(%esp),%esi # src + movl ARGBASE+8(%esp),%edi # dst + + testl $2, %edi # Check alignment. + jz 2f # Jump if alignment is ok. + subl $2, %ecx # Alignment uses up two bytes. + jae 1f # Jump if we had at least two bytes. + addl $2, %ecx # ecx was < 2. Deal with it. + jmp 4f +SRC(1: movw (%esi), %bx ) + addl $2, %esi +DST( movw %bx, (%edi) ) + addl $2, %edi + addw %bx, %ax + adcl $0, %eax +2: + movl %ecx, FP(%esp) + shrl $5, %ecx + jz 2f + testl %esi, %esi +SRC(1: movl (%esi), %ebx ) +SRC( movl 4(%esi), %edx ) + adcl %ebx, %eax +DST( movl %ebx, (%edi) ) + adcl %edx, %eax +DST( movl %edx, 4(%edi) ) + +SRC( movl 8(%esi), %ebx ) +SRC( movl 12(%esi), %edx ) + adcl %ebx, %eax +DST( movl %ebx, 8(%edi) ) + adcl %edx, %eax +DST( movl %edx, 12(%edi) ) + +SRC( movl 16(%esi), %ebx ) +SRC( movl 20(%esi), %edx ) + adcl %ebx, %eax +DST( movl %ebx, 16(%edi) ) + adcl %edx, %eax +DST( movl %edx, 20(%edi) ) + +SRC( movl 24(%esi), %ebx ) +SRC( movl 28(%esi), %edx ) + adcl %ebx, %eax +DST( movl %ebx, 24(%edi) ) + adcl %edx, %eax +DST( movl %edx, 28(%edi) ) + + lea 32(%esi), %esi + lea 32(%edi), %edi + dec %ecx + jne 1b + adcl $0, %eax +2: movl FP(%esp), %edx + movl %edx, %ecx + andl $0x1c, %edx + je 4f + shrl $2, %edx # This clears CF +SRC(3: movl (%esi), %ebx ) + adcl %ebx, %eax +DST( movl %ebx, (%edi) ) + lea 4(%esi), %esi + lea 4(%edi), %edi + dec %edx + jne 3b + adcl $0, %eax +4: andl $3, %ecx + jz 7f + cmpl $2, %ecx + jb 5f +SRC( movw (%esi), %cx ) + leal 2(%esi), %esi +DST( movw %cx, (%edi) ) + leal 2(%edi), %edi + je 6f + shll $16,%ecx +SRC(5: movb (%esi), %cl ) +DST( movb %cl, (%edi) ) +6: addl %ecx, %eax + adcl $0, %eax +7: +5000: + +# Exception handler: +.section .fixup, "ax" + +6001: + movl ARGBASE+20(%esp), %ebx # src_err_ptr + movl $-EFAULT, (%ebx) + + # zero the complete destination - computing the rest + # is too much work + movl ARGBASE+8(%esp), %edi # dst + movl ARGBASE+12(%esp), %ecx # len + xorl %eax,%eax + rep ; stosb + + jmp 5000b + +6002: + movl ARGBASE+24(%esp), %ebx # dst_err_ptr + movl $-EFAULT,(%ebx) + jmp 5000b + +.previous + + popl %ebx + popl %esi + popl %edi + popl %ecx # equivalent to addl $4,%esp + ret + +#else + +/* Version for PentiumII/PPro */ + +#define ROUND1(x) \ + SRC(movl x(%esi), %ebx ) ; \ + addl %ebx, %eax ; \ + DST(movl %ebx, x(%edi) ) ; + +#define ROUND(x) \ + SRC(movl x(%esi), %ebx ) ; \ + adcl %ebx, %eax ; \ + DST(movl %ebx, x(%edi) ) ; + +#define ARGBASE 12 + +csum_partial_copy_generic_i386: + pushl %ebx + pushl %edi + pushl %esi + movl ARGBASE+4(%esp),%esi #src + movl ARGBASE+8(%esp),%edi #dst + movl ARGBASE+12(%esp),%ecx #len + movl ARGBASE+16(%esp),%eax #sum +# movl %ecx, %edx + movl %ecx, %ebx + movl %esi, %edx + shrl $6, %ecx + andl $0x3c, %ebx + negl %ebx + subl %ebx, %esi + subl %ebx, %edi + lea -1(%esi),%edx + andl $-32,%edx + lea 3f(%ebx,%ebx), %ebx + testl %esi, %esi + jmp *%ebx +1: addl $64,%esi + addl $64,%edi + SRC(movb -32(%edx),%bl) ; SRC(movb (%edx),%bl) + ROUND1(-64) ROUND(-60) ROUND(-56) ROUND(-52) + ROUND (-48) ROUND(-44) ROUND(-40) ROUND(-36) + ROUND (-32) ROUND(-28) ROUND(-24) ROUND(-20) + ROUND (-16) ROUND(-12) ROUND(-8) ROUND(-4) +3: adcl $0,%eax + addl $64, %edx + dec %ecx + jge 1b +4: movl ARGBASE+12(%esp),%edx #len + andl $3, %edx + jz 7f + cmpl $2, %edx + jb 5f +SRC( movw (%esi), %dx ) + leal 2(%esi), %esi +DST( movw %dx, (%edi) ) + leal 2(%edi), %edi + je 6f + shll $16,%edx +5: +SRC( movb (%esi), %dl ) +DST( movb %dl, (%edi) ) +6: addl %edx, %eax + adcl $0, %eax +7: +.section .fixup, "ax" +6001: movl ARGBASE+20(%esp), %ebx # src_err_ptr + movl $-EFAULT, (%ebx) + # zero the complete destination (computing the rest is too much work) + movl ARGBASE+8(%esp),%edi # dst + movl ARGBASE+12(%esp),%ecx # len + xorl %eax,%eax + rep; stosb + jmp 7b +6002: movl ARGBASE+24(%esp), %ebx # dst_err_ptr + movl $-EFAULT, (%ebx) + jmp 7b +.previous + + popl %esi + popl %edi + popl %ebx + ret + +#undef ROUND +#undef ROUND1 + +#endif diff --git a/arch/x86/um/delay_32.c b/arch/x86/um/delay_32.c new file mode 100644 index 000000000000..f3fe1a688f7e --- /dev/null +++ b/arch/x86/um/delay_32.c @@ -0,0 +1,60 @@ +/* + * Copyright (C) 2011 Richard Weinberger <richrd@nod.at> + * Mostly copied from arch/x86/lib/delay.c + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/delay.h> +#include <asm/param.h> + +void __delay(unsigned long loops) +{ + asm volatile( + "test %0,%0\n" + "jz 3f\n" + "jmp 1f\n" + + ".align 16\n" + "1: jmp 2f\n" + + ".align 16\n" + "2: dec %0\n" + " jnz 2b\n" + "3: dec %0\n" + + : /* we don't need output */ + : "a" (loops) + ); +} +EXPORT_SYMBOL(__delay); + +inline void __const_udelay(unsigned long xloops) +{ + int d0; + + xloops *= 4; + asm("mull %%edx" + : "=d" (xloops), "=&a" (d0) + : "1" (xloops), "0" + (loops_per_jiffy * (HZ/4))); + + __delay(++xloops); +} +EXPORT_SYMBOL(__const_udelay); + +void __udelay(unsigned long usecs) +{ + __const_udelay(usecs * 0x000010c7); /* 2**32 / 1000000 (rounded up) */ +} +EXPORT_SYMBOL(__udelay); + +void __ndelay(unsigned long nsecs) +{ + __const_udelay(nsecs * 0x00005); /* 2**32 / 1000000000 (rounded up) */ +} +EXPORT_SYMBOL(__ndelay); diff --git a/arch/x86/um/delay_64.c b/arch/x86/um/delay_64.c new file mode 100644 index 000000000000..f3fe1a688f7e --- /dev/null +++ b/arch/x86/um/delay_64.c @@ -0,0 +1,60 @@ +/* + * Copyright (C) 2011 Richard Weinberger <richrd@nod.at> + * Mostly copied from arch/x86/lib/delay.c + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/delay.h> +#include <asm/param.h> + +void __delay(unsigned long loops) +{ + asm volatile( + "test %0,%0\n" + "jz 3f\n" + "jmp 1f\n" + + ".align 16\n" + "1: jmp 2f\n" + + ".align 16\n" + "2: dec %0\n" + " jnz 2b\n" + "3: dec %0\n" + + : /* we don't need output */ + : "a" (loops) + ); +} +EXPORT_SYMBOL(__delay); + +inline void __const_udelay(unsigned long xloops) +{ + int d0; + + xloops *= 4; + asm("mull %%edx" + : "=d" (xloops), "=&a" (d0) + : "1" (xloops), "0" + (loops_per_jiffy * (HZ/4))); + + __delay(++xloops); +} +EXPORT_SYMBOL(__const_udelay); + +void __udelay(unsigned long usecs) +{ + __const_udelay(usecs * 0x000010c7); /* 2**32 / 1000000 (rounded up) */ +} +EXPORT_SYMBOL(__udelay); + +void __ndelay(unsigned long nsecs) +{ + __const_udelay(nsecs * 0x00005); /* 2**32 / 1000000000 (rounded up) */ +} +EXPORT_SYMBOL(__ndelay); diff --git a/arch/x86/um/elfcore.c b/arch/x86/um/elfcore.c new file mode 100644 index 000000000000..6bb49b687c97 --- /dev/null +++ b/arch/x86/um/elfcore.c @@ -0,0 +1,83 @@ +#include <linux/elf.h> +#include <linux/coredump.h> +#include <linux/fs.h> +#include <linux/mm.h> + +#include <asm/elf.h> + + +Elf32_Half elf_core_extra_phdrs(void) +{ + return vsyscall_ehdr ? (((struct elfhdr *)vsyscall_ehdr)->e_phnum) : 0; +} + +int elf_core_write_extra_phdrs(struct file *file, loff_t offset, size_t *size, + unsigned long limit) +{ + if ( vsyscall_ehdr ) { + const struct elfhdr *const ehdrp = + (struct elfhdr *) vsyscall_ehdr; + const struct elf_phdr *const phdrp = + (const struct elf_phdr *) (vsyscall_ehdr + ehdrp->e_phoff); + int i; + Elf32_Off ofs = 0; + + for (i = 0; i < ehdrp->e_phnum; ++i) { + struct elf_phdr phdr = phdrp[i]; + + if (phdr.p_type == PT_LOAD) { + ofs = phdr.p_offset = offset; + offset += phdr.p_filesz; + } else { + phdr.p_offset += ofs; + } + phdr.p_paddr = 0; /* match other core phdrs */ + *size += sizeof(phdr); + if (*size > limit + || !dump_write(file, &phdr, sizeof(phdr))) + return 0; + } + } + return 1; +} + +int elf_core_write_extra_data(struct file *file, size_t *size, + unsigned long limit) +{ + if ( vsyscall_ehdr ) { + const struct elfhdr *const ehdrp = + (struct elfhdr *) vsyscall_ehdr; + const struct elf_phdr *const phdrp = + (const struct elf_phdr *) (vsyscall_ehdr + ehdrp->e_phoff); + int i; + + for (i = 0; i < ehdrp->e_phnum; ++i) { + if (phdrp[i].p_type == PT_LOAD) { + void *addr = (void *) phdrp[i].p_vaddr; + size_t filesz = phdrp[i].p_filesz; + + *size += filesz; + if (*size > limit + || !dump_write(file, addr, filesz)) + return 0; + } + } + } + return 1; +} + +size_t elf_core_extra_data_size(void) +{ + if ( vsyscall_ehdr ) { + const struct elfhdr *const ehdrp = + (struct elfhdr *)vsyscall_ehdr; + const struct elf_phdr *const phdrp = + (const struct elf_phdr *) (vsyscall_ehdr + ehdrp->e_phoff); + int i; + + for (i = 0; i < ehdrp->e_phnum; ++i) + if (phdrp[i].p_type == PT_LOAD) + return (size_t) phdrp[i].p_filesz; + } + return 0; +} diff --git a/arch/x86/um/fault.c b/arch/x86/um/fault.c new file mode 100644 index 000000000000..d670f68532f4 --- /dev/null +++ b/arch/x86/um/fault.c @@ -0,0 +1,28 @@ +/* + * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#include "sysdep/ptrace.h" + +/* These two are from asm-um/uaccess.h and linux/module.h, check them. */ +struct exception_table_entry +{ + unsigned long insn; + unsigned long fixup; +}; + +const struct exception_table_entry *search_exception_tables(unsigned long add); + +/* Compare this to arch/i386/mm/extable.c:fixup_exception() */ +int arch_fixup(unsigned long address, struct uml_pt_regs *regs) +{ + const struct exception_table_entry *fixup; + + fixup = search_exception_tables(address); + if (fixup != 0) { + UPT_IP(regs) = fixup->fixup; + return 1; + } + return 0; +} diff --git a/arch/x86/um/ksyms.c b/arch/x86/um/ksyms.c new file mode 100644 index 000000000000..2e8f43ec6214 --- /dev/null +++ b/arch/x86/um/ksyms.c @@ -0,0 +1,13 @@ +#include <linux/module.h> +#include <asm/string.h> +#include <asm/checksum.h> + +#ifndef CONFIG_X86_32 +/*XXX: we need them because they would be exported by x86_64 */ +#if (__GNUC__ == 4 && __GNUC_MINOR__ >= 3) || __GNUC__ > 4 +EXPORT_SYMBOL(memcpy); +#else +EXPORT_SYMBOL(__memcpy); +#endif +#endif +EXPORT_SYMBOL(csum_partial); diff --git a/arch/x86/um/ldt.c b/arch/x86/um/ldt.c new file mode 100644 index 000000000000..3f2bf208d884 --- /dev/null +++ b/arch/x86/um/ldt.c @@ -0,0 +1,502 @@ +/* + * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#include <linux/mm.h> +#include <linux/sched.h> +#include <linux/slab.h> +#include <asm/unistd.h> +#include "os.h" +#include "proc_mm.h" +#include "skas.h" +#include "skas_ptrace.h" +#include "sysdep/tls.h" + +extern int modify_ldt(int func, void *ptr, unsigned long bytecount); + +static long write_ldt_entry(struct mm_id *mm_idp, int func, + struct user_desc *desc, void **addr, int done) +{ + long res; + + if (proc_mm) { + /* + * This is a special handling for the case, that the mm to + * modify isn't current->active_mm. + * If this is called directly by modify_ldt, + * (current->active_mm->context.skas.u == mm_idp) + * will be true. So no call to __switch_mm(mm_idp) is done. + * If this is called in case of init_new_ldt or PTRACE_LDT, + * mm_idp won't belong to current->active_mm, but child->mm. + * So we need to switch child's mm into our userspace, then + * later switch back. + * + * Note: I'm unsure: should interrupts be disabled here? + */ + if (!current->active_mm || current->active_mm == &init_mm || + mm_idp != ¤t->active_mm->context.id) + __switch_mm(mm_idp); + } + + if (ptrace_ldt) { + struct ptrace_ldt ldt_op = (struct ptrace_ldt) { + .func = func, + .ptr = desc, + .bytecount = sizeof(*desc)}; + u32 cpu; + int pid; + + if (!proc_mm) + pid = mm_idp->u.pid; + else { + cpu = get_cpu(); + pid = userspace_pid[cpu]; + } + + res = os_ptrace_ldt(pid, 0, (unsigned long) &ldt_op); + + if (proc_mm) + put_cpu(); + } + else { + void *stub_addr; + res = syscall_stub_data(mm_idp, (unsigned long *)desc, + (sizeof(*desc) + sizeof(long) - 1) & + ~(sizeof(long) - 1), + addr, &stub_addr); + if (!res) { + unsigned long args[] = { func, + (unsigned long)stub_addr, + sizeof(*desc), + 0, 0, 0 }; + res = run_syscall_stub(mm_idp, __NR_modify_ldt, args, + 0, addr, done); + } + } + + if (proc_mm) { + /* + * This is the second part of special handling, that makes + * PTRACE_LDT possible to implement. + */ + if (current->active_mm && current->active_mm != &init_mm && + mm_idp != ¤t->active_mm->context.id) + __switch_mm(¤t->active_mm->context.id); + } + + return res; +} + +static long read_ldt_from_host(void __user * ptr, unsigned long bytecount) +{ + int res, n; + struct ptrace_ldt ptrace_ldt = (struct ptrace_ldt) { + .func = 0, + .bytecount = bytecount, + .ptr = kmalloc(bytecount, GFP_KERNEL)}; + u32 cpu; + + if (ptrace_ldt.ptr == NULL) + return -ENOMEM; + + /* + * This is called from sys_modify_ldt only, so userspace_pid gives + * us the right number + */ + + cpu = get_cpu(); + res = os_ptrace_ldt(userspace_pid[cpu], 0, (unsigned long) &ptrace_ldt); + put_cpu(); + if (res < 0) + goto out; + + n = copy_to_user(ptr, ptrace_ldt.ptr, res); + if (n != 0) + res = -EFAULT; + + out: + kfree(ptrace_ldt.ptr); + + return res; +} + +/* + * In skas mode, we hold our own ldt data in UML. + * Thus, the code implementing sys_modify_ldt_skas + * is very similar to (and mostly stolen from) sys_modify_ldt + * for arch/i386/kernel/ldt.c + * The routines copied and modified in part are: + * - read_ldt + * - read_default_ldt + * - write_ldt + * - sys_modify_ldt_skas + */ + +static int read_ldt(void __user * ptr, unsigned long bytecount) +{ + int i, err = 0; + unsigned long size; + uml_ldt_t * ldt = ¤t->mm->context.ldt; + + if (!ldt->entry_count) + goto out; + if (bytecount > LDT_ENTRY_SIZE*LDT_ENTRIES) + bytecount = LDT_ENTRY_SIZE*LDT_ENTRIES; + err = bytecount; + + if (ptrace_ldt) + return read_ldt_from_host(ptr, bytecount); + + mutex_lock(&ldt->lock); + if (ldt->entry_count <= LDT_DIRECT_ENTRIES) { + size = LDT_ENTRY_SIZE*LDT_DIRECT_ENTRIES; + if (size > bytecount) + size = bytecount; + if (copy_to_user(ptr, ldt->u.entries, size)) + err = -EFAULT; + bytecount -= size; + ptr += size; + } + else { + for (i=0; i<ldt->entry_count/LDT_ENTRIES_PER_PAGE && bytecount; + i++) { + size = PAGE_SIZE; + if (size > bytecount) + size = bytecount; + if (copy_to_user(ptr, ldt->u.pages[i], size)) { + err = -EFAULT; + break; + } + bytecount -= size; + ptr += size; + } + } + mutex_unlock(&ldt->lock); + + if (bytecount == 0 || err == -EFAULT) + goto out; + + if (clear_user(ptr, bytecount)) + err = -EFAULT; + +out: + return err; +} + +static int read_default_ldt(void __user * ptr, unsigned long bytecount) +{ + int err; + + if (bytecount > 5*LDT_ENTRY_SIZE) + bytecount = 5*LDT_ENTRY_SIZE; + + err = bytecount; + /* + * UML doesn't support lcall7 and lcall27. + * So, we don't really have a default ldt, but emulate + * an empty ldt of common host default ldt size. + */ + if (clear_user(ptr, bytecount)) + err = -EFAULT; + + return err; +} + +static int write_ldt(void __user * ptr, unsigned long bytecount, int func) +{ + uml_ldt_t * ldt = ¤t->mm->context.ldt; + struct mm_id * mm_idp = ¤t->mm->context.id; + int i, err; + struct user_desc ldt_info; + struct ldt_entry entry0, *ldt_p; + void *addr = NULL; + + err = -EINVAL; + if (bytecount != sizeof(ldt_info)) + goto out; + err = -EFAULT; + if (copy_from_user(&ldt_info, ptr, sizeof(ldt_info))) + goto out; + + err = -EINVAL; + if (ldt_info.entry_number >= LDT_ENTRIES) + goto out; + if (ldt_info.contents == 3) { + if (func == 1) + goto out; + if (ldt_info.seg_not_present == 0) + goto out; + } + + if (!ptrace_ldt) + mutex_lock(&ldt->lock); + + err = write_ldt_entry(mm_idp, func, &ldt_info, &addr, 1); + if (err) + goto out_unlock; + else if (ptrace_ldt) { + /* With PTRACE_LDT available, this is used as a flag only */ + ldt->entry_count = 1; + goto out; + } + + if (ldt_info.entry_number >= ldt->entry_count && + ldt_info.entry_number >= LDT_DIRECT_ENTRIES) { + for (i=ldt->entry_count/LDT_ENTRIES_PER_PAGE; + i*LDT_ENTRIES_PER_PAGE <= ldt_info.entry_number; + i++) { + if (i == 0) + memcpy(&entry0, ldt->u.entries, + sizeof(entry0)); + ldt->u.pages[i] = (struct ldt_entry *) + __get_free_page(GFP_KERNEL|__GFP_ZERO); + if (!ldt->u.pages[i]) { + err = -ENOMEM; + /* Undo the change in host */ + memset(&ldt_info, 0, sizeof(ldt_info)); + write_ldt_entry(mm_idp, 1, &ldt_info, &addr, 1); + goto out_unlock; + } + if (i == 0) { + memcpy(ldt->u.pages[0], &entry0, + sizeof(entry0)); + memcpy(ldt->u.pages[0]+1, ldt->u.entries+1, + sizeof(entry0)*(LDT_DIRECT_ENTRIES-1)); + } + ldt->entry_count = (i + 1) * LDT_ENTRIES_PER_PAGE; + } + } + if (ldt->entry_count <= ldt_info.entry_number) + ldt->entry_count = ldt_info.entry_number + 1; + + if (ldt->entry_count <= LDT_DIRECT_ENTRIES) + ldt_p = ldt->u.entries + ldt_info.entry_number; + else + ldt_p = ldt->u.pages[ldt_info.entry_number/LDT_ENTRIES_PER_PAGE] + + ldt_info.entry_number%LDT_ENTRIES_PER_PAGE; + + if (ldt_info.base_addr == 0 && ldt_info.limit == 0 && + (func == 1 || LDT_empty(&ldt_info))) { + ldt_p->a = 0; + ldt_p->b = 0; + } + else{ + if (func == 1) + ldt_info.useable = 0; + ldt_p->a = LDT_entry_a(&ldt_info); + ldt_p->b = LDT_entry_b(&ldt_info); + } + err = 0; + +out_unlock: + mutex_unlock(&ldt->lock); +out: + return err; +} + +static long do_modify_ldt_skas(int func, void __user *ptr, + unsigned long bytecount) +{ + int ret = -ENOSYS; + + switch (func) { + case 0: + ret = read_ldt(ptr, bytecount); + break; + case 1: + case 0x11: + ret = write_ldt(ptr, bytecount, func); + break; + case 2: + ret = read_default_ldt(ptr, bytecount); + break; + } + return ret; +} + +static DEFINE_SPINLOCK(host_ldt_lock); +static short dummy_list[9] = {0, -1}; +static short * host_ldt_entries = NULL; + +static void ldt_get_host_info(void) +{ + long ret; + struct ldt_entry * ldt; + short *tmp; + int i, size, k, order; + + spin_lock(&host_ldt_lock); + + if (host_ldt_entries != NULL) { + spin_unlock(&host_ldt_lock); + return; + } + host_ldt_entries = dummy_list+1; + + spin_unlock(&host_ldt_lock); + + for (i = LDT_PAGES_MAX-1, order=0; i; i>>=1, order++) + ; + + ldt = (struct ldt_entry *) + __get_free_pages(GFP_KERNEL|__GFP_ZERO, order); + if (ldt == NULL) { + printk(KERN_ERR "ldt_get_host_info: couldn't allocate buffer " + "for host ldt\n"); + return; + } + + ret = modify_ldt(0, ldt, (1<<order)*PAGE_SIZE); + if (ret < 0) { + printk(KERN_ERR "ldt_get_host_info: couldn't read host ldt\n"); + goto out_free; + } + if (ret == 0) { + /* default_ldt is active, simply write an empty entry 0 */ + host_ldt_entries = dummy_list; + goto out_free; + } + + for (i=0, size=0; i<ret/LDT_ENTRY_SIZE; i++) { + if (ldt[i].a != 0 || ldt[i].b != 0) + size++; + } + + if (size < ARRAY_SIZE(dummy_list)) + host_ldt_entries = dummy_list; + else { + size = (size + 1) * sizeof(dummy_list[0]); + tmp = kmalloc(size, GFP_KERNEL); + if (tmp == NULL) { + printk(KERN_ERR "ldt_get_host_info: couldn't allocate " + "host ldt list\n"); + goto out_free; + } + host_ldt_entries = tmp; + } + + for (i=0, k=0; i<ret/LDT_ENTRY_SIZE; i++) { + if (ldt[i].a != 0 || ldt[i].b != 0) + host_ldt_entries[k++] = i; + } + host_ldt_entries[k] = -1; + +out_free: + free_pages((unsigned long)ldt, order); +} + +long init_new_ldt(struct mm_context *new_mm, struct mm_context *from_mm) +{ + struct user_desc desc; + short * num_p; + int i; + long page, err=0; + void *addr = NULL; + struct proc_mm_op copy; + + + if (!ptrace_ldt) + mutex_init(&new_mm->ldt.lock); + + if (!from_mm) { + memset(&desc, 0, sizeof(desc)); + /* + * We have to initialize a clean ldt. + */ + if (proc_mm) { + /* + * If the new mm was created using proc_mm, host's + * default-ldt currently is assigned, which normally + * contains the call-gates for lcall7 and lcall27. + * To remove these gates, we simply write an empty + * entry as number 0 to the host. + */ + err = write_ldt_entry(&new_mm->id, 1, &desc, &addr, 1); + } + else{ + /* + * Now we try to retrieve info about the ldt, we + * inherited from the host. All ldt-entries found + * will be reset in the following loop + */ + ldt_get_host_info(); + for (num_p=host_ldt_entries; *num_p != -1; num_p++) { + desc.entry_number = *num_p; + err = write_ldt_entry(&new_mm->id, 1, &desc, + &addr, *(num_p + 1) == -1); + if (err) + break; + } + } + new_mm->ldt.entry_count = 0; + + goto out; + } + + if (proc_mm) { + /* + * We have a valid from_mm, so we now have to copy the LDT of + * from_mm to new_mm, because using proc_mm an new mm with + * an empty/default LDT was created in new_mm() + */ + copy = ((struct proc_mm_op) { .op = MM_COPY_SEGMENTS, + .u = + { .copy_segments = + from_mm->id.u.mm_fd } } ); + i = os_write_file(new_mm->id.u.mm_fd, ©, sizeof(copy)); + if (i != sizeof(copy)) + printk(KERN_ERR "new_mm : /proc/mm copy_segments " + "failed, err = %d\n", -i); + } + + if (!ptrace_ldt) { + /* + * Our local LDT is used to supply the data for + * modify_ldt(READLDT), if PTRACE_LDT isn't available, + * i.e., we have to use the stub for modify_ldt, which + * can't handle the big read buffer of up to 64kB. + */ + mutex_lock(&from_mm->ldt.lock); + if (from_mm->ldt.entry_count <= LDT_DIRECT_ENTRIES) + memcpy(new_mm->ldt.u.entries, from_mm->ldt.u.entries, + sizeof(new_mm->ldt.u.entries)); + else { + i = from_mm->ldt.entry_count / LDT_ENTRIES_PER_PAGE; + while (i-->0) { + page = __get_free_page(GFP_KERNEL|__GFP_ZERO); + if (!page) { + err = -ENOMEM; + break; + } + new_mm->ldt.u.pages[i] = + (struct ldt_entry *) page; + memcpy(new_mm->ldt.u.pages[i], + from_mm->ldt.u.pages[i], PAGE_SIZE); + } + } + new_mm->ldt.entry_count = from_mm->ldt.entry_count; + mutex_unlock(&from_mm->ldt.lock); + } + + out: + return err; +} + + +void free_ldt(struct mm_context *mm) +{ + int i; + + if (!ptrace_ldt && mm->ldt.entry_count > LDT_DIRECT_ENTRIES) { + i = mm->ldt.entry_count / LDT_ENTRIES_PER_PAGE; + while (i-- > 0) + free_page((long) mm->ldt.u.pages[i]); + } + mm->ldt.entry_count = 0; +} + +int sys_modify_ldt(int func, void __user *ptr, unsigned long bytecount) +{ + return do_modify_ldt_skas(func, ptr, bytecount); +} diff --git a/arch/x86/um/mem_32.c b/arch/x86/um/mem_32.c new file mode 100644 index 000000000000..639900a6fde9 --- /dev/null +++ b/arch/x86/um/mem_32.c @@ -0,0 +1,62 @@ +/* + * Copyright (C) 2011 Richard Weinberger <richrd@nod.at> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/mm.h> +#include <asm/page.h> +#include <asm/mman.h> + +static struct vm_area_struct gate_vma; + +static int __init gate_vma_init(void) +{ + if (!FIXADDR_USER_START) + return 0; + + gate_vma.vm_mm = NULL; + gate_vma.vm_start = FIXADDR_USER_START; + gate_vma.vm_end = FIXADDR_USER_END; + gate_vma.vm_flags = VM_READ | VM_MAYREAD | VM_EXEC | VM_MAYEXEC; + gate_vma.vm_page_prot = __P101; + + /* + * Make sure the vDSO gets into every core dump. + * Dumping its contents makes post-mortem fully interpretable later + * without matching up the same kernel and hardware config to see + * what PC values meant. + */ + gate_vma.vm_flags |= VM_ALWAYSDUMP; + + return 0; +} +__initcall(gate_vma_init); + +struct vm_area_struct *get_gate_vma(struct mm_struct *mm) +{ + return FIXADDR_USER_START ? &gate_vma : NULL; +} + +int in_gate_area_no_mm(unsigned long addr) +{ + if (!FIXADDR_USER_START) + return 0; + + if ((addr >= FIXADDR_USER_START) && (addr < FIXADDR_USER_END)) + return 1; + + return 0; +} + +int in_gate_area(struct mm_struct *mm, unsigned long addr) +{ + struct vm_area_struct *vma = get_gate_vma(mm); + + if (!vma) + return 0; + + return (addr >= vma->vm_start) && (addr < vma->vm_end); +} diff --git a/arch/x86/um/mem_64.c b/arch/x86/um/mem_64.c new file mode 100644 index 000000000000..546518727a73 --- /dev/null +++ b/arch/x86/um/mem_64.c @@ -0,0 +1,26 @@ +#include "linux/mm.h" +#include "asm/page.h" +#include "asm/mman.h" + +const char *arch_vma_name(struct vm_area_struct *vma) +{ + if (vma->vm_mm && vma->vm_start == um_vdso_addr) + return "[vdso]"; + + return NULL; +} + +struct vm_area_struct *get_gate_vma(struct mm_struct *mm) +{ + return NULL; +} + +int in_gate_area(struct mm_struct *mm, unsigned long addr) +{ + return 0; +} + +int in_gate_area_no_mm(unsigned long addr) +{ + return 0; +} diff --git a/arch/x86/um/os-Linux/Makefile b/arch/x86/um/os-Linux/Makefile new file mode 100644 index 000000000000..253bfb8cb702 --- /dev/null +++ b/arch/x86/um/os-Linux/Makefile @@ -0,0 +1,13 @@ +# +# Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) +# Licensed under the GPL +# + +obj-y = registers.o task_size.o mcontext.o + +obj-$(CONFIG_X86_32) += tls.o +obj-$(CONFIG_64BIT) += prctl.o + +USER_OBJS := $(obj-y) + +include arch/um/scripts/Makefile.rules diff --git a/arch/x86/um/os-Linux/mcontext.c b/arch/x86/um/os-Linux/mcontext.c new file mode 100644 index 000000000000..1d33d72c6284 --- /dev/null +++ b/arch/x86/um/os-Linux/mcontext.c @@ -0,0 +1,31 @@ +#include <sys/ucontext.h> +#define __FRAME_OFFSETS +#include <asm/ptrace.h> +#include <sysdep/ptrace.h> + +void get_regs_from_mc(struct uml_pt_regs *regs, mcontext_t *mc) +{ +#ifdef __i386__ +#define COPY2(X,Y) regs->gp[X] = mc->gregs[REG_##Y] +#define COPY(X) regs->gp[X] = mc->gregs[REG_##X] +#define COPY_SEG(X) regs->gp[X] = mc->gregs[REG_##X] & 0xffff; +#define COPY_SEG_CPL3(X) regs->gp[X] = (mc->gregs[REG_##X] & 0xffff) | 3; + COPY_SEG(GS); COPY_SEG(FS); COPY_SEG(ES); COPY_SEG(DS); + COPY(EDI); COPY(ESI); COPY(EBP); + COPY2(UESP, ESP); /* sic */ + COPY(EBX); COPY(EDX); COPY(ECX); COPY(EAX); + COPY(EIP); COPY_SEG_CPL3(CS); COPY(EFL); COPY_SEG_CPL3(SS); +#else +#define COPY2(X,Y) regs->gp[X/sizeof(unsigned long)] = mc->gregs[REG_##Y] +#define COPY(X) regs->gp[X/sizeof(unsigned long)] = mc->gregs[REG_##X] + COPY(R8); COPY(R9); COPY(R10); COPY(R11); + COPY(R12); COPY(R13); COPY(R14); COPY(R15); + COPY(RDI); COPY(RSI); COPY(RBP); COPY(RBX); + COPY(RDX); COPY(RAX); COPY(RCX); COPY(RSP); + COPY(RIP); + COPY2(EFLAGS, EFL); + COPY2(CS, CSGSFS); + regs->gp[CS / sizeof(unsigned long)] &= 0xffff; + regs->gp[CS / sizeof(unsigned long)] |= 3; +#endif +} diff --git a/arch/x86/um/os-Linux/prctl.c b/arch/x86/um/os-Linux/prctl.c new file mode 100644 index 000000000000..9d34eddb517f --- /dev/null +++ b/arch/x86/um/os-Linux/prctl.c @@ -0,0 +1,12 @@ +/* + * Copyright (C) 2007 Jeff Dike (jdike@{addtoit.com,linux.intel.com}) + * Licensed under the GPL + */ + +#include <sys/ptrace.h> +#include <linux/ptrace.h> + +int os_arch_prctl(int pid, int code, unsigned long *addr) +{ + return ptrace(PTRACE_ARCH_PRCTL, pid, (unsigned long) addr, code); +} diff --git a/arch/x86/um/os-Linux/registers.c b/arch/x86/um/os-Linux/registers.c new file mode 100644 index 000000000000..3a9b6247bbbc --- /dev/null +++ b/arch/x86/um/os-Linux/registers.c @@ -0,0 +1,111 @@ +/* + * Copyright (C) 2004 PathScale, Inc + * Copyright (C) 2004 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#include <errno.h> +#include <sys/ptrace.h> +#include <sys/user.h> +#include "longjmp.h" +#include "sysdep/ptrace_user.h" + +int save_fp_registers(int pid, unsigned long *fp_regs) +{ + if (ptrace(PTRACE_GETFPREGS, pid, 0, fp_regs) < 0) + return -errno; + return 0; +} + +int restore_fp_registers(int pid, unsigned long *fp_regs) +{ + if (ptrace(PTRACE_SETFPREGS, pid, 0, fp_regs) < 0) + return -errno; + return 0; +} + +#ifdef __i386__ +int have_fpx_regs = 1; +int save_fpx_registers(int pid, unsigned long *fp_regs) +{ + if (ptrace(PTRACE_GETFPXREGS, pid, 0, fp_regs) < 0) + return -errno; + return 0; +} + +int restore_fpx_registers(int pid, unsigned long *fp_regs) +{ + if (ptrace(PTRACE_SETFPXREGS, pid, 0, fp_regs) < 0) + return -errno; + return 0; +} + +int get_fp_registers(int pid, unsigned long *regs) +{ + if (have_fpx_regs) + return save_fpx_registers(pid, regs); + else + return save_fp_registers(pid, regs); +} + +int put_fp_registers(int pid, unsigned long *regs) +{ + if (have_fpx_regs) + return restore_fpx_registers(pid, regs); + else + return restore_fp_registers(pid, regs); +} + +void arch_init_registers(int pid) +{ + struct user_fpxregs_struct fpx_regs; + int err; + + err = ptrace(PTRACE_GETFPXREGS, pid, 0, &fpx_regs); + if (!err) + return; + + if (errno != EIO) + panic("check_ptrace : PTRACE_GETFPXREGS failed, errno = %d", + errno); + + have_fpx_regs = 0; +} +#else + +int get_fp_registers(int pid, unsigned long *regs) +{ + return save_fp_registers(pid, regs); +} + +int put_fp_registers(int pid, unsigned long *regs) +{ + return restore_fp_registers(pid, regs); +} + +#endif + +unsigned long get_thread_reg(int reg, jmp_buf *buf) +{ + switch (reg) { +#ifdef __i386__ + case EIP: + return buf[0]->__eip; + case UESP: + return buf[0]->__esp; + case EBP: + return buf[0]->__ebp; +#else + case RIP: + return buf[0]->__rip; + case RSP: + return buf[0]->__rsp; + case RBP: + return buf[0]->__rbp; +#endif + default: + printk(UM_KERN_ERR "get_thread_regs - unknown register %d\n", + reg); + return 0; + } +} diff --git a/arch/x86/um/os-Linux/task_size.c b/arch/x86/um/os-Linux/task_size.c new file mode 100644 index 000000000000..efb16c5c9bcf --- /dev/null +++ b/arch/x86/um/os-Linux/task_size.c @@ -0,0 +1,150 @@ +#include <stdio.h> +#include <stdlib.h> +#include <signal.h> +#include <sys/mman.h> +#include "longjmp.h" + +#ifdef __i386__ + +static jmp_buf buf; + +static void segfault(int sig) +{ + longjmp(buf, 1); +} + +static int page_ok(unsigned long page) +{ + unsigned long *address = (unsigned long *) (page << UM_KERN_PAGE_SHIFT); + unsigned long n = ~0UL; + void *mapped = NULL; + int ok = 0; + + /* + * First see if the page is readable. If it is, it may still + * be a VDSO, so we go on to see if it's writable. If not + * then try mapping memory there. If that fails, then we're + * still in the kernel area. As a sanity check, we'll fail if + * the mmap succeeds, but gives us an address different from + * what we wanted. + */ + if (setjmp(buf) == 0) + n = *address; + else { + mapped = mmap(address, UM_KERN_PAGE_SIZE, + PROT_READ | PROT_WRITE, + MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (mapped == MAP_FAILED) + return 0; + if (mapped != address) + goto out; + } + + /* + * Now, is it writeable? If so, then we're in user address + * space. If not, then try mprotecting it and try the write + * again. + */ + if (setjmp(buf) == 0) { + *address = n; + ok = 1; + goto out; + } else if (mprotect(address, UM_KERN_PAGE_SIZE, + PROT_READ | PROT_WRITE) != 0) + goto out; + + if (setjmp(buf) == 0) { + *address = n; + ok = 1; + } + + out: + if (mapped != NULL) + munmap(mapped, UM_KERN_PAGE_SIZE); + return ok; +} + +unsigned long os_get_top_address(void) +{ + struct sigaction sa, old; + unsigned long bottom = 0; + /* + * A 32-bit UML on a 64-bit host gets confused about the VDSO at + * 0xffffe000. It is mapped, is readable, can be reprotected writeable + * and written. However, exec discovers later that it can't be + * unmapped. So, just set the highest address to be checked to just + * below it. This might waste some address space on 4G/4G 32-bit + * hosts, but shouldn't hurt otherwise. + */ + unsigned long top = 0xffffd000 >> UM_KERN_PAGE_SHIFT; + unsigned long test, original; + + printf("Locating the bottom of the address space ... "); + fflush(stdout); + + /* + * We're going to be longjmping out of the signal handler, so + * SA_DEFER needs to be set. + */ + sa.sa_handler = segfault; + sigemptyset(&sa.sa_mask); + sa.sa_flags = SA_NODEFER; + if (sigaction(SIGSEGV, &sa, &old)) { + perror("os_get_top_address"); + exit(1); + } + + /* Manually scan the address space, bottom-up, until we find + * the first valid page (or run out of them). + */ + for (bottom = 0; bottom < top; bottom++) { + if (page_ok(bottom)) + break; + } + + /* If we've got this far, we ran out of pages. */ + if (bottom == top) { + fprintf(stderr, "Unable to determine bottom of address " + "space.\n"); + exit(1); + } + + printf("0x%x\n", bottom << UM_KERN_PAGE_SHIFT); + printf("Locating the top of the address space ... "); + fflush(stdout); + + original = bottom; + + /* This could happen with a 4G/4G split */ + if (page_ok(top)) + goto out; + + do { + test = bottom + (top - bottom) / 2; + if (page_ok(test)) + bottom = test; + else + top = test; + } while (top - bottom > 1); + +out: + /* Restore the old SIGSEGV handling */ + if (sigaction(SIGSEGV, &old, NULL)) { + perror("os_get_top_address"); + exit(1); + } + top <<= UM_KERN_PAGE_SHIFT; + printf("0x%x\n", top); + + return top; +} + +#else + +unsigned long os_get_top_address(void) +{ + /* The old value of CONFIG_TOP_ADDR */ + return 0x7fc0000000; +} + +#endif diff --git a/arch/x86/um/os-Linux/tls.c b/arch/x86/um/os-Linux/tls.c new file mode 100644 index 000000000000..281e83ecce3d --- /dev/null +++ b/arch/x86/um/os-Linux/tls.c @@ -0,0 +1,35 @@ +#include <errno.h> +#include <linux/unistd.h> + +#include <sys/syscall.h> +#include <unistd.h> + +#include "sysdep/tls.h" + +/* Checks whether host supports TLS, and sets *tls_min according to the value + * valid on the host. + * i386 host have it == 6; x86_64 host have it == 12, for i386 emulation. */ +void check_host_supports_tls(int *supports_tls, int *tls_min) { + /* Values for x86 and x86_64.*/ + int val[] = {GDT_ENTRY_TLS_MIN_I386, GDT_ENTRY_TLS_MIN_X86_64}; + int i; + + for (i = 0; i < ARRAY_SIZE(val); i++) { + user_desc_t info; + info.entry_number = val[i]; + + if (syscall(__NR_get_thread_area, &info) == 0) { + *tls_min = val[i]; + *supports_tls = 1; + return; + } else { + if (errno == EINVAL) + continue; + else if (errno == ENOSYS) + *supports_tls = 0; + return; + } + } + + *supports_tls = 0; +} diff --git a/arch/x86/um/ptrace_32.c b/arch/x86/um/ptrace_32.c new file mode 100644 index 000000000000..a174fde2531c --- /dev/null +++ b/arch/x86/um/ptrace_32.c @@ -0,0 +1,273 @@ +/* + * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#include "linux/mm.h" +#include "linux/sched.h" +#include "asm/uaccess.h" +#include "skas.h" + +extern int arch_switch_tls(struct task_struct *to); + +void arch_switch_to(struct task_struct *to) +{ + int err = arch_switch_tls(to); + if (!err) + return; + + if (err != -EINVAL) + printk(KERN_WARNING "arch_switch_tls failed, errno %d, " + "not EINVAL\n", -err); + else + printk(KERN_WARNING "arch_switch_tls failed, errno = EINVAL\n"); +} + +int is_syscall(unsigned long addr) +{ + unsigned short instr; + int n; + + n = copy_from_user(&instr, (void __user *) addr, sizeof(instr)); + if (n) { + /* access_process_vm() grants access to vsyscall and stub, + * while copy_from_user doesn't. Maybe access_process_vm is + * slow, but that doesn't matter, since it will be called only + * in case of singlestepping, if copy_from_user failed. + */ + n = access_process_vm(current, addr, &instr, sizeof(instr), 0); + if (n != sizeof(instr)) { + printk(KERN_ERR "is_syscall : failed to read " + "instruction from 0x%lx\n", addr); + return 1; + } + } + /* int 0x80 or sysenter */ + return (instr == 0x80cd) || (instr == 0x340f); +} + +/* determines which flags the user has access to. */ +/* 1 = access 0 = no access */ +#define FLAG_MASK 0x00044dd5 + +static const int reg_offsets[] = { + [EBX] = HOST_EBX, + [ECX] = HOST_ECX, + [EDX] = HOST_EDX, + [ESI] = HOST_ESI, + [EDI] = HOST_EDI, + [EBP] = HOST_EBP, + [EAX] = HOST_EAX, + [DS] = HOST_DS, + [ES] = HOST_ES, + [FS] = HOST_FS, + [GS] = HOST_GS, + [EIP] = HOST_IP, + [CS] = HOST_CS, + [EFL] = HOST_EFLAGS, + [UESP] = HOST_SP, + [SS] = HOST_SS, +}; + +int putreg(struct task_struct *child, int regno, unsigned long value) +{ + regno >>= 2; + switch (regno) { + case EBX: + case ECX: + case EDX: + case ESI: + case EDI: + case EBP: + case EAX: + case EIP: + case UESP: + break; + case FS: + if (value && (value & 3) != 3) + return -EIO; + break; + case GS: + if (value && (value & 3) != 3) + return -EIO; + break; + case DS: + case ES: + if (value && (value & 3) != 3) + return -EIO; + value &= 0xffff; + break; + case SS: + case CS: + if ((value & 3) != 3) + return -EIO; + value &= 0xffff; + break; + case EFL: + value &= FLAG_MASK; + child->thread.regs.regs.gp[HOST_EFLAGS] |= value; + return 0; + case ORIG_EAX: + child->thread.regs.regs.syscall = value; + return 0; + default : + panic("Bad register in putreg() : %d\n", regno); + } + child->thread.regs.regs.gp[reg_offsets[regno]] = value; + return 0; +} + +int poke_user(struct task_struct *child, long addr, long data) +{ + if ((addr & 3) || addr < 0) + return -EIO; + + if (addr < MAX_REG_OFFSET) + return putreg(child, addr, data); + else if ((addr >= offsetof(struct user, u_debugreg[0])) && + (addr <= offsetof(struct user, u_debugreg[7]))) { + addr -= offsetof(struct user, u_debugreg[0]); + addr = addr >> 2; + if ((addr == 4) || (addr == 5)) + return -EIO; + child->thread.arch.debugregs[addr] = data; + return 0; + } + return -EIO; +} + +unsigned long getreg(struct task_struct *child, int regno) +{ + unsigned long mask = ~0UL; + + regno >>= 2; + switch (regno) { + case ORIG_EAX: + return child->thread.regs.regs.syscall; + case FS: + case GS: + case DS: + case ES: + case SS: + case CS: + mask = 0xffff; + break; + case EIP: + case UESP: + case EAX: + case EBX: + case ECX: + case EDX: + case ESI: + case EDI: + case EBP: + case EFL: + break; + default: + panic("Bad register in getreg() : %d\n", regno); + } + return mask & child->thread.regs.regs.gp[reg_offsets[regno]]; +} + +/* read the word at location addr in the USER area. */ +int peek_user(struct task_struct *child, long addr, long data) +{ + unsigned long tmp; + + if ((addr & 3) || addr < 0) + return -EIO; + + tmp = 0; /* Default return condition */ + if (addr < MAX_REG_OFFSET) { + tmp = getreg(child, addr); + } + else if ((addr >= offsetof(struct user, u_debugreg[0])) && + (addr <= offsetof(struct user, u_debugreg[7]))) { + addr -= offsetof(struct user, u_debugreg[0]); + addr = addr >> 2; + tmp = child->thread.arch.debugregs[addr]; + } + return put_user(tmp, (unsigned long __user *) data); +} + +static int get_fpregs(struct user_i387_struct __user *buf, struct task_struct *child) +{ + int err, n, cpu = ((struct thread_info *) child->stack)->cpu; + struct user_i387_struct fpregs; + + err = save_fp_registers(userspace_pid[cpu], (unsigned long *) &fpregs); + if (err) + return err; + + n = copy_to_user(buf, &fpregs, sizeof(fpregs)); + if(n > 0) + return -EFAULT; + + return n; +} + +static int set_fpregs(struct user_i387_struct __user *buf, struct task_struct *child) +{ + int n, cpu = ((struct thread_info *) child->stack)->cpu; + struct user_i387_struct fpregs; + + n = copy_from_user(&fpregs, buf, sizeof(fpregs)); + if (n > 0) + return -EFAULT; + + return restore_fp_registers(userspace_pid[cpu], + (unsigned long *) &fpregs); +} + +static int get_fpxregs(struct user_fxsr_struct __user *buf, struct task_struct *child) +{ + int err, n, cpu = ((struct thread_info *) child->stack)->cpu; + struct user_fxsr_struct fpregs; + + err = save_fpx_registers(userspace_pid[cpu], (unsigned long *) &fpregs); + if (err) + return err; + + n = copy_to_user(buf, &fpregs, sizeof(fpregs)); + if(n > 0) + return -EFAULT; + + return n; +} + +static int set_fpxregs(struct user_fxsr_struct __user *buf, struct task_struct *child) +{ + int n, cpu = ((struct thread_info *) child->stack)->cpu; + struct user_fxsr_struct fpregs; + + n = copy_from_user(&fpregs, buf, sizeof(fpregs)); + if (n > 0) + return -EFAULT; + + return restore_fpx_registers(userspace_pid[cpu], + (unsigned long *) &fpregs); +} + +long subarch_ptrace(struct task_struct *child, long request, + unsigned long addr, unsigned long data) +{ + int ret = -EIO; + void __user *datap = (void __user *) data; + switch (request) { + case PTRACE_GETFPREGS: /* Get the child FPU state. */ + ret = get_fpregs(datap, child); + break; + case PTRACE_SETFPREGS: /* Set the child FPU state. */ + ret = set_fpregs(datap, child); + break; + case PTRACE_GETFPXREGS: /* Get the child FPU state. */ + ret = get_fpxregs(datap, child); + break; + case PTRACE_SETFPXREGS: /* Set the child FPU state. */ + ret = set_fpxregs(datap, child); + break; + default: + ret = -EIO; + } + return ret; +} diff --git a/arch/x86/um/ptrace_64.c b/arch/x86/um/ptrace_64.c new file mode 100644 index 000000000000..44e68e0c0d10 --- /dev/null +++ b/arch/x86/um/ptrace_64.c @@ -0,0 +1,271 @@ +/* + * Copyright 2003 PathScale, Inc. + * Copyright (C) 2003 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * + * Licensed under the GPL + */ + +#include <linux/mm.h> +#include <linux/sched.h> +#include <linux/errno.h> +#define __FRAME_OFFSETS +#include <asm/ptrace.h> +#include <asm/uaccess.h> + +/* + * determines which flags the user has access to. + * 1 = access 0 = no access + */ +#define FLAG_MASK 0x44dd5UL + +static const int reg_offsets[] = +{ + [R8 >> 3] = HOST_R8, + [R9 >> 3] = HOST_R9, + [R10 >> 3] = HOST_R10, + [R11 >> 3] = HOST_R11, + [R12 >> 3] = HOST_R12, + [R13 >> 3] = HOST_R13, + [R14 >> 3] = HOST_R14, + [R15 >> 3] = HOST_R15, + [RIP >> 3] = HOST_IP, + [RSP >> 3] = HOST_SP, + [RAX >> 3] = HOST_RAX, + [RBX >> 3] = HOST_RBX, + [RCX >> 3] = HOST_RCX, + [RDX >> 3] = HOST_RDX, + [RSI >> 3] = HOST_RSI, + [RDI >> 3] = HOST_RDI, + [RBP >> 3] = HOST_RBP, + [CS >> 3] = HOST_CS, + [SS >> 3] = HOST_SS, + [FS_BASE >> 3] = HOST_FS_BASE, + [GS_BASE >> 3] = HOST_GS_BASE, + [DS >> 3] = HOST_DS, + [ES >> 3] = HOST_ES, + [FS >> 3] = HOST_FS, + [GS >> 3] = HOST_GS, + [EFLAGS >> 3] = HOST_EFLAGS, + [ORIG_RAX >> 3] = HOST_ORIG_RAX, +}; + +int putreg(struct task_struct *child, int regno, unsigned long value) +{ +#ifdef TIF_IA32 + /* + * Some code in the 64bit emulation may not be 64bit clean. + * Don't take any chances. + */ + if (test_tsk_thread_flag(child, TIF_IA32)) + value &= 0xffffffff; +#endif + switch (regno) { + case R8: + case R9: + case R10: + case R11: + case R12: + case R13: + case R14: + case R15: + case RIP: + case RSP: + case RAX: + case RBX: + case RCX: + case RDX: + case RSI: + case RDI: + case RBP: + case ORIG_RAX: + break; + + case FS: + case GS: + case DS: + case ES: + case SS: + case CS: + if (value && (value & 3) != 3) + return -EIO; + value &= 0xffff; + break; + + case FS_BASE: + case GS_BASE: + if (!((value >> 48) == 0 || (value >> 48) == 0xffff)) + return -EIO; + break; + + case EFLAGS: + value &= FLAG_MASK; + child->thread.regs.regs.gp[HOST_EFLAGS] |= value; + return 0; + + default: + panic("Bad register in putreg(): %d\n", regno); + } + + child->thread.regs.regs.gp[reg_offsets[regno >> 3]] = value; + return 0; +} + +int poke_user(struct task_struct *child, long addr, long data) +{ + if ((addr & 3) || addr < 0) + return -EIO; + + if (addr < MAX_REG_OFFSET) + return putreg(child, addr, data); + else if ((addr >= offsetof(struct user, u_debugreg[0])) && + (addr <= offsetof(struct user, u_debugreg[7]))) { + addr -= offsetof(struct user, u_debugreg[0]); + addr = addr >> 2; + if ((addr == 4) || (addr == 5)) + return -EIO; + child->thread.arch.debugregs[addr] = data; + return 0; + } + return -EIO; +} + +unsigned long getreg(struct task_struct *child, int regno) +{ + unsigned long mask = ~0UL; +#ifdef TIF_IA32 + if (test_tsk_thread_flag(child, TIF_IA32)) + mask = 0xffffffff; +#endif + switch (regno) { + case R8: + case R9: + case R10: + case R11: + case R12: + case R13: + case R14: + case R15: + case RIP: + case RSP: + case RAX: + case RBX: + case RCX: + case RDX: + case RSI: + case RDI: + case RBP: + case ORIG_RAX: + case EFLAGS: + case FS_BASE: + case GS_BASE: + break; + case FS: + case GS: + case DS: + case ES: + case SS: + case CS: + mask = 0xffff; + break; + default: + panic("Bad register in getreg: %d\n", regno); + } + return mask & child->thread.regs.regs.gp[reg_offsets[regno >> 3]]; +} + +int peek_user(struct task_struct *child, long addr, long data) +{ + /* read the word at location addr in the USER area. */ + unsigned long tmp; + + if ((addr & 3) || addr < 0) + return -EIO; + + tmp = 0; /* Default return condition */ + if (addr < MAX_REG_OFFSET) + tmp = getreg(child, addr); + else if ((addr >= offsetof(struct user, u_debugreg[0])) && + (addr <= offsetof(struct user, u_debugreg[7]))) { + addr -= offsetof(struct user, u_debugreg[0]); + addr = addr >> 2; + tmp = child->thread.arch.debugregs[addr]; + } + return put_user(tmp, (unsigned long *) data); +} + +/* XXX Mostly copied from sys-i386 */ +int is_syscall(unsigned long addr) +{ + unsigned short instr; + int n; + + n = copy_from_user(&instr, (void __user *) addr, sizeof(instr)); + if (n) { + /* + * access_process_vm() grants access to vsyscall and stub, + * while copy_from_user doesn't. Maybe access_process_vm is + * slow, but that doesn't matter, since it will be called only + * in case of singlestepping, if copy_from_user failed. + */ + n = access_process_vm(current, addr, &instr, sizeof(instr), 0); + if (n != sizeof(instr)) { + printk("is_syscall : failed to read instruction from " + "0x%lx\n", addr); + return 1; + } + } + /* sysenter */ + return instr == 0x050f; +} + +static int get_fpregs(struct user_i387_struct __user *buf, struct task_struct *child) +{ + int err, n, cpu = ((struct thread_info *) child->stack)->cpu; + long fpregs[HOST_FP_SIZE]; + + BUG_ON(sizeof(*buf) != sizeof(fpregs)); + err = save_fp_registers(userspace_pid[cpu], fpregs); + if (err) + return err; + + n = copy_to_user(buf, fpregs, sizeof(fpregs)); + if (n > 0) + return -EFAULT; + + return n; +} + +static int set_fpregs(struct user_i387_struct __user *buf, struct task_struct *child) +{ + int n, cpu = ((struct thread_info *) child->stack)->cpu; + long fpregs[HOST_FP_SIZE]; + + BUG_ON(sizeof(*buf) != sizeof(fpregs)); + n = copy_from_user(fpregs, buf, sizeof(fpregs)); + if (n > 0) + return -EFAULT; + + return restore_fp_registers(userspace_pid[cpu], fpregs); +} + +long subarch_ptrace(struct task_struct *child, long request, + unsigned long addr, unsigned long data) +{ + int ret = -EIO; + void __user *datap = (void __user *) data; + + switch (request) { + case PTRACE_GETFPREGS: /* Get the child FPU state. */ + ret = get_fpregs(datap, child); + break; + case PTRACE_SETFPREGS: /* Set the child FPU state. */ + ret = set_fpregs(datap, child); + break; + case PTRACE_ARCH_PRCTL: + /* XXX Calls ptrace on the host - needs some SMP thinking */ + ret = arch_prctl(child, data, (void __user *) addr); + break; + } + + return ret; +} diff --git a/arch/x86/um/ptrace_user.c b/arch/x86/um/ptrace_user.c new file mode 100644 index 000000000000..3960ca1dd35a --- /dev/null +++ b/arch/x86/um/ptrace_user.c @@ -0,0 +1,21 @@ +/* + * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#include <errno.h> +#include "ptrace_user.h" + +int ptrace_getregs(long pid, unsigned long *regs_out) +{ + if (ptrace(PTRACE_GETREGS, pid, 0, regs_out) < 0) + return -errno; + return 0; +} + +int ptrace_setregs(long pid, unsigned long *regs) +{ + if (ptrace(PTRACE_SETREGS, pid, 0, regs) < 0) + return -errno; + return 0; +} diff --git a/arch/x86/um/setjmp_32.S b/arch/x86/um/setjmp_32.S new file mode 100644 index 000000000000..b766792c9933 --- /dev/null +++ b/arch/x86/um/setjmp_32.S @@ -0,0 +1,58 @@ +# +# arch/i386/setjmp.S +# +# setjmp/longjmp for the i386 architecture +# + +# +# The jmp_buf is assumed to contain the following, in order: +# %ebx +# %esp +# %ebp +# %esi +# %edi +# <return address> +# + + .text + .align 4 + .globl setjmp + .type setjmp, @function +setjmp: +#ifdef _REGPARM + movl %eax,%edx +#else + movl 4(%esp),%edx +#endif + popl %ecx # Return address, and adjust the stack + xorl %eax,%eax # Return value + movl %ebx,(%edx) + movl %esp,4(%edx) # Post-return %esp! + pushl %ecx # Make the call/return stack happy + movl %ebp,8(%edx) + movl %esi,12(%edx) + movl %edi,16(%edx) + movl %ecx,20(%edx) # Return address + ret + + .size setjmp,.-setjmp + + .text + .align 4 + .globl longjmp + .type longjmp, @function +longjmp: +#ifdef _REGPARM + xchgl %eax,%edx +#else + movl 4(%esp),%edx # jmp_ptr address + movl 8(%esp),%eax # Return value +#endif + movl (%edx),%ebx + movl 4(%edx),%esp + movl 8(%edx),%ebp + movl 12(%edx),%esi + movl 16(%edx),%edi + jmp *20(%edx) + + .size longjmp,.-longjmp diff --git a/arch/x86/um/setjmp_64.S b/arch/x86/um/setjmp_64.S new file mode 100644 index 000000000000..45f547b4043e --- /dev/null +++ b/arch/x86/um/setjmp_64.S @@ -0,0 +1,54 @@ +# +# arch/x86_64/setjmp.S +# +# setjmp/longjmp for the x86-64 architecture +# + +# +# The jmp_buf is assumed to contain the following, in order: +# %rbx +# %rsp (post-return) +# %rbp +# %r12 +# %r13 +# %r14 +# %r15 +# <return address> +# + + .text + .align 4 + .globl setjmp + .type setjmp, @function +setjmp: + pop %rsi # Return address, and adjust the stack + xorl %eax,%eax # Return value + movq %rbx,(%rdi) + movq %rsp,8(%rdi) # Post-return %rsp! + push %rsi # Make the call/return stack happy + movq %rbp,16(%rdi) + movq %r12,24(%rdi) + movq %r13,32(%rdi) + movq %r14,40(%rdi) + movq %r15,48(%rdi) + movq %rsi,56(%rdi) # Return address + ret + + .size setjmp,.-setjmp + + .text + .align 4 + .globl longjmp + .type longjmp, @function +longjmp: + movl %esi,%eax # Return value (int) + movq (%rdi),%rbx + movq 8(%rdi),%rsp + movq 16(%rdi),%rbp + movq 24(%rdi),%r12 + movq 32(%rdi),%r13 + movq 40(%rdi),%r14 + movq 48(%rdi),%r15 + jmp *56(%rdi) + + .size longjmp,.-longjmp diff --git a/arch/x86/um/shared/sysdep/archsetjmp.h b/arch/x86/um/shared/sysdep/archsetjmp.h new file mode 100644 index 000000000000..ff7766d28226 --- /dev/null +++ b/arch/x86/um/shared/sysdep/archsetjmp.h @@ -0,0 +1,5 @@ +#ifdef __i386__ +#include "archsetjmp_32.h" +#else +#include "archsetjmp_64.h" +#endif diff --git a/arch/x86/um/shared/sysdep/archsetjmp_32.h b/arch/x86/um/shared/sysdep/archsetjmp_32.h new file mode 100644 index 000000000000..0f312085ce1d --- /dev/null +++ b/arch/x86/um/shared/sysdep/archsetjmp_32.h @@ -0,0 +1,22 @@ +/* + * arch/um/include/sysdep-i386/archsetjmp.h + */ + +#ifndef _KLIBC_ARCHSETJMP_H +#define _KLIBC_ARCHSETJMP_H + +struct __jmp_buf { + unsigned int __ebx; + unsigned int __esp; + unsigned int __ebp; + unsigned int __esi; + unsigned int __edi; + unsigned int __eip; +}; + +typedef struct __jmp_buf jmp_buf[1]; + +#define JB_IP __eip +#define JB_SP __esp + +#endif /* _SETJMP_H */ diff --git a/arch/x86/um/shared/sysdep/archsetjmp_64.h b/arch/x86/um/shared/sysdep/archsetjmp_64.h new file mode 100644 index 000000000000..2af8f12ca161 --- /dev/null +++ b/arch/x86/um/shared/sysdep/archsetjmp_64.h @@ -0,0 +1,24 @@ +/* + * arch/um/include/sysdep-x86_64/archsetjmp.h + */ + +#ifndef _KLIBC_ARCHSETJMP_H +#define _KLIBC_ARCHSETJMP_H + +struct __jmp_buf { + unsigned long __rbx; + unsigned long __rsp; + unsigned long __rbp; + unsigned long __r12; + unsigned long __r13; + unsigned long __r14; + unsigned long __r15; + unsigned long __rip; +}; + +typedef struct __jmp_buf jmp_buf[1]; + +#define JB_IP __rip +#define JB_SP __rsp + +#endif /* _SETJMP_H */ diff --git a/arch/x86/um/shared/sysdep/faultinfo.h b/arch/x86/um/shared/sysdep/faultinfo.h new file mode 100644 index 000000000000..862ecb1c7781 --- /dev/null +++ b/arch/x86/um/shared/sysdep/faultinfo.h @@ -0,0 +1,5 @@ +#ifdef __i386__ +#include "faultinfo_32.h" +#else +#include "faultinfo_64.h" +#endif diff --git a/arch/x86/um/shared/sysdep/faultinfo_32.h b/arch/x86/um/shared/sysdep/faultinfo_32.h new file mode 100644 index 000000000000..a26086b8a800 --- /dev/null +++ b/arch/x86/um/shared/sysdep/faultinfo_32.h @@ -0,0 +1,35 @@ +/* + * Copyright (C) 2004 Fujitsu Siemens Computers GmbH + * Author: Bodo Stroesser <bstroesser@fujitsu-siemens.com> + * Licensed under the GPL + */ + +#ifndef __FAULTINFO_I386_H +#define __FAULTINFO_I386_H + +/* this structure contains the full arch-specific faultinfo + * from the traps. + * On i386, ptrace_faultinfo unfortunately doesn't provide + * all the info, since trap_no is missing. + * All common elements are defined at the same position in + * both structures, thus making it easy to copy the + * contents without knowledge about the structure elements. + */ +struct faultinfo { + int error_code; /* in ptrace_faultinfo misleadingly called is_write */ + unsigned long cr2; /* in ptrace_faultinfo called addr */ + int trap_no; /* missing in ptrace_faultinfo */ +}; + +#define FAULT_WRITE(fi) ((fi).error_code & 2) +#define FAULT_ADDRESS(fi) ((fi).cr2) + +/* This is Page Fault */ +#define SEGV_IS_FIXABLE(fi) ((fi)->trap_no == 14) + +/* SKAS3 has no trap_no on i386, but get_skas_faultinfo() sets it to 0. */ +#define SEGV_MAYBE_FIXABLE(fi) ((fi)->trap_no == 0 && ptrace_faultinfo) + +#define PTRACE_FULL_FAULTINFO 0 + +#endif diff --git a/arch/x86/um/shared/sysdep/faultinfo_64.h b/arch/x86/um/shared/sysdep/faultinfo_64.h new file mode 100644 index 000000000000..f811cbe15d62 --- /dev/null +++ b/arch/x86/um/shared/sysdep/faultinfo_64.h @@ -0,0 +1,35 @@ +/* + * Copyright (C) 2004 Fujitsu Siemens Computers GmbH + * Author: Bodo Stroesser <bstroesser@fujitsu-siemens.com> + * Licensed under the GPL + */ + +#ifndef __FAULTINFO_X86_64_H +#define __FAULTINFO_X86_64_H + +/* this structure contains the full arch-specific faultinfo + * from the traps. + * On i386, ptrace_faultinfo unfortunately doesn't provide + * all the info, since trap_no is missing. + * All common elements are defined at the same position in + * both structures, thus making it easy to copy the + * contents without knowledge about the structure elements. + */ +struct faultinfo { + int error_code; /* in ptrace_faultinfo misleadingly called is_write */ + unsigned long cr2; /* in ptrace_faultinfo called addr */ + int trap_no; /* missing in ptrace_faultinfo */ +}; + +#define FAULT_WRITE(fi) ((fi).error_code & 2) +#define FAULT_ADDRESS(fi) ((fi).cr2) + +/* This is Page Fault */ +#define SEGV_IS_FIXABLE(fi) ((fi)->trap_no == 14) + +/* No broken SKAS API, which doesn't pass trap_no, here. */ +#define SEGV_MAYBE_FIXABLE(fi) 0 + +#define PTRACE_FULL_FAULTINFO 1 + +#endif diff --git a/arch/x86/um/shared/sysdep/host_ldt.h b/arch/x86/um/shared/sysdep/host_ldt.h new file mode 100644 index 000000000000..94518b3e0da5 --- /dev/null +++ b/arch/x86/um/shared/sysdep/host_ldt.h @@ -0,0 +1,5 @@ +#ifdef __i386__ +#include "host_ldt_32.h" +#else +#include "host_ldt_64.h" +#endif diff --git a/arch/x86/um/shared/sysdep/host_ldt_32.h b/arch/x86/um/shared/sysdep/host_ldt_32.h new file mode 100644 index 000000000000..0953cc4df652 --- /dev/null +++ b/arch/x86/um/shared/sysdep/host_ldt_32.h @@ -0,0 +1,34 @@ +#ifndef __ASM_HOST_LDT_I386_H +#define __ASM_HOST_LDT_I386_H + +#include <asm/ldt.h> + +/* + * macros stolen from include/asm-i386/desc.h + */ +#define LDT_entry_a(info) \ + ((((info)->base_addr & 0x0000ffff) << 16) | ((info)->limit & 0x0ffff)) + +#define LDT_entry_b(info) \ + (((info)->base_addr & 0xff000000) | \ + (((info)->base_addr & 0x00ff0000) >> 16) | \ + ((info)->limit & 0xf0000) | \ + (((info)->read_exec_only ^ 1) << 9) | \ + ((info)->contents << 10) | \ + (((info)->seg_not_present ^ 1) << 15) | \ + ((info)->seg_32bit << 22) | \ + ((info)->limit_in_pages << 23) | \ + ((info)->useable << 20) | \ + 0x7000) + +#define LDT_empty(info) (\ + (info)->base_addr == 0 && \ + (info)->limit == 0 && \ + (info)->contents == 0 && \ + (info)->read_exec_only == 1 && \ + (info)->seg_32bit == 0 && \ + (info)->limit_in_pages == 0 && \ + (info)->seg_not_present == 1 && \ + (info)->useable == 0 ) + +#endif diff --git a/arch/x86/um/shared/sysdep/host_ldt_64.h b/arch/x86/um/shared/sysdep/host_ldt_64.h new file mode 100644 index 000000000000..e8b1be1e154f --- /dev/null +++ b/arch/x86/um/shared/sysdep/host_ldt_64.h @@ -0,0 +1,38 @@ +#ifndef __ASM_HOST_LDT_X86_64_H +#define __ASM_HOST_LDT_X86_64_H + +#include <asm/ldt.h> + +/* + * macros stolen from include/asm-x86_64/desc.h + */ +#define LDT_entry_a(info) \ + ((((info)->base_addr & 0x0000ffff) << 16) | ((info)->limit & 0x0ffff)) + +/* Don't allow setting of the lm bit. It is useless anyways because + * 64bit system calls require __USER_CS. */ +#define LDT_entry_b(info) \ + (((info)->base_addr & 0xff000000) | \ + (((info)->base_addr & 0x00ff0000) >> 16) | \ + ((info)->limit & 0xf0000) | \ + (((info)->read_exec_only ^ 1) << 9) | \ + ((info)->contents << 10) | \ + (((info)->seg_not_present ^ 1) << 15) | \ + ((info)->seg_32bit << 22) | \ + ((info)->limit_in_pages << 23) | \ + ((info)->useable << 20) | \ + /* ((info)->lm << 21) | */ \ + 0x7000) + +#define LDT_empty(info) (\ + (info)->base_addr == 0 && \ + (info)->limit == 0 && \ + (info)->contents == 0 && \ + (info)->read_exec_only == 1 && \ + (info)->seg_32bit == 0 && \ + (info)->limit_in_pages == 0 && \ + (info)->seg_not_present == 1 && \ + (info)->useable == 0 && \ + (info)->lm == 0) + +#endif diff --git a/arch/x86/um/shared/sysdep/kernel-offsets.h b/arch/x86/um/shared/sysdep/kernel-offsets.h new file mode 100644 index 000000000000..5868526b5eef --- /dev/null +++ b/arch/x86/um/shared/sysdep/kernel-offsets.h @@ -0,0 +1,21 @@ +#include <linux/stddef.h> +#include <linux/sched.h> +#include <linux/elf.h> +#include <linux/crypto.h> +#include <asm/mman.h> + +#define DEFINE(sym, val) \ + asm volatile("\n->" #sym " %0 " #val : : "i" (val)) + +#define STR(x) #x +#define DEFINE_STR(sym, val) asm volatile("\n->" #sym " " STR(val) " " #val: : ) + +#define BLANK() asm volatile("\n->" : : ) + +#define OFFSET(sym, str, mem) \ + DEFINE(sym, offsetof(struct str, mem)); + +void foo(void) +{ +#include <common-offsets.h> +} diff --git a/arch/x86/um/shared/sysdep/mcontext.h b/arch/x86/um/shared/sysdep/mcontext.h new file mode 100644 index 000000000000..b724c54da316 --- /dev/null +++ b/arch/x86/um/shared/sysdep/mcontext.h @@ -0,0 +1,31 @@ +/* + * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#ifndef __SYS_SIGCONTEXT_X86_H +#define __SYS_SIGCONTEXT_X86_H + +extern void get_regs_from_mc(struct uml_pt_regs *, mcontext_t *); + +#ifdef __i386__ + +#define GET_FAULTINFO_FROM_MC(fi, mc) \ + { \ + (fi).cr2 = (mc)->cr2; \ + (fi).error_code = (mc)->gregs[REG_ERR]; \ + (fi).trap_no = (mc)->gregs[REG_TRAPNO]; \ + } + +#else + +#define GET_FAULTINFO_FROM_MC(fi, mc) \ + { \ + (fi).cr2 = (mc)->gregs[REG_CR2]; \ + (fi).error_code = (mc)->gregs[REG_ERR]; \ + (fi).trap_no = (mc)->gregs[REG_TRAPNO]; \ + } + +#endif + +#endif diff --git a/arch/x86/um/shared/sysdep/ptrace.h b/arch/x86/um/shared/sysdep/ptrace.h new file mode 100644 index 000000000000..711b1621747f --- /dev/null +++ b/arch/x86/um/shared/sysdep/ptrace.h @@ -0,0 +1,5 @@ +#ifdef __i386__ +#include "ptrace_32.h" +#else +#include "ptrace_64.h" +#endif diff --git a/arch/x86/um/shared/sysdep/ptrace_32.h b/arch/x86/um/shared/sysdep/ptrace_32.h new file mode 100644 index 000000000000..ce77fa1e2a15 --- /dev/null +++ b/arch/x86/um/shared/sysdep/ptrace_32.h @@ -0,0 +1,117 @@ +/* + * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#ifndef __SYSDEP_I386_PTRACE_H +#define __SYSDEP_I386_PTRACE_H + +#include <generated/user_constants.h> +#include "sysdep/faultinfo.h" + +#define MAX_REG_NR (UM_FRAME_SIZE / sizeof(unsigned long)) +#define MAX_REG_OFFSET (UM_FRAME_SIZE) + +static inline void update_debugregs(int seq) {} + +/* syscall emulation path in ptrace */ + +#ifndef PTRACE_SYSEMU +#define PTRACE_SYSEMU 31 +#endif + +void set_using_sysemu(int value); +int get_using_sysemu(void); +extern int sysemu_supported; + +#define REGS_IP(r) ((r)[HOST_IP]) +#define REGS_SP(r) ((r)[HOST_SP]) +#define REGS_EFLAGS(r) ((r)[HOST_EFLAGS]) +#define REGS_EAX(r) ((r)[HOST_EAX]) +#define REGS_EBX(r) ((r)[HOST_EBX]) +#define REGS_ECX(r) ((r)[HOST_ECX]) +#define REGS_EDX(r) ((r)[HOST_EDX]) +#define REGS_ESI(r) ((r)[HOST_ESI]) +#define REGS_EDI(r) ((r)[HOST_EDI]) +#define REGS_EBP(r) ((r)[HOST_EBP]) +#define REGS_CS(r) ((r)[HOST_CS]) +#define REGS_SS(r) ((r)[HOST_SS]) +#define REGS_DS(r) ((r)[HOST_DS]) +#define REGS_ES(r) ((r)[HOST_ES]) +#define REGS_FS(r) ((r)[HOST_FS]) +#define REGS_GS(r) ((r)[HOST_GS]) + +#define REGS_SET_SYSCALL_RETURN(r, res) REGS_EAX(r) = (res) + +#define IP_RESTART_SYSCALL(ip) ((ip) -= 2) +#define REGS_RESTART_SYSCALL(r) IP_RESTART_SYSCALL(REGS_IP(r)) + +#ifndef PTRACE_SYSEMU_SINGLESTEP +#define PTRACE_SYSEMU_SINGLESTEP 32 +#endif + +struct uml_pt_regs { + unsigned long gp[MAX_REG_NR]; + unsigned long fp[HOST_FPX_SIZE]; + struct faultinfo faultinfo; + long syscall; + int is_user; +}; + +#define EMPTY_UML_PT_REGS { } + +#define UPT_IP(r) REGS_IP((r)->gp) +#define UPT_SP(r) REGS_SP((r)->gp) +#define UPT_EFLAGS(r) REGS_EFLAGS((r)->gp) +#define UPT_EAX(r) REGS_EAX((r)->gp) +#define UPT_EBX(r) REGS_EBX((r)->gp) +#define UPT_ECX(r) REGS_ECX((r)->gp) +#define UPT_EDX(r) REGS_EDX((r)->gp) +#define UPT_ESI(r) REGS_ESI((r)->gp) +#define UPT_EDI(r) REGS_EDI((r)->gp) +#define UPT_EBP(r) REGS_EBP((r)->gp) +#define UPT_ORIG_EAX(r) ((r)->syscall) +#define UPT_CS(r) REGS_CS((r)->gp) +#define UPT_SS(r) REGS_SS((r)->gp) +#define UPT_DS(r) REGS_DS((r)->gp) +#define UPT_ES(r) REGS_ES((r)->gp) +#define UPT_FS(r) REGS_FS((r)->gp) +#define UPT_GS(r) REGS_GS((r)->gp) + +#define UPT_SYSCALL_ARG1(r) UPT_EBX(r) +#define UPT_SYSCALL_ARG2(r) UPT_ECX(r) +#define UPT_SYSCALL_ARG3(r) UPT_EDX(r) +#define UPT_SYSCALL_ARG4(r) UPT_ESI(r) +#define UPT_SYSCALL_ARG5(r) UPT_EDI(r) +#define UPT_SYSCALL_ARG6(r) UPT_EBP(r) + +extern int user_context(unsigned long sp); + +#define UPT_IS_USER(r) ((r)->is_user) + +struct syscall_args { + unsigned long args[6]; +}; + +#define SYSCALL_ARGS(r) ((struct syscall_args) \ + { .args = { UPT_SYSCALL_ARG1(r), \ + UPT_SYSCALL_ARG2(r), \ + UPT_SYSCALL_ARG3(r), \ + UPT_SYSCALL_ARG4(r), \ + UPT_SYSCALL_ARG5(r), \ + UPT_SYSCALL_ARG6(r) } } ) + +#define UPT_SET_SYSCALL_RETURN(r, res) \ + REGS_SET_SYSCALL_RETURN((r)->regs, (res)) + +#define UPT_RESTART_SYSCALL(r) REGS_RESTART_SYSCALL((r)->gp) + +#define UPT_ORIG_SYSCALL(r) UPT_EAX(r) +#define UPT_SYSCALL_NR(r) UPT_ORIG_EAX(r) +#define UPT_SYSCALL_RET(r) UPT_EAX(r) + +#define UPT_FAULTINFO(r) (&(r)->faultinfo) + +extern void arch_init_registers(int pid); + +#endif diff --git a/arch/x86/um/shared/sysdep/ptrace_64.h b/arch/x86/um/shared/sysdep/ptrace_64.h new file mode 100644 index 000000000000..866fe7e47369 --- /dev/null +++ b/arch/x86/um/shared/sysdep/ptrace_64.h @@ -0,0 +1,160 @@ +/* + * Copyright 2003 PathScale, Inc. + * Copyright (C) 2003 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * + * Licensed under the GPL + */ + +#ifndef __SYSDEP_X86_64_PTRACE_H +#define __SYSDEP_X86_64_PTRACE_H + +#include <generated/user_constants.h> +#include "sysdep/faultinfo.h" + +#define MAX_REG_OFFSET (UM_FRAME_SIZE) +#define MAX_REG_NR ((MAX_REG_OFFSET) / sizeof(unsigned long)) + +#define REGS_IP(r) ((r)[HOST_IP]) +#define REGS_SP(r) ((r)[HOST_SP]) + +#define REGS_RBX(r) ((r)[HOST_RBX]) +#define REGS_RCX(r) ((r)[HOST_RCX]) +#define REGS_RDX(r) ((r)[HOST_RDX]) +#define REGS_RSI(r) ((r)[HOST_RSI]) +#define REGS_RDI(r) ((r)[HOST_RDI]) +#define REGS_RBP(r) ((r)[HOST_RBP]) +#define REGS_RAX(r) ((r)[HOST_RAX]) +#define REGS_R8(r) ((r)[HOST_R8]) +#define REGS_R9(r) ((r)[HOST_R9]) +#define REGS_R10(r) ((r)[HOST_R10]) +#define REGS_R11(r) ((r)[HOST_R11]) +#define REGS_R12(r) ((r)[HOST_R12]) +#define REGS_R13(r) ((r)[HOST_R13]) +#define REGS_R14(r) ((r)[HOST_R14]) +#define REGS_R15(r) ((r)[HOST_R15]) +#define REGS_CS(r) ((r)[HOST_CS]) +#define REGS_EFLAGS(r) ((r)[HOST_EFLAGS]) +#define REGS_SS(r) ((r)[HOST_SS]) + +#define HOST_FS_BASE 21 +#define HOST_GS_BASE 22 +#define HOST_DS 23 +#define HOST_ES 24 +#define HOST_FS 25 +#define HOST_GS 26 + +/* Also defined in asm/ptrace-x86_64.h, but not in libc headers. So, these + * are already defined for kernel code, but not for userspace code. + */ +#ifndef FS_BASE +/* These aren't defined in ptrace.h, but exist in struct user_regs_struct, + * which is what x86_64 ptrace actually uses. + */ +#define FS_BASE (HOST_FS_BASE * sizeof(long)) +#define GS_BASE (HOST_GS_BASE * sizeof(long)) +#define DS (HOST_DS * sizeof(long)) +#define ES (HOST_ES * sizeof(long)) +#define FS (HOST_FS * sizeof(long)) +#define GS (HOST_GS * sizeof(long)) +#endif + +#define REGS_FS_BASE(r) ((r)[HOST_FS_BASE]) +#define REGS_GS_BASE(r) ((r)[HOST_GS_BASE]) +#define REGS_DS(r) ((r)[HOST_DS]) +#define REGS_ES(r) ((r)[HOST_ES]) +#define REGS_FS(r) ((r)[HOST_FS]) +#define REGS_GS(r) ((r)[HOST_GS]) + +#define REGS_ORIG_RAX(r) ((r)[HOST_ORIG_RAX]) + +#define REGS_SET_SYSCALL_RETURN(r, res) REGS_RAX(r) = (res) + +#define IP_RESTART_SYSCALL(ip) ((ip) -= 2) +#define REGS_RESTART_SYSCALL(r) IP_RESTART_SYSCALL(REGS_IP(r)) + +#define REGS_FAULT_ADDR(r) ((r)->fault_addr) + +#define REGS_FAULT_WRITE(r) FAULT_WRITE((r)->fault_type) + +#define REGS_TRAP(r) ((r)->trap_type) + +#define REGS_ERR(r) ((r)->fault_type) + +struct uml_pt_regs { + unsigned long gp[MAX_REG_NR]; + unsigned long fp[HOST_FP_SIZE]; + struct faultinfo faultinfo; + long syscall; + int is_user; +}; + +#define EMPTY_UML_PT_REGS { } + +#define UPT_RBX(r) REGS_RBX((r)->gp) +#define UPT_RCX(r) REGS_RCX((r)->gp) +#define UPT_RDX(r) REGS_RDX((r)->gp) +#define UPT_RSI(r) REGS_RSI((r)->gp) +#define UPT_RDI(r) REGS_RDI((r)->gp) +#define UPT_RBP(r) REGS_RBP((r)->gp) +#define UPT_RAX(r) REGS_RAX((r)->gp) +#define UPT_R8(r) REGS_R8((r)->gp) +#define UPT_R9(r) REGS_R9((r)->gp) +#define UPT_R10(r) REGS_R10((r)->gp) +#define UPT_R11(r) REGS_R11((r)->gp) +#define UPT_R12(r) REGS_R12((r)->gp) +#define UPT_R13(r) REGS_R13((r)->gp) +#define UPT_R14(r) REGS_R14((r)->gp) +#define UPT_R15(r) REGS_R15((r)->gp) +#define UPT_CS(r) REGS_CS((r)->gp) +#define UPT_FS_BASE(r) REGS_FS_BASE((r)->gp) +#define UPT_FS(r) REGS_FS((r)->gp) +#define UPT_GS_BASE(r) REGS_GS_BASE((r)->gp) +#define UPT_GS(r) REGS_GS((r)->gp) +#define UPT_DS(r) REGS_DS((r)->gp) +#define UPT_ES(r) REGS_ES((r)->gp) +#define UPT_CS(r) REGS_CS((r)->gp) +#define UPT_SS(r) REGS_SS((r)->gp) +#define UPT_ORIG_RAX(r) REGS_ORIG_RAX((r)->gp) + +#define UPT_IP(r) REGS_IP((r)->gp) +#define UPT_SP(r) REGS_SP((r)->gp) + +#define UPT_EFLAGS(r) REGS_EFLAGS((r)->gp) +#define UPT_SYSCALL_NR(r) ((r)->syscall) +#define UPT_SYSCALL_RET(r) UPT_RAX(r) + +extern int user_context(unsigned long sp); + +#define UPT_IS_USER(r) ((r)->is_user) + +#define UPT_SYSCALL_ARG1(r) UPT_RDI(r) +#define UPT_SYSCALL_ARG2(r) UPT_RSI(r) +#define UPT_SYSCALL_ARG3(r) UPT_RDX(r) +#define UPT_SYSCALL_ARG4(r) UPT_R10(r) +#define UPT_SYSCALL_ARG5(r) UPT_R8(r) +#define UPT_SYSCALL_ARG6(r) UPT_R9(r) + +struct syscall_args { + unsigned long args[6]; +}; + +#define SYSCALL_ARGS(r) ((struct syscall_args) \ + { .args = { UPT_SYSCALL_ARG1(r), \ + UPT_SYSCALL_ARG2(r), \ + UPT_SYSCALL_ARG3(r), \ + UPT_SYSCALL_ARG4(r), \ + UPT_SYSCALL_ARG5(r), \ + UPT_SYSCALL_ARG6(r) } } ) + +#define UPT_SET_SYSCALL_RETURN(r, res) \ + REGS_SET_SYSCALL_RETURN((r)->regs, (res)) + +#define UPT_RESTART_SYSCALL(r) REGS_RESTART_SYSCALL((r)->gp) + +#define UPT_FAULTINFO(r) (&(r)->faultinfo) + +static inline void arch_init_registers(int pid) +{ +} + +#endif diff --git a/arch/x86/um/shared/sysdep/ptrace_user.h b/arch/x86/um/shared/sysdep/ptrace_user.h new file mode 100644 index 000000000000..a92f883264ed --- /dev/null +++ b/arch/x86/um/shared/sysdep/ptrace_user.h @@ -0,0 +1,5 @@ +#ifdef __i386__ +#include "ptrace_user_32.h" +#else +#include "ptrace_user_64.h" +#endif diff --git a/arch/x86/um/shared/sysdep/ptrace_user_32.h b/arch/x86/um/shared/sysdep/ptrace_user_32.h new file mode 100644 index 000000000000..9d88a79a138b --- /dev/null +++ b/arch/x86/um/shared/sysdep/ptrace_user_32.h @@ -0,0 +1,26 @@ +/* + * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) + * Licensed under the GPL + */ + +#ifndef __SYSDEP_I386_PTRACE_USER_H__ +#define __SYSDEP_I386_PTRACE_USER_H__ + +#include <sys/ptrace.h> +#include <linux/ptrace.h> +#include <asm/ptrace.h> +#include <generated/user_constants.h> + +#define PT_OFFSET(r) ((r) * sizeof(long)) + +#define PT_SYSCALL_NR(regs) ((regs)[ORIG_EAX]) +#define PT_SYSCALL_NR_OFFSET PT_OFFSET(ORIG_EAX) + +#define PT_SYSCALL_RET_OFFSET PT_OFFSET(EAX) + +#define REGS_IP_INDEX EIP +#define REGS_SP_INDEX UESP + +#define FP_SIZE ((HOST_FPX_SIZE > HOST_FP_SIZE) ? HOST_FPX_SIZE : HOST_FP_SIZE) + +#endif diff --git a/arch/x86/um/shared/sysdep/ptrace_user_64.h b/arch/x86/um/shared/sysdep/ptrace_user_64.h new file mode 100644 index 000000000000..2f1b6e33d590 --- /dev/null +++ b/arch/x86/um/shared/sysdep/ptrace_user_64.h @@ -0,0 +1,38 @@ +/* + * Copyright 2003 PathScale, Inc. + * + * Licensed under the GPL + */ + +#ifndef __SYSDEP_X86_64_PTRACE_USER_H__ +#define __SYSDEP_X86_64_PTRACE_USER_H__ + +#define __FRAME_OFFSETS +#include <sys/ptrace.h> +#include <linux/ptrace.h> +#include <asm/ptrace.h> +#undef __FRAME_OFFSETS +#include <generated/user_constants.h> + +#define PT_INDEX(off) ((off) / sizeof(unsigned long)) + +#define PT_SYSCALL_NR(regs) ((regs)[PT_INDEX(ORIG_RAX)]) +#define PT_SYSCALL_NR_OFFSET (ORIG_RAX) + +#define PT_SYSCALL_RET_OFFSET (RAX) + +/* + * x86_64 FC3 doesn't define this in /usr/include/linux/ptrace.h even though + * it's defined in the kernel's include/linux/ptrace.h. Additionally, use the + * 2.4 name and value for 2.4 host compatibility. + */ +#ifndef PTRACE_OLDSETOPTIONS +#define PTRACE_OLDSETOPTIONS 21 +#endif + +#define REGS_IP_INDEX PT_INDEX(RIP) +#define REGS_SP_INDEX PT_INDEX(RSP) + +#define FP_SIZE (HOST_FP_SIZE) + +#endif diff --git a/arch/x86/um/shared/sysdep/skas_ptrace.h b/arch/x86/um/shared/sysdep/skas_ptrace.h new file mode 100644 index 000000000000..453febe98993 --- /dev/null +++ b/arch/x86/um/shared/sysdep/skas_ptrace.h @@ -0,0 +1,22 @@ +/* + * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) + * Licensed under the GPL + */ + +#ifndef __SYSDEP_X86_SKAS_PTRACE_H +#define __SYSDEP_X86_SKAS_PTRACE_H + +struct ptrace_faultinfo { + int is_write; + unsigned long addr; +}; + +struct ptrace_ldt { + int func; + void *ptr; + unsigned long bytecount; +}; + +#define PTRACE_LDT 54 + +#endif diff --git a/arch/x86/um/shared/sysdep/stub.h b/arch/x86/um/shared/sysdep/stub.h new file mode 100644 index 000000000000..bd161e300102 --- /dev/null +++ b/arch/x86/um/shared/sysdep/stub.h @@ -0,0 +1,14 @@ +#include <asm/unistd.h> +#include <sys/mman.h> +#include <signal.h> +#include "as-layout.h" +#include "stub-data.h" + +#ifdef __i386__ +#include "stub_32.h" +#else +#include "stub_64.h" +#endif + +extern void stub_segv_handler(int, siginfo_t *, void *); +extern void stub_clone_handler(void); diff --git a/arch/x86/um/shared/sysdep/stub_32.h b/arch/x86/um/shared/sysdep/stub_32.h new file mode 100644 index 000000000000..51fd256c75f0 --- /dev/null +++ b/arch/x86/um/shared/sysdep/stub_32.h @@ -0,0 +1,93 @@ +/* + * Copyright (C) 2004 Jeff Dike (jdike@addtoit.com) + * Licensed under the GPL + */ + +#ifndef __SYSDEP_STUB_H +#define __SYSDEP_STUB_H + +#include <asm/ptrace.h> + +#define STUB_SYSCALL_RET EAX +#define STUB_MMAP_NR __NR_mmap2 +#define MMAP_OFFSET(o) ((o) >> UM_KERN_PAGE_SHIFT) + +static inline long stub_syscall0(long syscall) +{ + long ret; + + __asm__ volatile ("int $0x80" : "=a" (ret) : "0" (syscall)); + + return ret; +} + +static inline long stub_syscall1(long syscall, long arg1) +{ + long ret; + + __asm__ volatile ("int $0x80" : "=a" (ret) : "0" (syscall), "b" (arg1)); + + return ret; +} + +static inline long stub_syscall2(long syscall, long arg1, long arg2) +{ + long ret; + + __asm__ volatile ("int $0x80" : "=a" (ret) : "0" (syscall), "b" (arg1), + "c" (arg2)); + + return ret; +} + +static inline long stub_syscall3(long syscall, long arg1, long arg2, long arg3) +{ + long ret; + + __asm__ volatile ("int $0x80" : "=a" (ret) : "0" (syscall), "b" (arg1), + "c" (arg2), "d" (arg3)); + + return ret; +} + +static inline long stub_syscall4(long syscall, long arg1, long arg2, long arg3, + long arg4) +{ + long ret; + + __asm__ volatile ("int $0x80" : "=a" (ret) : "0" (syscall), "b" (arg1), + "c" (arg2), "d" (arg3), "S" (arg4)); + + return ret; +} + +static inline long stub_syscall5(long syscall, long arg1, long arg2, long arg3, + long arg4, long arg5) +{ + long ret; + + __asm__ volatile ("int $0x80" : "=a" (ret) : "0" (syscall), "b" (arg1), + "c" (arg2), "d" (arg3), "S" (arg4), "D" (arg5)); + + return ret; +} + +static inline void trap_myself(void) +{ + __asm("int3"); +} + +static inline void remap_stack(int fd, unsigned long offset) +{ + __asm__ volatile ("movl %%eax,%%ebp ; movl %0,%%eax ; int $0x80 ;" + "movl %7, %%ebx ; movl %%eax, (%%ebx)" + : : "g" (STUB_MMAP_NR), "b" (STUB_DATA), + "c" (UM_KERN_PAGE_SIZE), + "d" (PROT_READ | PROT_WRITE), + "S" (MAP_FIXED | MAP_SHARED), "D" (fd), + "a" (offset), + "i" (&((struct stub_data *) STUB_DATA)->err) + : "memory"); +} + +#endif diff --git a/arch/x86/um/shared/sysdep/stub_64.h b/arch/x86/um/shared/sysdep/stub_64.h new file mode 100644 index 000000000000..994df93c5ed3 --- /dev/null +++ b/arch/x86/um/shared/sysdep/stub_64.h @@ -0,0 +1,99 @@ +/* + * Copyright (C) 2004 Jeff Dike (jdike@addtoit.com) + * Licensed under the GPL + */ + +#ifndef __SYSDEP_STUB_H +#define __SYSDEP_STUB_H + +#include <sysdep/ptrace_user.h> + +#define STUB_SYSCALL_RET PT_INDEX(RAX) +#define STUB_MMAP_NR __NR_mmap +#define MMAP_OFFSET(o) (o) + +#define __syscall_clobber "r11","rcx","memory" +#define __syscall "syscall" + +static inline long stub_syscall0(long syscall) +{ + long ret; + + __asm__ volatile (__syscall + : "=a" (ret) + : "0" (syscall) : __syscall_clobber ); + + return ret; +} + +static inline long stub_syscall2(long syscall, long arg1, long arg2) +{ + long ret; + + __asm__ volatile (__syscall + : "=a" (ret) + : "0" (syscall), "D" (arg1), "S" (arg2) : __syscall_clobber ); + + return ret; +} + +static inline long stub_syscall3(long syscall, long arg1, long arg2, long arg3) +{ + long ret; + + __asm__ volatile (__syscall + : "=a" (ret) + : "0" (syscall), "D" (arg1), "S" (arg2), "d" (arg3) + : __syscall_clobber ); + + return ret; +} + +static inline long stub_syscall4(long syscall, long arg1, long arg2, long arg3, + long arg4) +{ + long ret; + + __asm__ volatile ("movq %5,%%r10 ; " __syscall + : "=a" (ret) + : "0" (syscall), "D" (arg1), "S" (arg2), "d" (arg3), + "g" (arg4) + : __syscall_clobber, "r10" ); + + return ret; +} + +static inline long stub_syscall5(long syscall, long arg1, long arg2, long arg3, + long arg4, long arg5) +{ + long ret; + + __asm__ volatile ("movq %5,%%r10 ; movq %6,%%r8 ; " __syscall + : "=a" (ret) + : "0" (syscall), "D" (arg1), "S" (arg2), "d" (arg3), + "g" (arg4), "g" (arg5) + : __syscall_clobber, "r10", "r8" ); + + return ret; +} + +static inline void trap_myself(void) +{ + __asm("int3"); +} + +static inline void remap_stack(long fd, unsigned long offset) +{ + __asm__ volatile ("movq %4,%%r10 ; movq %5,%%r8 ; " + "movq %6, %%r9; " __syscall "; movq %7, %%rbx ; " + "movq %%rax, (%%rbx)": + : "a" (STUB_MMAP_NR), "D" (STUB_DATA), + "S" (UM_KERN_PAGE_SIZE), + "d" (PROT_READ | PROT_WRITE), + "g" (MAP_FIXED | MAP_SHARED), "g" (fd), + "g" (offset), + "i" (&((struct stub_data *) STUB_DATA)->err) + : __syscall_clobber, "r10", "r8", "r9" ); +} + +#endif diff --git a/arch/x86/um/shared/sysdep/syscalls.h b/arch/x86/um/shared/sysdep/syscalls.h new file mode 100644 index 000000000000..bd9a89b67e41 --- /dev/null +++ b/arch/x86/um/shared/sysdep/syscalls.h @@ -0,0 +1,5 @@ +#ifdef __i386__ +#include "syscalls_32.h" +#else +#include "syscalls_64.h" +#endif diff --git a/arch/x86/um/shared/sysdep/syscalls_32.h b/arch/x86/um/shared/sysdep/syscalls_32.h new file mode 100644 index 000000000000..05cb796aecb5 --- /dev/null +++ b/arch/x86/um/shared/sysdep/syscalls_32.h @@ -0,0 +1,20 @@ +/* + * Copyright (C) 2000 - 2008 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#include "asm/unistd.h" +#include "sysdep/ptrace.h" + +typedef long syscall_handler_t(struct pt_regs); + +/* Not declared on x86, incompatible declarations on x86_64, so these have + * to go here rather than in sys_call_table.c + */ +extern syscall_handler_t sys_rt_sigaction; + +extern syscall_handler_t *sys_call_table[]; + +#define EXECUTE_SYSCALL(syscall, regs) \ + ((long (*)(struct syscall_args)) \ + (*sys_call_table[syscall]))(SYSCALL_ARGS(®s->regs)) diff --git a/arch/x86/um/shared/sysdep/syscalls_64.h b/arch/x86/um/shared/sysdep/syscalls_64.h new file mode 100644 index 000000000000..8a7d5e1da98e --- /dev/null +++ b/arch/x86/um/shared/sysdep/syscalls_64.h @@ -0,0 +1,32 @@ +/* + * Copyright 2003 PathScale, Inc. + * + * Licensed under the GPL + */ + +#ifndef __SYSDEP_X86_64_SYSCALLS_H__ +#define __SYSDEP_X86_64_SYSCALLS_H__ + +#include <linux/msg.h> +#include <linux/shm.h> + +typedef long syscall_handler_t(void); + +extern syscall_handler_t *sys_call_table[]; + +#define EXECUTE_SYSCALL(syscall, regs) \ + (((long (*)(long, long, long, long, long, long)) \ + (*sys_call_table[syscall]))(UPT_SYSCALL_ARG1(®s->regs), \ + UPT_SYSCALL_ARG2(®s->regs), \ + UPT_SYSCALL_ARG3(®s->regs), \ + UPT_SYSCALL_ARG4(®s->regs), \ + UPT_SYSCALL_ARG5(®s->regs), \ + UPT_SYSCALL_ARG6(®s->regs))) + +extern long old_mmap(unsigned long addr, unsigned long len, + unsigned long prot, unsigned long flags, + unsigned long fd, unsigned long pgoff); +extern syscall_handler_t sys_modify_ldt; +extern syscall_handler_t sys_arch_prctl; + +#endif diff --git a/arch/x86/um/shared/sysdep/tls.h b/arch/x86/um/shared/sysdep/tls.h new file mode 100644 index 000000000000..4d8f75262370 --- /dev/null +++ b/arch/x86/um/shared/sysdep/tls.h @@ -0,0 +1,5 @@ +#ifdef __i386__ +#include "tls_32.h" +#else +#include "tls_64.h" +#endif diff --git a/arch/x86/um/shared/sysdep/tls_32.h b/arch/x86/um/shared/sysdep/tls_32.h new file mode 100644 index 000000000000..34550755b2a1 --- /dev/null +++ b/arch/x86/um/shared/sysdep/tls_32.h @@ -0,0 +1,32 @@ +#ifndef _SYSDEP_TLS_H +#define _SYSDEP_TLS_H + +# ifndef __KERNEL__ + +/* Change name to avoid conflicts with the original one from <asm/ldt.h>, which + * may be named user_desc (but in 2.4 and in header matching its API was named + * modify_ldt_ldt_s). */ + +typedef struct um_dup_user_desc { + unsigned int entry_number; + unsigned int base_addr; + unsigned int limit; + unsigned int seg_32bit:1; + unsigned int contents:2; + unsigned int read_exec_only:1; + unsigned int limit_in_pages:1; + unsigned int seg_not_present:1; + unsigned int useable:1; +} user_desc_t; + +# else /* __KERNEL__ */ + +# include <ldt.h> +typedef struct user_desc user_desc_t; + +# endif /* __KERNEL__ */ + +#define GDT_ENTRY_TLS_MIN_I386 6 +#define GDT_ENTRY_TLS_MIN_X86_64 12 + +#endif /* _SYSDEP_TLS_H */ diff --git a/arch/x86/um/shared/sysdep/tls_64.h b/arch/x86/um/shared/sysdep/tls_64.h new file mode 100644 index 000000000000..18c000d0357a --- /dev/null +++ b/arch/x86/um/shared/sysdep/tls_64.h @@ -0,0 +1,29 @@ +#ifndef _SYSDEP_TLS_H +#define _SYSDEP_TLS_H + +# ifndef __KERNEL__ + +/* Change name to avoid conflicts with the original one from <asm/ldt.h>, which + * may be named user_desc (but in 2.4 and in header matching its API was named + * modify_ldt_ldt_s). */ + +typedef struct um_dup_user_desc { + unsigned int entry_number; + unsigned int base_addr; + unsigned int limit; + unsigned int seg_32bit:1; + unsigned int contents:2; + unsigned int read_exec_only:1; + unsigned int limit_in_pages:1; + unsigned int seg_not_present:1; + unsigned int useable:1; + unsigned int lm:1; +} user_desc_t; + +# else /* __KERNEL__ */ + +# include <ldt.h> +typedef struct user_desc user_desc_t; + +# endif /* __KERNEL__ */ +#endif /* _SYSDEP_TLS_H */ diff --git a/arch/x86/um/signal_32.c b/arch/x86/um/signal_32.c new file mode 100644 index 000000000000..bcbfb0d64813 --- /dev/null +++ b/arch/x86/um/signal_32.c @@ -0,0 +1,498 @@ +/* + * Copyright (C) 2004 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#include <linux/ptrace.h> +#include <asm/unistd.h> +#include <asm/uaccess.h> +#include <asm/ucontext.h> +#include "frame_kern.h" +#include "skas.h" + +/* + * FPU tag word conversions. + */ + +static inline unsigned short twd_i387_to_fxsr(unsigned short twd) +{ + unsigned int tmp; /* to avoid 16 bit prefixes in the code */ + + /* Transform each pair of bits into 01 (valid) or 00 (empty) */ + tmp = ~twd; + tmp = (tmp | (tmp>>1)) & 0x5555; /* 0V0V0V0V0V0V0V0V */ + /* and move the valid bits to the lower byte. */ + tmp = (tmp | (tmp >> 1)) & 0x3333; /* 00VV00VV00VV00VV */ + tmp = (tmp | (tmp >> 2)) & 0x0f0f; /* 0000VVVV0000VVVV */ + tmp = (tmp | (tmp >> 4)) & 0x00ff; /* 00000000VVVVVVVV */ + return tmp; +} + +static inline unsigned long twd_fxsr_to_i387(struct user_fxsr_struct *fxsave) +{ + struct _fpxreg *st = NULL; + unsigned long twd = (unsigned long) fxsave->twd; + unsigned long tag; + unsigned long ret = 0xffff0000; + int i; + +#define FPREG_ADDR(f, n) ((char *)&(f)->st_space + (n) * 16) + + for (i = 0; i < 8; i++) { + if (twd & 0x1) { + st = (struct _fpxreg *) FPREG_ADDR(fxsave, i); + + switch (st->exponent & 0x7fff) { + case 0x7fff: + tag = 2; /* Special */ + break; + case 0x0000: + if ( !st->significand[0] && + !st->significand[1] && + !st->significand[2] && + !st->significand[3] ) { + tag = 1; /* Zero */ + } else { + tag = 2; /* Special */ + } + break; + default: + if (st->significand[3] & 0x8000) { + tag = 0; /* Valid */ + } else { + tag = 2; /* Special */ + } + break; + } + } else { + tag = 3; /* Empty */ + } + ret |= (tag << (2 * i)); + twd = twd >> 1; + } + return ret; +} + +static int convert_fxsr_to_user(struct _fpstate __user *buf, + struct user_fxsr_struct *fxsave) +{ + unsigned long env[7]; + struct _fpreg __user *to; + struct _fpxreg *from; + int i; + + env[0] = (unsigned long)fxsave->cwd | 0xffff0000ul; + env[1] = (unsigned long)fxsave->swd | 0xffff0000ul; + env[2] = twd_fxsr_to_i387(fxsave); + env[3] = fxsave->fip; + env[4] = fxsave->fcs | ((unsigned long)fxsave->fop << 16); + env[5] = fxsave->foo; + env[6] = fxsave->fos; + + if (__copy_to_user(buf, env, 7 * sizeof(unsigned long))) + return 1; + + to = &buf->_st[0]; + from = (struct _fpxreg *) &fxsave->st_space[0]; + for (i = 0; i < 8; i++, to++, from++) { + unsigned long __user *t = (unsigned long __user *)to; + unsigned long *f = (unsigned long *)from; + + if (__put_user(*f, t) || + __put_user(*(f + 1), t + 1) || + __put_user(from->exponent, &to->exponent)) + return 1; + } + return 0; +} + +static int convert_fxsr_from_user(struct user_fxsr_struct *fxsave, + struct _fpstate __user *buf) +{ + unsigned long env[7]; + struct _fpxreg *to; + struct _fpreg __user *from; + int i; + + if (copy_from_user( env, buf, 7 * sizeof(long))) + return 1; + + fxsave->cwd = (unsigned short)(env[0] & 0xffff); + fxsave->swd = (unsigned short)(env[1] & 0xffff); + fxsave->twd = twd_i387_to_fxsr((unsigned short)(env[2] & 0xffff)); + fxsave->fip = env[3]; + fxsave->fop = (unsigned short)((env[4] & 0xffff0000ul) >> 16); + fxsave->fcs = (env[4] & 0xffff); + fxsave->foo = env[5]; + fxsave->fos = env[6]; + + to = (struct _fpxreg *) &fxsave->st_space[0]; + from = &buf->_st[0]; + for (i = 0; i < 8; i++, to++, from++) { + unsigned long *t = (unsigned long *)to; + unsigned long __user *f = (unsigned long __user *)from; + + if (__get_user(*t, f) || + __get_user(*(t + 1), f + 1) || + __get_user(to->exponent, &from->exponent)) + return 1; + } + return 0; +} + +extern int have_fpx_regs; + +static int copy_sc_from_user(struct pt_regs *regs, + struct sigcontext __user *from) +{ + struct sigcontext sc; + int err, pid; + + err = copy_from_user(&sc, from, sizeof(sc)); + if (err) + return err; + + pid = userspace_pid[current_thread_info()->cpu]; + +#define GETREG(regno, regname) regs->regs.gp[HOST_##regno] = sc.regname + + GETREG(GS, gs); + GETREG(FS, fs); + GETREG(ES, es); + GETREG(DS, ds); + GETREG(EDI, di); + GETREG(ESI, si); + GETREG(EBP, bp); + GETREG(SP, sp); + GETREG(EBX, bx); + GETREG(EDX, dx); + GETREG(ECX, cx); + GETREG(EAX, ax); + GETREG(IP, ip); + GETREG(CS, cs); + GETREG(EFLAGS, flags); + GETREG(SS, ss); + +#undef GETREG + if (have_fpx_regs) { + struct user_fxsr_struct fpx; + + err = copy_from_user(&fpx, + &((struct _fpstate __user *)sc.fpstate)->_fxsr_env[0], + sizeof(struct user_fxsr_struct)); + if (err) + return 1; + + err = convert_fxsr_from_user(&fpx, sc.fpstate); + if (err) + return 1; + + err = restore_fpx_registers(pid, (unsigned long *) &fpx); + if (err < 0) { + printk(KERN_ERR "copy_sc_from_user - " + "restore_fpx_registers failed, errno = %d\n", + -err); + return 1; + } + } else { + struct user_i387_struct fp; + + err = copy_from_user(&fp, sc.fpstate, + sizeof(struct user_i387_struct)); + if (err) + return 1; + + err = restore_fp_registers(pid, (unsigned long *) &fp); + if (err < 0) { + printk(KERN_ERR "copy_sc_from_user - " + "restore_fp_registers failed, errno = %d\n", + -err); + return 1; + } + } + + return 0; +} + +static int copy_sc_to_user(struct sigcontext __user *to, + struct _fpstate __user *to_fp, struct pt_regs *regs, + unsigned long sp) +{ + struct sigcontext sc; + struct faultinfo * fi = ¤t->thread.arch.faultinfo; + int err, pid; + memset(&sc, 0, sizeof(struct sigcontext)); + + sc.gs = REGS_GS(regs->regs.gp); + sc.fs = REGS_FS(regs->regs.gp); + sc.es = REGS_ES(regs->regs.gp); + sc.ds = REGS_DS(regs->regs.gp); + sc.di = REGS_EDI(regs->regs.gp); + sc.si = REGS_ESI(regs->regs.gp); + sc.bp = REGS_EBP(regs->regs.gp); + sc.sp = sp; + sc.bx = REGS_EBX(regs->regs.gp); + sc.dx = REGS_EDX(regs->regs.gp); + sc.cx = REGS_ECX(regs->regs.gp); + sc.ax = REGS_EAX(regs->regs.gp); + sc.ip = REGS_IP(regs->regs.gp); + sc.cs = REGS_CS(regs->regs.gp); + sc.flags = REGS_EFLAGS(regs->regs.gp); + sc.sp_at_signal = regs->regs.gp[UESP]; + sc.ss = regs->regs.gp[SS]; + sc.cr2 = fi->cr2; + sc.err = fi->error_code; + sc.trapno = fi->trap_no; + + to_fp = (to_fp ? to_fp : (struct _fpstate __user *) (to + 1)); + sc.fpstate = to_fp; + + pid = userspace_pid[current_thread_info()->cpu]; + if (have_fpx_regs) { + struct user_fxsr_struct fpx; + + err = save_fpx_registers(pid, (unsigned long *) &fpx); + if (err < 0){ + printk(KERN_ERR "copy_sc_to_user - save_fpx_registers " + "failed, errno = %d\n", err); + return 1; + } + + err = convert_fxsr_to_user(to_fp, &fpx); + if (err) + return 1; + + err |= __put_user(fpx.swd, &to_fp->status); + err |= __put_user(X86_FXSR_MAGIC, &to_fp->magic); + if (err) + return 1; + + if (copy_to_user(&to_fp->_fxsr_env[0], &fpx, + sizeof(struct user_fxsr_struct))) + return 1; + } + else { + struct user_i387_struct fp; + + err = save_fp_registers(pid, (unsigned long *) &fp); + if (copy_to_user(to_fp, &fp, sizeof(struct user_i387_struct))) + return 1; + } + + return copy_to_user(to, &sc, sizeof(sc)); +} + +static int copy_ucontext_to_user(struct ucontext __user *uc, + struct _fpstate __user *fp, sigset_t *set, + unsigned long sp) +{ + int err = 0; + + err |= put_user(current->sas_ss_sp, &uc->uc_stack.ss_sp); + err |= put_user(sas_ss_flags(sp), &uc->uc_stack.ss_flags); + err |= put_user(current->sas_ss_size, &uc->uc_stack.ss_size); + err |= copy_sc_to_user(&uc->uc_mcontext, fp, ¤t->thread.regs, sp); + err |= copy_to_user(&uc->uc_sigmask, set, sizeof(*set)); + return err; +} + +struct sigframe +{ + char __user *pretcode; + int sig; + struct sigcontext sc; + struct _fpstate fpstate; + unsigned long extramask[_NSIG_WORDS-1]; + char retcode[8]; +}; + +struct rt_sigframe +{ + char __user *pretcode; + int sig; + struct siginfo __user *pinfo; + void __user *puc; + struct siginfo info; + struct ucontext uc; + struct _fpstate fpstate; + char retcode[8]; +}; + +int setup_signal_stack_sc(unsigned long stack_top, int sig, + struct k_sigaction *ka, struct pt_regs *regs, + sigset_t *mask) +{ + struct sigframe __user *frame; + void __user *restorer; + unsigned long save_sp = PT_REGS_SP(regs); + int err = 0; + + /* This is the same calculation as i386 - ((sp + 4) & 15) == 0 */ + stack_top = ((stack_top + 4) & -16UL) - 4; + frame = (struct sigframe __user *) stack_top - 1; + if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) + return 1; + + restorer = frame->retcode; + if (ka->sa.sa_flags & SA_RESTORER) + restorer = ka->sa.sa_restorer; + + /* Update SP now because the page fault handler refuses to extend + * the stack if the faulting address is too far below the current + * SP, which frame now certainly is. If there's an error, the original + * value is restored on the way out. + * When writing the sigcontext to the stack, we have to write the + * original value, so that's passed to copy_sc_to_user, which does + * the right thing with it. + */ + PT_REGS_SP(regs) = (unsigned long) frame; + + err |= __put_user(restorer, &frame->pretcode); + err |= __put_user(sig, &frame->sig); + err |= copy_sc_to_user(&frame->sc, NULL, regs, save_sp); + err |= __put_user(mask->sig[0], &frame->sc.oldmask); + if (_NSIG_WORDS > 1) + err |= __copy_to_user(&frame->extramask, &mask->sig[1], + sizeof(frame->extramask)); + + /* + * This is popl %eax ; movl $,%eax ; int $0x80 + * + * WE DO NOT USE IT ANY MORE! It's only left here for historical + * reasons and because gdb uses it as a signature to notice + * signal handler stack frames. + */ + err |= __put_user(0xb858, (short __user *)(frame->retcode+0)); + err |= __put_user(__NR_sigreturn, (int __user *)(frame->retcode+2)); + err |= __put_user(0x80cd, (short __user *)(frame->retcode+6)); + + if (err) + goto err; + + PT_REGS_SP(regs) = (unsigned long) frame; + PT_REGS_IP(regs) = (unsigned long) ka->sa.sa_handler; + PT_REGS_EAX(regs) = (unsigned long) sig; + PT_REGS_EDX(regs) = (unsigned long) 0; + PT_REGS_ECX(regs) = (unsigned long) 0; + + if ((current->ptrace & PT_DTRACE) && (current->ptrace & PT_PTRACED)) + ptrace_notify(SIGTRAP); + return 0; + +err: + PT_REGS_SP(regs) = save_sp; + return err; +} + +int setup_signal_stack_si(unsigned long stack_top, int sig, + struct k_sigaction *ka, struct pt_regs *regs, + siginfo_t *info, sigset_t *mask) +{ + struct rt_sigframe __user *frame; + void __user *restorer; + unsigned long save_sp = PT_REGS_SP(regs); + int err = 0; + + stack_top &= -8UL; + frame = (struct rt_sigframe __user *) stack_top - 1; + if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) + return 1; + + restorer = frame->retcode; + if (ka->sa.sa_flags & SA_RESTORER) + restorer = ka->sa.sa_restorer; + + /* See comment above about why this is here */ + PT_REGS_SP(regs) = (unsigned long) frame; + + err |= __put_user(restorer, &frame->pretcode); + err |= __put_user(sig, &frame->sig); + err |= __put_user(&frame->info, &frame->pinfo); + err |= __put_user(&frame->uc, &frame->puc); + err |= copy_siginfo_to_user(&frame->info, info); + err |= copy_ucontext_to_user(&frame->uc, &frame->fpstate, mask, + save_sp); + + /* + * This is movl $,%eax ; int $0x80 + * + * WE DO NOT USE IT ANY MORE! It's only left here for historical + * reasons and because gdb uses it as a signature to notice + * signal handler stack frames. + */ + err |= __put_user(0xb8, (char __user *)(frame->retcode+0)); + err |= __put_user(__NR_rt_sigreturn, (int __user *)(frame->retcode+1)); + err |= __put_user(0x80cd, (short __user *)(frame->retcode+5)); + + if (err) + goto err; + + PT_REGS_IP(regs) = (unsigned long) ka->sa.sa_handler; + PT_REGS_EAX(regs) = (unsigned long) sig; + PT_REGS_EDX(regs) = (unsigned long) &frame->info; + PT_REGS_ECX(regs) = (unsigned long) &frame->uc; + + if ((current->ptrace & PT_DTRACE) && (current->ptrace & PT_PTRACED)) + ptrace_notify(SIGTRAP); + return 0; + +err: + PT_REGS_SP(regs) = save_sp; + return err; +} + +long sys_sigreturn(struct pt_regs regs) +{ + unsigned long sp = PT_REGS_SP(¤t->thread.regs); + struct sigframe __user *frame = (struct sigframe __user *)(sp - 8); + sigset_t set; + struct sigcontext __user *sc = &frame->sc; + unsigned long __user *oldmask = &sc->oldmask; + unsigned long __user *extramask = frame->extramask; + int sig_size = (_NSIG_WORDS - 1) * sizeof(unsigned long); + + if (copy_from_user(&set.sig[0], oldmask, sizeof(set.sig[0])) || + copy_from_user(&set.sig[1], extramask, sig_size)) + goto segfault; + + sigdelsetmask(&set, ~_BLOCKABLE); + set_current_blocked(&set); + + if (copy_sc_from_user(¤t->thread.regs, sc)) + goto segfault; + + /* Avoid ERESTART handling */ + PT_REGS_SYSCALL_NR(¤t->thread.regs) = -1; + return PT_REGS_SYSCALL_RET(¤t->thread.regs); + + segfault: + force_sig(SIGSEGV, current); + return 0; +} + +long sys_rt_sigreturn(struct pt_regs regs) +{ + unsigned long sp = PT_REGS_SP(¤t->thread.regs); + struct rt_sigframe __user *frame = + (struct rt_sigframe __user *) (sp - 4); + sigset_t set; + struct ucontext __user *uc = &frame->uc; + int sig_size = _NSIG_WORDS * sizeof(unsigned long); + + if (copy_from_user(&set, &uc->uc_sigmask, sig_size)) + goto segfault; + + sigdelsetmask(&set, ~_BLOCKABLE); + set_current_blocked(&set); + + if (copy_sc_from_user(¤t->thread.regs, &uc->uc_mcontext)) + goto segfault; + + /* Avoid ERESTART handling */ + PT_REGS_SYSCALL_NR(¤t->thread.regs) = -1; + return PT_REGS_SYSCALL_RET(¤t->thread.regs); + + segfault: + force_sig(SIGSEGV, current); + return 0; +} diff --git a/arch/x86/um/signal_64.c b/arch/x86/um/signal_64.c new file mode 100644 index 000000000000..255b2ca0ce67 --- /dev/null +++ b/arch/x86/um/signal_64.c @@ -0,0 +1,255 @@ +/* + * Copyright (C) 2003 PathScale, Inc. + * Copyright (C) 2003 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#include <linux/personality.h> +#include <linux/ptrace.h> +#include <linux/kernel.h> +#include <asm/unistd.h> +#include <asm/uaccess.h> +#include <asm/ucontext.h> +#include "frame_kern.h" +#include "skas.h" + +static int copy_sc_from_user(struct pt_regs *regs, + struct sigcontext __user *from) +{ + struct sigcontext sc; + struct user_i387_struct fp; + void __user *buf; + int err; + + err = copy_from_user(&sc, from, sizeof(sc)); + if (err) + return err; + +#define GETREG(regno, regname) regs->regs.gp[HOST_##regno] = sc.regname + + GETREG(R8, r8); + GETREG(R9, r9); + GETREG(R10, r10); + GETREG(R11, r11); + GETREG(R12, r12); + GETREG(R13, r13); + GETREG(R14, r14); + GETREG(R15, r15); + GETREG(RDI, di); + GETREG(RSI, si); + GETREG(RBP, bp); + GETREG(RBX, bx); + GETREG(RDX, dx); + GETREG(RAX, ax); + GETREG(RCX, cx); + GETREG(SP, sp); + GETREG(IP, ip); + GETREG(EFLAGS, flags); + GETREG(CS, cs); +#undef GETREG + + buf = sc.fpstate; + + err = copy_from_user(&fp, buf, sizeof(struct user_i387_struct)); + if (err) + return 1; + + err = restore_fp_registers(userspace_pid[current_thread_info()->cpu], + (unsigned long *) &fp); + if (err < 0) { + printk(KERN_ERR "copy_sc_from_user - " + "restore_fp_registers failed, errno = %d\n", + -err); + return 1; + } + + return 0; +} + +static int copy_sc_to_user(struct sigcontext __user *to, + struct _fpstate __user *to_fp, struct pt_regs *regs, + unsigned long mask, unsigned long sp) +{ + struct faultinfo * fi = ¤t->thread.arch.faultinfo; + struct sigcontext sc; + struct user_i387_struct fp; + int err = 0; + memset(&sc, 0, sizeof(struct sigcontext)); + +#define PUTREG(regno, regname) sc.regname = regs->regs.gp[HOST_##regno] + + PUTREG(RDI, di); + PUTREG(RSI, si); + PUTREG(RBP, bp); + /* + * Must use original RSP, which is passed in, rather than what's in + * signal frame. + */ + sc.sp = sp; + PUTREG(RBX, bx); + PUTREG(RDX, dx); + PUTREG(RCX, cx); + PUTREG(RAX, ax); + PUTREG(R8, r8); + PUTREG(R9, r9); + PUTREG(R10, r10); + PUTREG(R11, r11); + PUTREG(R12, r12); + PUTREG(R13, r13); + PUTREG(R14, r14); + PUTREG(R15, r15); + PUTREG(CS, cs); /* XXX x86_64 doesn't do this */ + + sc.cr2 = fi->cr2; + sc.err = fi->error_code; + sc.trapno = fi->trap_no; + + PUTREG(IP, ip); + PUTREG(EFLAGS, flags); +#undef PUTREG + + sc.oldmask = mask; + + err = copy_to_user(to, &sc, sizeof(struct sigcontext)); + if (err) + return 1; + + err = save_fp_registers(userspace_pid[current_thread_info()->cpu], + (unsigned long *) &fp); + if (err < 0) { + printk(KERN_ERR "copy_sc_from_user - restore_fp_registers " + "failed, errno = %d\n", -err); + return 1; + } + + if (copy_to_user(to_fp, &fp, sizeof(struct user_i387_struct))) + return 1; + + return err; +} + +struct rt_sigframe +{ + char __user *pretcode; + struct ucontext uc; + struct siginfo info; + struct _fpstate fpstate; +}; + +int setup_signal_stack_si(unsigned long stack_top, int sig, + struct k_sigaction *ka, struct pt_regs * regs, + siginfo_t *info, sigset_t *set) +{ + struct rt_sigframe __user *frame; + unsigned long save_sp = PT_REGS_RSP(regs); + int err = 0; + struct task_struct *me = current; + + frame = (struct rt_sigframe __user *) + round_down(stack_top - sizeof(struct rt_sigframe), 16); + /* Subtract 128 for a red zone and 8 for proper alignment */ + frame = (struct rt_sigframe __user *) ((unsigned long) frame - 128 - 8); + + if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) + goto out; + + if (ka->sa.sa_flags & SA_SIGINFO) { + err |= copy_siginfo_to_user(&frame->info, info); + if (err) + goto out; + } + + /* + * Update SP now because the page fault handler refuses to extend + * the stack if the faulting address is too far below the current + * SP, which frame now certainly is. If there's an error, the original + * value is restored on the way out. + * When writing the sigcontext to the stack, we have to write the + * original value, so that's passed to copy_sc_to_user, which does + * the right thing with it. + */ + PT_REGS_RSP(regs) = (unsigned long) frame; + + /* Create the ucontext. */ + err |= __put_user(0, &frame->uc.uc_flags); + err |= __put_user(0, &frame->uc.uc_link); + err |= __put_user(me->sas_ss_sp, &frame->uc.uc_stack.ss_sp); + err |= __put_user(sas_ss_flags(save_sp), + &frame->uc.uc_stack.ss_flags); + err |= __put_user(me->sas_ss_size, &frame->uc.uc_stack.ss_size); + err |= copy_sc_to_user(&frame->uc.uc_mcontext, &frame->fpstate, regs, + set->sig[0], save_sp); + err |= __put_user(&frame->fpstate, &frame->uc.uc_mcontext.fpstate); + if (sizeof(*set) == 16) { + __put_user(set->sig[0], &frame->uc.uc_sigmask.sig[0]); + __put_user(set->sig[1], &frame->uc.uc_sigmask.sig[1]); + } + else + err |= __copy_to_user(&frame->uc.uc_sigmask, set, + sizeof(*set)); + + /* + * Set up to return from userspace. If provided, use a stub + * already in userspace. + */ + /* x86-64 should always use SA_RESTORER. */ + if (ka->sa.sa_flags & SA_RESTORER) + err |= __put_user(ka->sa.sa_restorer, &frame->pretcode); + else + /* could use a vstub here */ + goto restore_sp; + + if (err) + goto restore_sp; + + /* Set up registers for signal handler */ + { + struct exec_domain *ed = current_thread_info()->exec_domain; + if (unlikely(ed && ed->signal_invmap && sig < 32)) + sig = ed->signal_invmap[sig]; + } + + PT_REGS_RDI(regs) = sig; + /* In case the signal handler was declared without prototypes */ + PT_REGS_RAX(regs) = 0; + + /* + * This also works for non SA_SIGINFO handlers because they expect the + * next argument after the signal number on the stack. + */ + PT_REGS_RSI(regs) = (unsigned long) &frame->info; + PT_REGS_RDX(regs) = (unsigned long) &frame->uc; + PT_REGS_RIP(regs) = (unsigned long) ka->sa.sa_handler; + out: + return err; + +restore_sp: + PT_REGS_RSP(regs) = save_sp; + return err; +} + +long sys_rt_sigreturn(struct pt_regs *regs) +{ + unsigned long sp = PT_REGS_SP(¤t->thread.regs); + struct rt_sigframe __user *frame = + (struct rt_sigframe __user *)(sp - 8); + struct ucontext __user *uc = &frame->uc; + sigset_t set; + + if (copy_from_user(&set, &uc->uc_sigmask, sizeof(set))) + goto segfault; + + sigdelsetmask(&set, ~_BLOCKABLE); + set_current_blocked(&set); + + if (copy_sc_from_user(¤t->thread.regs, &uc->uc_mcontext)) + goto segfault; + + /* Avoid ERESTART handling */ + PT_REGS_SYSCALL_NR(¤t->thread.regs) = -1; + return PT_REGS_SYSCALL_RET(¤t->thread.regs); + + segfault: + force_sig(SIGSEGV, current); + return 0; +} diff --git a/arch/x86/um/stub_32.S b/arch/x86/um/stub_32.S new file mode 100644 index 000000000000..54a36ec20cb7 --- /dev/null +++ b/arch/x86/um/stub_32.S @@ -0,0 +1,51 @@ +#include "as-layout.h" + + .globl syscall_stub +.section .__syscall_stub, "ax" + + .globl batch_syscall_stub +batch_syscall_stub: + /* load pointer to first operation */ + mov $(STUB_DATA+8), %esp + +again: + /* load length of additional data */ + mov 0x0(%esp), %eax + + /* if(length == 0) : end of list */ + /* write possible 0 to header */ + mov %eax, STUB_DATA+4 + cmpl $0, %eax + jz done + + /* save current pointer */ + mov %esp, STUB_DATA+4 + + /* skip additional data */ + add %eax, %esp + + /* load syscall-# */ + pop %eax + + /* load syscall params */ + pop %ebx + pop %ecx + pop %edx + pop %esi + pop %edi + pop %ebp + + /* execute syscall */ + int $0x80 + + /* check return value */ + pop %ebx + cmp %ebx, %eax + je again + +done: + /* save return value */ + mov %eax, STUB_DATA + + /* stop */ + int3 diff --git a/arch/x86/um/stub_64.S b/arch/x86/um/stub_64.S new file mode 100644 index 000000000000..20e4a96a6dcb --- /dev/null +++ b/arch/x86/um/stub_64.S @@ -0,0 +1,66 @@ +#include "as-layout.h" + + .globl syscall_stub +.section .__syscall_stub, "ax" +syscall_stub: + syscall + /* We don't have 64-bit constants, so this constructs the address + * we need. + */ + movq $(STUB_DATA >> 32), %rbx + salq $32, %rbx + movq $(STUB_DATA & 0xffffffff), %rcx + or %rcx, %rbx + movq %rax, (%rbx) + int3 + + .globl batch_syscall_stub +batch_syscall_stub: + mov $(STUB_DATA >> 32), %rbx + sal $32, %rbx + mov $(STUB_DATA & 0xffffffff), %rax + or %rax, %rbx + /* load pointer to first operation */ + mov %rbx, %rsp + add $0x10, %rsp +again: + /* load length of additional data */ + mov 0x0(%rsp), %rax + + /* if(length == 0) : end of list */ + /* write possible 0 to header */ + mov %rax, 8(%rbx) + cmp $0, %rax + jz done + + /* save current pointer */ + mov %rsp, 8(%rbx) + + /* skip additional data */ + add %rax, %rsp + + /* load syscall-# */ + pop %rax + + /* load syscall params */ + pop %rdi + pop %rsi + pop %rdx + pop %r10 + pop %r8 + pop %r9 + + /* execute syscall */ + syscall + + /* check return value */ + pop %rcx + cmp %rcx, %rax + je again + +done: + /* save return value */ + mov %rax, (%rbx) + + /* stop */ + int3 diff --git a/arch/x86/um/stub_segv.c b/arch/x86/um/stub_segv.c new file mode 100644 index 000000000000..b7450bd22e7d --- /dev/null +++ b/arch/x86/um/stub_segv.c @@ -0,0 +1,19 @@ +/* + * Copyright (C) 2004 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#include "sysdep/stub.h" +#include "sysdep/faultinfo.h" +#include "sysdep/mcontext.h" + +void __attribute__ ((__section__ (".__syscall_stub"))) +stub_segv_handler(int sig, siginfo_t *info, void *p) +{ + struct ucontext *uc = p; + + GET_FAULTINFO_FROM_MC(*((struct faultinfo *) STUB_DATA), + &uc->uc_mcontext); + trap_myself(); +} + diff --git a/arch/x86/um/sys_call_table_32.S b/arch/x86/um/sys_call_table_32.S new file mode 100644 index 000000000000..de274071455d --- /dev/null +++ b/arch/x86/um/sys_call_table_32.S @@ -0,0 +1,28 @@ +#include <linux/linkage.h> +/* Steal i386 syscall table for our purposes, but with some slight changes.*/ + +#define sys_iopl sys_ni_syscall +#define sys_ioperm sys_ni_syscall + +#define sys_vm86old sys_ni_syscall +#define sys_vm86 sys_ni_syscall + +#define old_mmap sys_old_mmap + +#define ptregs_fork sys_fork +#define ptregs_execve sys_execve +#define ptregs_iopl sys_iopl +#define ptregs_vm86old sys_vm86old +#define ptregs_sigreturn sys_sigreturn +#define ptregs_clone sys_clone +#define ptregs_vm86 sys_vm86 +#define ptregs_rt_sigreturn sys_rt_sigreturn +#define ptregs_sigaltstack sys_sigaltstack +#define ptregs_vfork sys_vfork + +.section .rodata,"a" + +#include "../../x86/kernel/syscall_table_32.S" + +ENTRY(syscall_table_size) +.long .-sys_call_table diff --git a/arch/x86/um/sys_call_table_64.c b/arch/x86/um/sys_call_table_64.c new file mode 100644 index 000000000000..f46de82d675c --- /dev/null +++ b/arch/x86/um/sys_call_table_64.c @@ -0,0 +1,64 @@ +/* + * System call table for UML/x86-64, copied from arch/x86_64/kernel/syscall.c + * with some changes for UML. + */ + +#include <linux/linkage.h> +#include <linux/sys.h> +#include <linux/cache.h> + +#define __NO_STUBS + +/* + * Below you can see, in terms of #define's, the differences between the x86-64 + * and the UML syscall table. + */ + +/* Not going to be implemented by UML, since we have no hardware. */ +#define stub_iopl sys_ni_syscall +#define sys_ioperm sys_ni_syscall + +/* + * The UML TLS problem. Note that x86_64 does not implement this, so the below + * is needed only for the ia32 compatibility. + */ + +/* On UML we call it this way ("old" means it's not mmap2) */ +#define sys_mmap old_mmap + +#define stub_clone sys_clone +#define stub_fork sys_fork +#define stub_vfork sys_vfork +#define stub_execve sys_execve +#define stub_rt_sigsuspend sys_rt_sigsuspend +#define stub_sigaltstack sys_sigaltstack +#define stub_rt_sigreturn sys_rt_sigreturn + +#define __SYSCALL(nr, sym) extern asmlinkage void sym(void) ; +#undef _ASM_X86_UNISTD_64_H +#include "../../x86/include/asm/unistd_64.h" + +#undef __SYSCALL +#define __SYSCALL(nr, sym) [ nr ] = sym, +#undef _ASM_X86_UNISTD_64_H + +typedef void (*sys_call_ptr_t)(void); + +extern void sys_ni_syscall(void); + +/* + * We used to have a trick here which made sure that holes in the + * x86_64 table were filled in with sys_ni_syscall, but a comment in + * unistd_64.h says that holes aren't allowed, so the trick was + * removed. + * The trick looked like this + * [0 ... UM_NR_syscall_max] = &sys_ni_syscall + * before including unistd_64.h - the later initializations overwrote + * the sys_ni_syscall filler. + */ + +sys_call_ptr_t sys_call_table[] __cacheline_aligned = { +#include "../../x86/include/asm/unistd_64.h" +}; + +int syscall_table_size = sizeof(sys_call_table); diff --git a/arch/x86/um/syscalls_32.c b/arch/x86/um/syscalls_32.c new file mode 100644 index 000000000000..70ca357393b8 --- /dev/null +++ b/arch/x86/um/syscalls_32.c @@ -0,0 +1,66 @@ +/* + * Copyright (C) 2000 - 2003 Jeff Dike (jdike@addtoit.com) + * Licensed under the GPL + */ + +#include "linux/sched.h" +#include "linux/shm.h" +#include "linux/ipc.h" +#include "linux/syscalls.h" +#include "asm/mman.h" +#include "asm/uaccess.h" +#include "asm/unistd.h" + +/* + * The prototype on i386 is: + * + * int clone(int flags, void * child_stack, int * parent_tidptr, struct user_desc * newtls, int * child_tidptr) + * + * and the "newtls" arg. on i386 is read by copy_thread directly from the + * register saved on the stack. + */ +long sys_clone(unsigned long clone_flags, unsigned long newsp, + int __user *parent_tid, void *newtls, int __user *child_tid) +{ + long ret; + + if (!newsp) + newsp = UPT_SP(¤t->thread.regs.regs); + + current->thread.forking = 1; + ret = do_fork(clone_flags, newsp, ¤t->thread.regs, 0, parent_tid, + child_tid); + current->thread.forking = 0; + return ret; +} + +long sys_sigaction(int sig, const struct old_sigaction __user *act, + struct old_sigaction __user *oact) +{ + struct k_sigaction new_ka, old_ka; + int ret; + + if (act) { + old_sigset_t mask; + if (!access_ok(VERIFY_READ, act, sizeof(*act)) || + __get_user(new_ka.sa.sa_handler, &act->sa_handler) || + __get_user(new_ka.sa.sa_restorer, &act->sa_restorer)) + return -EFAULT; + __get_user(new_ka.sa.sa_flags, &act->sa_flags); + __get_user(mask, &act->sa_mask); + siginitset(&new_ka.sa.sa_mask, mask); + } + + ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL); + + if (!ret && oact) { + if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact)) || + __put_user(old_ka.sa.sa_handler, &oact->sa_handler) || + __put_user(old_ka.sa.sa_restorer, &oact->sa_restorer)) + return -EFAULT; + __put_user(old_ka.sa.sa_flags, &oact->sa_flags); + __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask); + } + + return ret; +} diff --git a/arch/x86/um/syscalls_64.c b/arch/x86/um/syscalls_64.c new file mode 100644 index 000000000000..f3d82bb6e15a --- /dev/null +++ b/arch/x86/um/syscalls_64.c @@ -0,0 +1,102 @@ +/* + * Copyright (C) 2003 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Copyright 2003 PathScale, Inc. + * + * Licensed under the GPL + */ + +#include "linux/linkage.h" +#include "linux/personality.h" +#include "linux/utsname.h" +#include "asm/prctl.h" /* XXX This should get the constants from libc */ +#include "asm/uaccess.h" +#include "os.h" + +long arch_prctl(struct task_struct *task, int code, unsigned long __user *addr) +{ + unsigned long *ptr = addr, tmp; + long ret; + int pid = task->mm->context.id.u.pid; + + /* + * With ARCH_SET_FS (and ARCH_SET_GS is treated similarly to + * be safe), we need to call arch_prctl on the host because + * setting %fs may result in something else happening (like a + * GDT or thread.fs being set instead). So, we let the host + * fiddle the registers and thread struct and restore the + * registers afterwards. + * + * So, the saved registers are stored to the process (this + * needed because a stub may have been the last thing to run), + * arch_prctl is run on the host, then the registers are read + * back. + */ + switch (code) { + case ARCH_SET_FS: + case ARCH_SET_GS: + ret = restore_registers(pid, ¤t->thread.regs.regs); + if (ret) + return ret; + break; + case ARCH_GET_FS: + case ARCH_GET_GS: + /* + * With these two, we read to a local pointer and + * put_user it to the userspace pointer that we were + * given. If addr isn't valid (because it hasn't been + * faulted in or is just bogus), we want put_user to + * fault it in (or return -EFAULT) instead of having + * the host return -EFAULT. + */ + ptr = &tmp; + } + + ret = os_arch_prctl(pid, code, ptr); + if (ret) + return ret; + + switch (code) { + case ARCH_SET_FS: + current->thread.arch.fs = (unsigned long) ptr; + ret = save_registers(pid, ¤t->thread.regs.regs); + break; + case ARCH_SET_GS: + ret = save_registers(pid, ¤t->thread.regs.regs); + break; + case ARCH_GET_FS: + ret = put_user(tmp, addr); + break; + case ARCH_GET_GS: + ret = put_user(tmp, addr); + break; + } + + return ret; +} + +long sys_arch_prctl(int code, unsigned long addr) +{ + return arch_prctl(current, code, (unsigned long __user *) addr); +} + +long sys_clone(unsigned long clone_flags, unsigned long newsp, + void __user *parent_tid, void __user *child_tid) +{ + long ret; + + if (!newsp) + newsp = UPT_SP(¤t->thread.regs.regs); + current->thread.forking = 1; + ret = do_fork(clone_flags, newsp, ¤t->thread.regs, 0, parent_tid, + child_tid); + current->thread.forking = 0; + return ret; +} + +void arch_switch_to(struct task_struct *to) +{ + if ((to->thread.arch.fs == 0) || (to->mm == NULL)) + return; + + arch_prctl(to, ARCH_SET_FS, (void __user *) to->thread.arch.fs); +} diff --git a/arch/x86/um/sysrq_32.c b/arch/x86/um/sysrq_32.c new file mode 100644 index 000000000000..171b3e9dc867 --- /dev/null +++ b/arch/x86/um/sysrq_32.c @@ -0,0 +1,101 @@ +/* + * Copyright (C) 2001 - 2003 Jeff Dike (jdike@addtoit.com) + * Licensed under the GPL + */ + +#include "linux/kernel.h" +#include "linux/smp.h" +#include "linux/sched.h" +#include "linux/kallsyms.h" +#include "asm/ptrace.h" +#include "sysrq.h" + +/* This is declared by <linux/sched.h> */ +void show_regs(struct pt_regs *regs) +{ + printk("\n"); + printk("EIP: %04lx:[<%08lx>] CPU: %d %s", + 0xffff & PT_REGS_CS(regs), PT_REGS_IP(regs), + smp_processor_id(), print_tainted()); + if (PT_REGS_CS(regs) & 3) + printk(" ESP: %04lx:%08lx", 0xffff & PT_REGS_SS(regs), + PT_REGS_SP(regs)); + printk(" EFLAGS: %08lx\n %s\n", PT_REGS_EFLAGS(regs), + print_tainted()); + printk("EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n", + PT_REGS_EAX(regs), PT_REGS_EBX(regs), + PT_REGS_ECX(regs), + PT_REGS_EDX(regs)); + printk("ESI: %08lx EDI: %08lx EBP: %08lx", + PT_REGS_ESI(regs), PT_REGS_EDI(regs), + PT_REGS_EBP(regs)); + printk(" DS: %04lx ES: %04lx\n", + 0xffff & PT_REGS_DS(regs), + 0xffff & PT_REGS_ES(regs)); + + show_trace(NULL, (unsigned long *) ®s); +} + +/* Copied from i386. */ +static inline int valid_stack_ptr(struct thread_info *tinfo, void *p) +{ + return p > (void *)tinfo && + p < (void *)tinfo + THREAD_SIZE - 3; +} + +/* Adapted from i386 (we also print the address we read from). */ +static inline unsigned long print_context_stack(struct thread_info *tinfo, + unsigned long *stack, unsigned long ebp) +{ + unsigned long addr; + +#ifdef CONFIG_FRAME_POINTER + while (valid_stack_ptr(tinfo, (void *)ebp)) { + addr = *(unsigned long *)(ebp + 4); + printk("%08lx: [<%08lx>]", ebp + 4, addr); + print_symbol(" %s", addr); + printk("\n"); + ebp = *(unsigned long *)ebp; + } +#else + while (valid_stack_ptr(tinfo, stack)) { + addr = *stack; + if (__kernel_text_address(addr)) { + printk("%08lx: [<%08lx>]", (unsigned long) stack, addr); + print_symbol(" %s", addr); + printk("\n"); + } + stack++; + } +#endif + return ebp; +} + +void show_trace(struct task_struct* task, unsigned long * stack) +{ + unsigned long ebp; + struct thread_info *context; + + /* Turn this into BUG_ON if possible. */ + if (!stack) { + stack = (unsigned long*) &stack; + printk("show_trace: got NULL stack, implicit assumption task == current"); + WARN_ON(1); + } + + if (!task) + task = current; + + if (task != current) { + ebp = (unsigned long) KSTK_EBP(task); + } else { + asm ("movl %%ebp, %0" : "=r" (ebp) : ); + } + + context = (struct thread_info *) + ((unsigned long)stack & (~(THREAD_SIZE - 1))); + print_context_stack(context, stack, ebp); + + printk("\n"); +} + diff --git a/arch/x86/um/sysrq_64.c b/arch/x86/um/sysrq_64.c new file mode 100644 index 000000000000..f4f82beb3508 --- /dev/null +++ b/arch/x86/um/sysrq_64.c @@ -0,0 +1,41 @@ +/* + * Copyright 2003 PathScale, Inc. + * + * Licensed under the GPL + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/sched.h> +#include <linux/utsname.h> +#include <asm/current.h> +#include <asm/ptrace.h> +#include "sysrq.h" + +void __show_regs(struct pt_regs *regs) +{ + printk("\n"); + print_modules(); + printk(KERN_INFO "Pid: %d, comm: %.20s %s %s\n", task_pid_nr(current), + current->comm, print_tainted(), init_utsname()->release); + printk(KERN_INFO "RIP: %04lx:[<%016lx>]\n", PT_REGS_CS(regs) & 0xffff, + PT_REGS_RIP(regs)); + printk(KERN_INFO "RSP: %016lx EFLAGS: %08lx\n", PT_REGS_RSP(regs), + PT_REGS_EFLAGS(regs)); + printk(KERN_INFO "RAX: %016lx RBX: %016lx RCX: %016lx\n", + PT_REGS_RAX(regs), PT_REGS_RBX(regs), PT_REGS_RCX(regs)); + printk(KERN_INFO "RDX: %016lx RSI: %016lx RDI: %016lx\n", + PT_REGS_RDX(regs), PT_REGS_RSI(regs), PT_REGS_RDI(regs)); + printk(KERN_INFO "RBP: %016lx R08: %016lx R09: %016lx\n", + PT_REGS_RBP(regs), PT_REGS_R8(regs), PT_REGS_R9(regs)); + printk(KERN_INFO "R10: %016lx R11: %016lx R12: %016lx\n", + PT_REGS_R10(regs), PT_REGS_R11(regs), PT_REGS_R12(regs)); + printk(KERN_INFO "R13: %016lx R14: %016lx R15: %016lx\n", + PT_REGS_R13(regs), PT_REGS_R14(regs), PT_REGS_R15(regs)); +} + +void show_regs(struct pt_regs *regs) +{ + __show_regs(regs); + show_trace(current, (unsigned long *) ®s); +} diff --git a/arch/x86/um/tls_32.c b/arch/x86/um/tls_32.c new file mode 100644 index 000000000000..c6c7131e563b --- /dev/null +++ b/arch/x86/um/tls_32.c @@ -0,0 +1,396 @@ +/* + * Copyright (C) 2005 Paolo 'Blaisorblade' Giarrusso <blaisorblade@yahoo.it> + * Licensed under the GPL + */ + +#include "linux/percpu.h" +#include "linux/sched.h" +#include "asm/uaccess.h" +#include "os.h" +#include "skas.h" +#include "sysdep/tls.h" + +/* + * If needed we can detect when it's uninitialized. + * + * These are initialized in an initcall and unchanged thereafter. + */ +static int host_supports_tls = -1; +int host_gdt_entry_tls_min; + +int do_set_thread_area(struct user_desc *info) +{ + int ret; + u32 cpu; + + cpu = get_cpu(); + ret = os_set_thread_area(info, userspace_pid[cpu]); + put_cpu(); + + if (ret) + printk(KERN_ERR "PTRACE_SET_THREAD_AREA failed, err = %d, " + "index = %d\n", ret, info->entry_number); + + return ret; +} + +int do_get_thread_area(struct user_desc *info) +{ + int ret; + u32 cpu; + + cpu = get_cpu(); + ret = os_get_thread_area(info, userspace_pid[cpu]); + put_cpu(); + + if (ret) + printk(KERN_ERR "PTRACE_GET_THREAD_AREA failed, err = %d, " + "index = %d\n", ret, info->entry_number); + + return ret; +} + +/* + * sys_get_thread_area: get a yet unused TLS descriptor index. + * XXX: Consider leaving one free slot for glibc usage at first place. This must + * be done here (and by changing GDT_ENTRY_TLS_* macros) and nowhere else. + * + * Also, this must be tested when compiling in SKAS mode with dynamic linking + * and running against NPTL. + */ +static int get_free_idx(struct task_struct* task) +{ + struct thread_struct *t = &task->thread; + int idx; + + if (!t->arch.tls_array) + return GDT_ENTRY_TLS_MIN; + + for (idx = 0; idx < GDT_ENTRY_TLS_ENTRIES; idx++) + if (!t->arch.tls_array[idx].present) + return idx + GDT_ENTRY_TLS_MIN; + return -ESRCH; +} + +static inline void clear_user_desc(struct user_desc* info) +{ + /* Postcondition: LDT_empty(info) returns true. */ + memset(info, 0, sizeof(*info)); + + /* + * Check the LDT_empty or the i386 sys_get_thread_area code - we obtain + * indeed an empty user_desc. + */ + info->read_exec_only = 1; + info->seg_not_present = 1; +} + +#define O_FORCE 1 + +static int load_TLS(int flags, struct task_struct *to) +{ + int ret = 0; + int idx; + + for (idx = GDT_ENTRY_TLS_MIN; idx < GDT_ENTRY_TLS_MAX; idx++) { + struct uml_tls_struct* curr = + &to->thread.arch.tls_array[idx - GDT_ENTRY_TLS_MIN]; + + /* + * Actually, now if it wasn't flushed it gets cleared and + * flushed to the host, which will clear it. + */ + if (!curr->present) { + if (!curr->flushed) { + clear_user_desc(&curr->tls); + curr->tls.entry_number = idx; + } else { + WARN_ON(!LDT_empty(&curr->tls)); + continue; + } + } + + if (!(flags & O_FORCE) && curr->flushed) + continue; + + ret = do_set_thread_area(&curr->tls); + if (ret) + goto out; + + curr->flushed = 1; + } +out: + return ret; +} + +/* + * Verify if we need to do a flush for the new process, i.e. if there are any + * present desc's, only if they haven't been flushed. + */ +static inline int needs_TLS_update(struct task_struct *task) +{ + int i; + int ret = 0; + + for (i = GDT_ENTRY_TLS_MIN; i < GDT_ENTRY_TLS_MAX; i++) { + struct uml_tls_struct* curr = + &task->thread.arch.tls_array[i - GDT_ENTRY_TLS_MIN]; + + /* + * Can't test curr->present, we may need to clear a descriptor + * which had a value. + */ + if (curr->flushed) + continue; + ret = 1; + break; + } + return ret; +} + +/* + * On a newly forked process, the TLS descriptors haven't yet been flushed. So + * we mark them as such and the first switch_to will do the job. + */ +void clear_flushed_tls(struct task_struct *task) +{ + int i; + + for (i = GDT_ENTRY_TLS_MIN; i < GDT_ENTRY_TLS_MAX; i++) { + struct uml_tls_struct* curr = + &task->thread.arch.tls_array[i - GDT_ENTRY_TLS_MIN]; + + /* + * Still correct to do this, if it wasn't present on the host it + * will remain as flushed as it was. + */ + if (!curr->present) + continue; + + curr->flushed = 0; + } +} + +/* + * In SKAS0 mode, currently, multiple guest threads sharing the same ->mm have a + * common host process. So this is needed in SKAS0 too. + * + * However, if each thread had a different host process (and this was discussed + * for SMP support) this won't be needed. + * + * And this will not need be used when (and if) we'll add support to the host + * SKAS patch. + */ + +int arch_switch_tls(struct task_struct *to) +{ + if (!host_supports_tls) + return 0; + + /* + * We have no need whatsoever to switch TLS for kernel threads; beyond + * that, that would also result in us calling os_set_thread_area with + * userspace_pid[cpu] == 0, which gives an error. + */ + if (likely(to->mm)) + return load_TLS(O_FORCE, to); + + return 0; +} + +static int set_tls_entry(struct task_struct* task, struct user_desc *info, + int idx, int flushed) +{ + struct thread_struct *t = &task->thread; + + if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX) + return -EINVAL; + + t->arch.tls_array[idx - GDT_ENTRY_TLS_MIN].tls = *info; + t->arch.tls_array[idx - GDT_ENTRY_TLS_MIN].present = 1; + t->arch.tls_array[idx - GDT_ENTRY_TLS_MIN].flushed = flushed; + + return 0; +} + +int arch_copy_tls(struct task_struct *new) +{ + struct user_desc info; + int idx, ret = -EFAULT; + + if (copy_from_user(&info, + (void __user *) UPT_ESI(&new->thread.regs.regs), + sizeof(info))) + goto out; + + ret = -EINVAL; + if (LDT_empty(&info)) + goto out; + + idx = info.entry_number; + + ret = set_tls_entry(new, &info, idx, 0); +out: + return ret; +} + +/* XXX: use do_get_thread_area to read the host value? I'm not at all sure! */ +static int get_tls_entry(struct task_struct *task, struct user_desc *info, + int idx) +{ + struct thread_struct *t = &task->thread; + + if (!t->arch.tls_array) + goto clear; + + if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX) + return -EINVAL; + + if (!t->arch.tls_array[idx - GDT_ENTRY_TLS_MIN].present) + goto clear; + + *info = t->arch.tls_array[idx - GDT_ENTRY_TLS_MIN].tls; + +out: + /* + * Temporary debugging check, to make sure that things have been + * flushed. This could be triggered if load_TLS() failed. + */ + if (unlikely(task == current && + !t->arch.tls_array[idx - GDT_ENTRY_TLS_MIN].flushed)) { + printk(KERN_ERR "get_tls_entry: task with pid %d got here " + "without flushed TLS.", current->pid); + } + + return 0; +clear: + /* + * When the TLS entry has not been set, the values read to user in the + * tls_array are 0 (because it's cleared at boot, see + * arch/i386/kernel/head.S:cpu_gdt_table). Emulate that. + */ + clear_user_desc(info); + info->entry_number = idx; + goto out; +} + +int sys_set_thread_area(struct user_desc __user *user_desc) +{ + struct user_desc info; + int idx, ret; + + if (!host_supports_tls) + return -ENOSYS; + + if (copy_from_user(&info, user_desc, sizeof(info))) + return -EFAULT; + + idx = info.entry_number; + + if (idx == -1) { + idx = get_free_idx(current); + if (idx < 0) + return idx; + info.entry_number = idx; + /* Tell the user which slot we chose for him.*/ + if (put_user(idx, &user_desc->entry_number)) + return -EFAULT; + } + + ret = do_set_thread_area(&info); + if (ret) + return ret; + return set_tls_entry(current, &info, idx, 1); +} + +/* + * Perform set_thread_area on behalf of the traced child. + * Note: error handling is not done on the deferred load, and this differ from + * i386. However the only possible error are caused by bugs. + */ +int ptrace_set_thread_area(struct task_struct *child, int idx, + struct user_desc __user *user_desc) +{ + struct user_desc info; + + if (!host_supports_tls) + return -EIO; + + if (copy_from_user(&info, user_desc, sizeof(info))) + return -EFAULT; + + return set_tls_entry(child, &info, idx, 0); +} + +int sys_get_thread_area(struct user_desc __user *user_desc) +{ + struct user_desc info; + int idx, ret; + + if (!host_supports_tls) + return -ENOSYS; + + if (get_user(idx, &user_desc->entry_number)) + return -EFAULT; + + ret = get_tls_entry(current, &info, idx); + if (ret < 0) + goto out; + + if (copy_to_user(user_desc, &info, sizeof(info))) + ret = -EFAULT; + +out: + return ret; +} + +/* + * Perform get_thread_area on behalf of the traced child. + */ +int ptrace_get_thread_area(struct task_struct *child, int idx, + struct user_desc __user *user_desc) +{ + struct user_desc info; + int ret; + + if (!host_supports_tls) + return -EIO; + + ret = get_tls_entry(child, &info, idx); + if (ret < 0) + goto out; + + if (copy_to_user(user_desc, &info, sizeof(info))) + ret = -EFAULT; +out: + return ret; +} + +/* + * This code is really i386-only, but it detects and logs x86_64 GDT indexes + * if a 32-bit UML is running on a 64-bit host. + */ +static int __init __setup_host_supports_tls(void) +{ + check_host_supports_tls(&host_supports_tls, &host_gdt_entry_tls_min); + if (host_supports_tls) { + printk(KERN_INFO "Host TLS support detected\n"); + printk(KERN_INFO "Detected host type: "); + switch (host_gdt_entry_tls_min) { + case GDT_ENTRY_TLS_MIN_I386: + printk(KERN_CONT "i386"); + break; + case GDT_ENTRY_TLS_MIN_X86_64: + printk(KERN_CONT "x86_64"); + break; + } + printk(KERN_CONT " (GDT indexes %d to %d)\n", + host_gdt_entry_tls_min, + host_gdt_entry_tls_min + GDT_ENTRY_TLS_ENTRIES); + } else + printk(KERN_ERR " Host TLS support NOT detected! " + "TLS support inside UML will not work\n"); + return 0; +} + +__initcall(__setup_host_supports_tls); diff --git a/arch/x86/um/tls_64.c b/arch/x86/um/tls_64.c new file mode 100644 index 000000000000..f7ba46200ecd --- /dev/null +++ b/arch/x86/um/tls_64.c @@ -0,0 +1,17 @@ +#include "linux/sched.h" + +void clear_flushed_tls(struct task_struct *task) +{ +} + +int arch_copy_tls(struct task_struct *t) +{ + /* + * If CLONE_SETTLS is set, we need to save the thread id + * (which is argument 5, child_tid, of clone) so it can be set + * during context switches. + */ + t->thread.arch.fs = t->thread.regs.regs.gp[R8 / sizeof(long)]; + + return 0; +} diff --git a/arch/x86/um/user-offsets.c b/arch/x86/um/user-offsets.c new file mode 100644 index 000000000000..3c19c48a1d48 --- /dev/null +++ b/arch/x86/um/user-offsets.c @@ -0,0 +1,79 @@ +#include <stdio.h> +#include <stddef.h> +#include <signal.h> +#include <sys/poll.h> +#include <sys/mman.h> +#include <sys/user.h> +#define __FRAME_OFFSETS +#include <asm/ptrace.h> +#include <asm/types.h> + +#define DEFINE(sym, val) \ + asm volatile("\n->" #sym " %0 " #val : : "i" (val)) + +#define DEFINE_LONGS(sym, val) \ + asm volatile("\n->" #sym " %0 " #val : : "i" (val/sizeof(unsigned long))) + +void foo(void) +{ +#ifdef __i386__ + DEFINE_LONGS(HOST_FP_SIZE, sizeof(struct user_fpregs_struct)); + DEFINE_LONGS(HOST_FPX_SIZE, sizeof(struct user_fpxregs_struct)); + + DEFINE(HOST_IP, EIP); + DEFINE(HOST_SP, UESP); + DEFINE(HOST_EFLAGS, EFL); + DEFINE(HOST_EAX, EAX); + DEFINE(HOST_EBX, EBX); + DEFINE(HOST_ECX, ECX); + DEFINE(HOST_EDX, EDX); + DEFINE(HOST_ESI, ESI); + DEFINE(HOST_EDI, EDI); + DEFINE(HOST_EBP, EBP); + DEFINE(HOST_CS, CS); + DEFINE(HOST_SS, SS); + DEFINE(HOST_DS, DS); + DEFINE(HOST_FS, FS); + DEFINE(HOST_ES, ES); + DEFINE(HOST_GS, GS); +#else + DEFINE(HOST_FP_SIZE, sizeof(struct _fpstate) / sizeof(unsigned long)); + DEFINE_LONGS(HOST_RBX, RBX); + DEFINE_LONGS(HOST_RCX, RCX); + DEFINE_LONGS(HOST_RDI, RDI); + DEFINE_LONGS(HOST_RSI, RSI); + DEFINE_LONGS(HOST_RDX, RDX); + DEFINE_LONGS(HOST_RBP, RBP); + DEFINE_LONGS(HOST_RAX, RAX); + DEFINE_LONGS(HOST_R8, R8); + DEFINE_LONGS(HOST_R9, R9); + DEFINE_LONGS(HOST_R10, R10); + DEFINE_LONGS(HOST_R11, R11); + DEFINE_LONGS(HOST_R12, R12); + DEFINE_LONGS(HOST_R13, R13); + DEFINE_LONGS(HOST_R14, R14); + DEFINE_LONGS(HOST_R15, R15); + DEFINE_LONGS(HOST_ORIG_RAX, ORIG_RAX); + DEFINE_LONGS(HOST_CS, CS); + DEFINE_LONGS(HOST_SS, SS); + DEFINE_LONGS(HOST_EFLAGS, EFLAGS); +#if 0 + DEFINE_LONGS(HOST_FS, FS); + DEFINE_LONGS(HOST_GS, GS); + DEFINE_LONGS(HOST_DS, DS); + DEFINE_LONGS(HOST_ES, ES); +#endif + + DEFINE_LONGS(HOST_IP, RIP); + DEFINE_LONGS(HOST_SP, RSP); +#endif + + DEFINE(UM_FRAME_SIZE, sizeof(struct user_regs_struct)); + DEFINE(UM_POLLIN, POLLIN); + DEFINE(UM_POLLPRI, POLLPRI); + DEFINE(UM_POLLOUT, POLLOUT); + + DEFINE(UM_PROT_READ, PROT_READ); + DEFINE(UM_PROT_WRITE, PROT_WRITE); + DEFINE(UM_PROT_EXEC, PROT_EXEC); +} diff --git a/arch/x86/um/vdso/Makefile b/arch/x86/um/vdso/Makefile new file mode 100644 index 000000000000..5dffe6d46686 --- /dev/null +++ b/arch/x86/um/vdso/Makefile @@ -0,0 +1,90 @@ +# +# Building vDSO images for x86. +# + +VDSO64-y := y + +vdso-install-$(VDSO64-y) += vdso.so + + +# files to link into the vdso +vobjs-y := vdso-note.o um_vdso.o + +# files to link into kernel +obj-$(VDSO64-y) += vdso.o vma.o + +vobjs := $(foreach F,$(vobjs-y),$(obj)/$F) + +$(obj)/vdso.o: $(obj)/vdso.so + +targets += vdso.so vdso.so.dbg vdso.lds $(vobjs-y) + +export CPPFLAGS_vdso.lds += -P -C + +VDSO_LDFLAGS_vdso.lds = -m64 -Wl,-soname=linux-vdso.so.1 \ + -Wl,-z,max-page-size=4096 -Wl,-z,common-page-size=4096 + +$(obj)/vdso.o: $(src)/vdso.S $(obj)/vdso.so + +$(obj)/vdso.so.dbg: $(src)/vdso.lds $(vobjs) FORCE + $(call if_changed,vdso) + +$(obj)/%.so: OBJCOPYFLAGS := -S +$(obj)/%.so: $(obj)/%.so.dbg FORCE + $(call if_changed,objcopy) + +# +# Don't omit frame pointers for ease of userspace debugging, but do +# optimize sibling calls. +# +CFL := $(PROFILING) -mcmodel=small -fPIC -O2 -fasynchronous-unwind-tables -m64 \ + $(filter -g%,$(KBUILD_CFLAGS)) $(call cc-option, -fno-stack-protector) \ + -fno-omit-frame-pointer -foptimize-sibling-calls + +$(vobjs): KBUILD_CFLAGS += $(CFL) + +# +# vDSO code runs in userspace and -pg doesn't help with profiling anyway. +# +CFLAGS_REMOVE_vdso-note.o = -pg +CFLAGS_REMOVE_um_vdso.o = -pg + +targets += vdso-syms.lds +obj-$(VDSO64-y) += vdso-syms.lds + +# +# Match symbols in the DSO that look like VDSO*; produce a file of constants. +# +sed-vdsosym := -e 's/^00*/0/' \ + -e 's/^\([0-9a-fA-F]*\) . \(VDSO[a-zA-Z0-9_]*\)$$/\2 = 0x\1;/p' +quiet_cmd_vdsosym = VDSOSYM $@ +define cmd_vdsosym + $(NM) $< | LC_ALL=C sed -n $(sed-vdsosym) | LC_ALL=C sort > $@ +endef + +$(obj)/%-syms.lds: $(obj)/%.so.dbg FORCE + $(call if_changed,vdsosym) + +# +# The DSO images are built using a special linker script. +# +quiet_cmd_vdso = VDSO $@ + cmd_vdso = $(CC) -nostdlib -o $@ \ + $(VDSO_LDFLAGS) $(VDSO_LDFLAGS_$(filter %.lds,$(^F))) \ + -Wl,-T,$(filter %.lds,$^) $(filter %.o,$^) && \ + sh $(srctree)/$(src)/checkundef.sh '$(NM)' '$@' + +VDSO_LDFLAGS = -fPIC -shared $(call cc-ldoption, -Wl$(comma)--hash-style=sysv) +GCOV_PROFILE := n + +# +# Install the unstripped copy of vdso*.so listed in $(vdso-install-y). +# +quiet_cmd_vdso_install = INSTALL $@ + cmd_vdso_install = cp $(obj)/$@.dbg $(MODLIB)/vdso/$@ +$(vdso-install-y): %.so: $(obj)/%.so.dbg FORCE + @mkdir -p $(MODLIB)/vdso + $(call cmd,vdso_install) + +PHONY += vdso_install $(vdso-install-y) +vdso_install: $(vdso-install-y) diff --git a/arch/x86/um/vdso/checkundef.sh b/arch/x86/um/vdso/checkundef.sh new file mode 100644 index 000000000000..7ee90a9b549d --- /dev/null +++ b/arch/x86/um/vdso/checkundef.sh @@ -0,0 +1,10 @@ +#!/bin/sh +nm="$1" +file="$2" +$nm "$file" | grep '^ *U' > /dev/null 2>&1 +if [ $? -eq 1 ]; then + exit 0 +else + echo "$file: undefined symbols found" >&2 + exit 1 +fi diff --git a/arch/x86/um/vdso/um_vdso.c b/arch/x86/um/vdso/um_vdso.c new file mode 100644 index 000000000000..7c441b59d375 --- /dev/null +++ b/arch/x86/um/vdso/um_vdso.c @@ -0,0 +1,71 @@ +/* + * Copyright (C) 2011 Richard Weinberger <richrd@nod.at> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This vDSO turns all calls into a syscall so that UML can trap them. + */ + + +/* Disable profiling for userspace code */ +#define DISABLE_BRANCH_PROFILING + +#include <linux/time.h> +#include <linux/getcpu.h> +#include <asm/unistd.h> + +int __vdso_clock_gettime(clockid_t clock, struct timespec *ts) +{ + long ret; + + asm("syscall" : "=a" (ret) : + "0" (__NR_clock_gettime), "D" (clock), "S" (ts) : "memory"); + + return ret; +} +int clock_gettime(clockid_t, struct timespec *) + __attribute__((weak, alias("__vdso_clock_gettime"))); + +int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz) +{ + long ret; + + asm("syscall" : "=a" (ret) : + "0" (__NR_gettimeofday), "D" (tv), "S" (tz) : "memory"); + + return ret; +} +int gettimeofday(struct timeval *, struct timezone *) + __attribute__((weak, alias("__vdso_gettimeofday"))); + +time_t __vdso_time(time_t *t) +{ + long secs; + + asm volatile("syscall" + : "=a" (secs) + : "0" (__NR_time), "D" (t) : "cc", "r11", "cx", "memory"); + + return secs; +} +int time(time_t *t) __attribute__((weak, alias("__vdso_time"))); + +long +__vdso_getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *unused) +{ + /* + * UML does not support SMP, we can cheat here. :) + */ + + if (cpu) + *cpu = 0; + if (node) + *node = 0; + + return 0; +} + +long getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *tcache) + __attribute__((weak, alias("__vdso_getcpu"))); diff --git a/arch/x86/um/vdso/vdso-layout.lds.S b/arch/x86/um/vdso/vdso-layout.lds.S new file mode 100644 index 000000000000..634a2cf62046 --- /dev/null +++ b/arch/x86/um/vdso/vdso-layout.lds.S @@ -0,0 +1,64 @@ +/* + * Linker script for vDSO. This is an ELF shared object prelinked to + * its virtual address, and with only one read-only segment. + * This script controls its layout. + */ + +SECTIONS +{ + . = VDSO_PRELINK + SIZEOF_HEADERS; + + .hash : { *(.hash) } :text + .gnu.hash : { *(.gnu.hash) } + .dynsym : { *(.dynsym) } + .dynstr : { *(.dynstr) } + .gnu.version : { *(.gnu.version) } + .gnu.version_d : { *(.gnu.version_d) } + .gnu.version_r : { *(.gnu.version_r) } + + .note : { *(.note.*) } :text :note + + .eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr + .eh_frame : { KEEP (*(.eh_frame)) } :text + + .dynamic : { *(.dynamic) } :text :dynamic + + .rodata : { *(.rodata*) } :text + .data : { + *(.data*) + *(.sdata*) + *(.got.plt) *(.got) + *(.gnu.linkonce.d.*) + *(.bss*) + *(.dynbss*) + *(.gnu.linkonce.b.*) + } + + .altinstructions : { *(.altinstructions) } + .altinstr_replacement : { *(.altinstr_replacement) } + + /* + * Align the actual code well away from the non-instruction data. + * This is the best thing for the I-cache. + */ + . = ALIGN(0x100); + + .text : { *(.text*) } :text =0x90909090 +} + +/* + * Very old versions of ld do not recognize this name token; use the constant. + */ +#define PT_GNU_EH_FRAME 0x6474e550 + +/* + * We must supply the ELF program headers explicitly to get just one + * PT_LOAD segment, and set the flags explicitly to make segments read-only. + */ +PHDRS +{ + text PT_LOAD FLAGS(5) FILEHDR PHDRS; /* PF_R|PF_X */ + dynamic PT_DYNAMIC FLAGS(4); /* PF_R */ + note PT_NOTE FLAGS(4); /* PF_R */ + eh_frame_hdr PT_GNU_EH_FRAME; +} diff --git a/arch/x86/um/vdso/vdso-note.S b/arch/x86/um/vdso/vdso-note.S new file mode 100644 index 000000000000..79a071e4357e --- /dev/null +++ b/arch/x86/um/vdso/vdso-note.S @@ -0,0 +1,12 @@ +/* + * This supplies .note.* sections to go into the PT_NOTE inside the vDSO text. + * Here we can supply some information useful to userland. + */ + +#include <linux/uts.h> +#include <linux/version.h> +#include <linux/elfnote.h> + +ELFNOTE_START(Linux, 0, "a") + .long LINUX_VERSION_CODE +ELFNOTE_END diff --git a/arch/x86/um/vdso/vdso.S b/arch/x86/um/vdso/vdso.S new file mode 100644 index 000000000000..1cb468adacbb --- /dev/null +++ b/arch/x86/um/vdso/vdso.S @@ -0,0 +1,10 @@ +#include <linux/init.h> + +__INITDATA + + .globl vdso_start, vdso_end +vdso_start: + .incbin "arch/x86/um/vdso/vdso.so" +vdso_end: + +__FINIT diff --git a/arch/x86/um/vdso/vdso.lds.S b/arch/x86/um/vdso/vdso.lds.S new file mode 100644 index 000000000000..b96b2677cad8 --- /dev/null +++ b/arch/x86/um/vdso/vdso.lds.S @@ -0,0 +1,32 @@ +/* + * Linker script for 64-bit vDSO. + * We #include the file to define the layout details. + * Here we only choose the prelinked virtual address. + * + * This file defines the version script giving the user-exported symbols in + * the DSO. We can define local symbols here called VDSO* to make their + * values visible using the asm-x86/vdso.h macros from the kernel proper. + */ + +#define VDSO_PRELINK 0xffffffffff700000 +#include "vdso-layout.lds.S" + +/* + * This controls what userland symbols we export from the vDSO. + */ +VERSION { + LINUX_2.6 { + global: + clock_gettime; + __vdso_clock_gettime; + gettimeofday; + __vdso_gettimeofday; + getcpu; + __vdso_getcpu; + time; + __vdso_time; + local: *; + }; +} + +VDSO64_PRELINK = VDSO_PRELINK; diff --git a/arch/x86/um/vdso/vma.c b/arch/x86/um/vdso/vma.c new file mode 100644 index 000000000000..9495c8d0ce37 --- /dev/null +++ b/arch/x86/um/vdso/vma.c @@ -0,0 +1,74 @@ +/* + * Copyright (C) 2011 Richard Weinberger <richrd@nod.at> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/slab.h> +#include <linux/sched.h> +#include <linux/mm.h> +#include <asm/page.h> +#include <linux/init.h> + +unsigned int __read_mostly vdso_enabled = 1; +unsigned long um_vdso_addr; + +extern unsigned long task_size; +extern char vdso_start[], vdso_end[]; + +static struct page **vdsop; + +static int __init init_vdso(void) +{ + struct page *um_vdso; + + BUG_ON(vdso_end - vdso_start > PAGE_SIZE); + + um_vdso_addr = task_size - PAGE_SIZE; + + vdsop = kmalloc(GFP_KERNEL, sizeof(struct page *)); + if (!vdsop) + goto oom; + + um_vdso = alloc_page(GFP_KERNEL); + if (!um_vdso) { + kfree(vdsop); + + goto oom; + } + + copy_page(page_address(um_vdso), vdso_start); + *vdsop = um_vdso; + + return 0; + +oom: + printk(KERN_ERR "Cannot allocate vdso\n"); + vdso_enabled = 0; + + return -ENOMEM; +} +subsys_initcall(init_vdso); + +int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) +{ + int err; + struct mm_struct *mm = current->mm; + + if (!vdso_enabled) + return 0; + + down_write(&mm->mmap_sem); + + err = install_special_mapping(mm, um_vdso_addr, PAGE_SIZE, + VM_READ|VM_EXEC| + VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC| + VM_ALWAYSDUMP, + vdsop); + + up_write(&mm->mmap_sem); + + return err; +} |