diff options
Diffstat (limited to 'arch')
209 files changed, 7437 insertions, 2880 deletions
diff --git a/arch/blackfin/mach-bf527/boards/ezkit.c b/arch/blackfin/mach-bf527/boards/ezkit.c index 5958eecefcf1..689b69c98ee4 100644 --- a/arch/blackfin/mach-bf527/boards/ezkit.c +++ b/arch/blackfin/mach-bf527/boards/ezkit.c @@ -323,10 +323,15 @@ static struct platform_device smc91x_device = { static struct resource dm9000_resources[] = { [0] = { .start = 0x203FB800, - .end = 0x203FB800 + 8, + .end = 0x203FB800 + 1, .flags = IORESOURCE_MEM, }, [1] = { + .start = 0x203FB800 + 4, + .end = 0x203FB800 + 5, + .flags = IORESOURCE_MEM, + }, + [2] = { .start = IRQ_PF9, .end = IRQ_PF9, .flags = (IORESOURCE_IRQ | IORESOURCE_IRQ_HIGHEDGE), diff --git a/arch/blackfin/mach-bf533/boards/H8606.c b/arch/blackfin/mach-bf533/boards/H8606.c index 7cc4864f6aaf..4103a97c1a70 100644 --- a/arch/blackfin/mach-bf533/boards/H8606.c +++ b/arch/blackfin/mach-bf533/boards/H8606.c @@ -65,10 +65,15 @@ static struct platform_device rtc_device = { static struct resource dm9000_resources[] = { [0] = { .start = 0x20300000, - .end = 0x20300000 + 8, + .end = 0x20300000 + 1, .flags = IORESOURCE_MEM, }, [1] = { + .start = 0x20300000 + 4, + .end = 0x20300000 + 5, + .flags = IORESOURCE_MEM, + }, + [2] = { .start = IRQ_PF10, .end = IRQ_PF10, .flags = (IORESOURCE_IRQ | IORESOURCE_IRQ_HIGHEDGE), diff --git a/arch/blackfin/mach-bf537/boards/generic_board.c b/arch/blackfin/mach-bf537/boards/generic_board.c index 7d250828dad8..01b63e2ec18f 100644 --- a/arch/blackfin/mach-bf537/boards/generic_board.c +++ b/arch/blackfin/mach-bf537/boards/generic_board.c @@ -166,10 +166,15 @@ static struct platform_device smc91x_device = { static struct resource dm9000_resources[] = { [0] = { .start = 0x203FB800, - .end = 0x203FB800 + 8, + .end = 0x203FB800 + 1, .flags = IORESOURCE_MEM, }, [1] = { + .start = 0x203FB800 + 4, + .end = 0x203FB800 + 5, + .flags = IORESOURCE_MEM, + }, + [2] = { .start = IRQ_PF9, .end = IRQ_PF9, .flags = (IORESOURCE_IRQ | IORESOURCE_IRQ_HIGHEDGE), diff --git a/arch/cris/arch-v10/boot/Makefile b/arch/cris/arch-v10/boot/Makefile index 20c83a53caf3..217203014433 100644 --- a/arch/cris/arch-v10/boot/Makefile +++ b/arch/cris/arch-v10/boot/Makefile @@ -2,7 +2,6 @@ # arch/cris/arch-v10/boot/Makefile # -OBJCOPY = objcopy-cris OBJCOPYFLAGS = -O binary --remove-section=.bss subdir- := compressed rescue diff --git a/arch/cris/arch-v10/boot/compressed/Makefile b/arch/cris/arch-v10/boot/compressed/Makefile index 4a031cb27eb9..08d943ce4be7 100644 --- a/arch/cris/arch-v10/boot/compressed/Makefile +++ b/arch/cris/arch-v10/boot/compressed/Makefile @@ -2,12 +2,10 @@ # arch/cris/arch-v10/boot/compressed/Makefile # -CC = gcc-cris -melf $(LINUXINCLUDE) -ccflags-y += -O2 -LD = ld-cris -ldflags-y += -T $(obj)/decompress.ld +asflags-y += $(LINUXINCLUDE) +ccflags-y += -O2 $(LINUXINCLUDE) +ldflags-y += -T $(srctree)/$(obj)/decompress.ld OBJECTS = $(obj)/head.o $(obj)/misc.o -OBJCOPY = objcopy-cris OBJCOPYFLAGS = -O binary --remove-section=.bss quiet_cmd_image = BUILD $@ @@ -21,12 +19,6 @@ $(obj)/decompress.o: $(OBJECTS) FORCE $(obj)/decompress.bin: $(obj)/decompress.o FORCE $(call if_changed,objcopy) -$(obj)/head.o: $(obj)/head.S .config - @$(CC) -D__ASSEMBLY__ -traditional -c $< -o $@ - -$(obj)/misc.o: $(obj)/misc.c .config - @$(CC) -D__KERNEL__ -c $< -o $@ - $(obj)/vmlinux: $(obj)/piggy.gz $(obj)/decompress.bin FORCE $(call if_changed,image) diff --git a/arch/cris/arch-v10/boot/compressed/decompress.ld b/arch/cris/arch-v10/boot/compressed/decompress.ld index 0b0a14fe6177..e80f4594d543 100644 --- a/arch/cris/arch-v10/boot/compressed/decompress.ld +++ b/arch/cris/arch-v10/boot/compressed/decompress.ld @@ -1,4 +1,5 @@ -OUTPUT_FORMAT(elf32-us-cris) +/* OUTPUT_FORMAT(elf32-us-cris) */ +OUTPUT_FORMAT(elf32-cris) MEMORY { diff --git a/arch/cris/arch-v10/boot/compressed/head.S b/arch/cris/arch-v10/boot/compressed/head.S index 610bdb237553..981fbae84959 100644 --- a/arch/cris/arch-v10/boot/compressed/head.S +++ b/arch/cris/arch-v10/boot/compressed/head.S @@ -15,77 +15,77 @@ #define COMMAND_LINE_MAGIC 0x87109563 ;; Exported symbols - - .globl _input_data - + .globl input_data + + .text nop di ;; We need to initialze DRAM registers before we start using the DRAM - - cmp.d RAM_INIT_MAGIC, r8 ; Already initialized? + + cmp.d RAM_INIT_MAGIC, $r8 ; Already initialized? beq dram_init_finished nop - + #include "../../lib/dram_init.S" - -dram_init_finished: - + +dram_init_finished: + ;; Initiate the PA and PB ports - move.b CONFIG_ETRAX_DEF_R_PORT_PA_DATA, r0 - move.b r0, [R_PORT_PA_DATA] + move.b CONFIG_ETRAX_DEF_R_PORT_PA_DATA, $r0 + move.b $r0, [R_PORT_PA_DATA] - move.b CONFIG_ETRAX_DEF_R_PORT_PA_DIR, r0 - move.b r0, [R_PORT_PA_DIR] + move.b CONFIG_ETRAX_DEF_R_PORT_PA_DIR, $r0 + move.b $r0, [R_PORT_PA_DIR] - move.b CONFIG_ETRAX_DEF_R_PORT_PB_DATA, r0 - move.b r0, [R_PORT_PB_DATA] + move.b CONFIG_ETRAX_DEF_R_PORT_PB_DATA, $r0 + move.b $r0, [R_PORT_PB_DATA] - move.b CONFIG_ETRAX_DEF_R_PORT_PB_DIR, r0 - move.b r0, [R_PORT_PB_DIR] + move.b CONFIG_ETRAX_DEF_R_PORT_PB_DIR, $r0 + move.b $r0, [R_PORT_PB_DIR] ;; Setup the stack to a suitably high address. ;; We assume 8 MB is the minimum DRAM in an eLinux ;; product and put the sp at the top for now. - move.d 0x40800000, sp + move.d 0x40800000, $sp ;; Figure out where the compressed piggyback image is ;; in the flash (since we wont try to copy it to DRAM ;; before unpacking). It is at _edata, but in flash. ;; Use (_edata - basse) as offset to the current PC. - -basse: move.d pc, r5 - and.d 0x7fffffff, r5 ; strip any non-cache bit - subq 2, r5 ; compensate for the move.d pc instr - move.d r5, r0 ; save for later - flash address of 'basse' - add.d _edata, r5 - sub.d basse, r5 ; r5 = flash address of '_edata' - + +basse: move.d $pc, $r5 + and.d 0x7fffffff, $r5 ; strip any non-cache bit + subq 2, $r5 ; compensate for the move.d $pc instr + move.d $r5, $r0 ; save for later - flash address of 'basse' + add.d _edata, $r5 + sub.d basse, $r5 ; $r5 = flash address of '_edata' + ;; Copy text+data to DRAM - - move.d basse, r1 ; destination - move.d _edata, r2 ; end destination -1: move.w [r0+], r3 - move.w r3, [r1+] - cmp.d r2, r1 + + move.d basse, $r1 ; destination + move.d _edata, $r2 ; end destination +1: move.w [$r0+], $r3 + move.w $r3, [$r1+] + cmp.d $r2, $r1 bcs 1b nop - move.d r5, [_input_data] ; for the decompressor + move.d $r5, [input_data] ; for the decompressor ;; Clear the decompressors BSS (between _edata and _end) - - moveq 0, r0 - move.d _edata, r1 - move.d _end, r2 -1: move.w r0, [r1+] - cmp.d r2, r1 + + moveq 0, $r0 + move.d _edata, $r1 + move.d _end, $r2 +1: move.w $r0, [$r1+] + cmp.d $r2, $r1 bcs 1b nop @@ -94,16 +94,16 @@ basse: move.d pc, r5 move.d $r10, [$r12] move.d _cmd_line_addr, $r12 move.d $r11, [$r12] - - ;; Do the decompression and save compressed size in _inptr - jsr _decompress_kernel - - ;; Put start address of root partition in r9 so the kernel can use it + ;; Do the decompression and save compressed size in inptr + + jsr decompress_kernel + + ;; Put start address of root partition in $r9 so the kernel can use it ;; when mounting from flash - move.d [_input_data], r9 ; flash address of compressed kernel - add.d [_inptr], r9 ; size of compressed kernel + move.d [input_data], $r9 ; flash address of compressed kernel + add.d [inptr], $r9 ; size of compressed kernel ;; Restore command line magic and address. move.d _cmd_line_magic, $r10 @@ -112,12 +112,12 @@ basse: move.d pc, r5 move.d [$r11], $r11 ;; Enter the decompressed kernel - move.d RAM_INIT_MAGIC, r8 ; Tell kernel that DRAM is initialized + move.d RAM_INIT_MAGIC, $r8 ; Tell kernel that DRAM is initialized jump 0x40004000 ; kernel is linked to this address - + .data -_input_data: +input_data: .dword 0 ; used by the decompressor _cmd_line_magic: .dword 0 diff --git a/arch/cris/arch-v10/boot/compressed/misc.c b/arch/cris/arch-v10/boot/compressed/misc.c index 9a43ab19391e..18e13bce1400 100644 --- a/arch/cris/arch-v10/boot/compressed/misc.c +++ b/arch/cris/arch-v10/boot/compressed/misc.c @@ -29,12 +29,10 @@ #define OF(args) args #define STATIC static -void* memset(void* s, int c, size_t n); -void* memcpy(void* __dest, __const void* __src, - size_t __n); - -#define memzero(s, n) memset ((s), 0, (n)) +void *memset(void *s, int c, size_t n); +void *memcpy(void *__dest, __const void *__src, size_t __n); +#define memzero(s, n) memset((s), 0, (n)) typedef unsigned char uch; typedef unsigned short ush; @@ -62,57 +60,69 @@ static unsigned outcnt = 0; /* bytes in output buffer */ #define ENCRYPTED 0x20 /* bit 5 set: file is encrypted */ #define RESERVED 0xC0 /* bit 6,7: reserved */ -#define get_byte() inbuf[inptr++] - +#define get_byte() (inbuf[inptr++]) + /* Diagnostic functions */ #ifdef DEBUG -# define Assert(cond,msg) {if(!(cond)) error(msg);} +# define Assert(cond, msg) do { \ + if (!(cond)) \ + error(msg); \ + } while (0) # define Trace(x) fprintf x -# define Tracev(x) {if (verbose) fprintf x ;} -# define Tracevv(x) {if (verbose>1) fprintf x ;} -# define Tracec(c,x) {if (verbose && (c)) fprintf x ;} -# define Tracecv(c,x) {if (verbose>1 && (c)) fprintf x ;} +# define Tracev(x) do { \ + if (verbose) \ + fprintf x; \ + } while (0) +# define Tracevv(x) do { \ + if (verbose > 1) \ + fprintf x; \ + } while (0) +# define Tracec(c, x) do { \ + if (verbose && (c)) \ + fprintf x; \ + } while (0) +# define Tracecv(c, x) do { \ + if (verbose > 1 && (c)) \ + fprintf x; \ + } while (0) #else -# define Assert(cond,msg) +# define Assert(cond, msg) # define Trace(x) # define Tracev(x) # define Tracevv(x) -# define Tracec(c,x) -# define Tracecv(c,x) +# define Tracec(c, x) +# define Tracecv(c, x) #endif -static int fill_inbuf(void); static void flush_window(void); static void error(char *m); -static void gzip_mark(void **); -static void gzip_release(void **); extern char *input_data; /* lives in head.S */ static long bytes_out = 0; static uch *output_data; static unsigned long output_ptr = 0; - + static void *malloc(int size); static void free(void *where); -static void error(char *m); static void gzip_mark(void **); static void gzip_release(void **); - + static void puts(const char *); /* the "heap" is put directly after the BSS ends, at end */ - -extern int end; -static long free_mem_ptr = (long)&end; - + +extern int _end; +static long free_mem_ptr = (long)&_end; + #include "../../../../../lib/inflate.c" static void *malloc(int size) { void *p; - if (size <0) error("Malloc error"); + if (size < 0) + error("Malloc error"); free_mem_ptr = (free_mem_ptr + 3) & ~3; /* Align */ @@ -142,44 +152,47 @@ static void puts(const char *s) { #ifndef CONFIG_ETRAX_DEBUG_PORT_NULL - while(*s) { + while (*s) { #ifdef CONFIG_ETRAX_DEBUG_PORT0 - while(!(*R_SERIAL0_STATUS & (1 << 5))) ; + while (!(*R_SERIAL0_STATUS & (1 << 5))) ; *R_SERIAL0_TR_DATA = *s++; #endif #ifdef CONFIG_ETRAX_DEBUG_PORT1 - while(!(*R_SERIAL1_STATUS & (1 << 5))) ; + while (!(*R_SERIAL1_STATUS & (1 << 5))) ; *R_SERIAL1_TR_DATA = *s++; #endif #ifdef CONFIG_ETRAX_DEBUG_PORT2 - while(!(*R_SERIAL2_STATUS & (1 << 5))) ; + while (!(*R_SERIAL2_STATUS & (1 << 5))) ; *R_SERIAL2_TR_DATA = *s++; #endif #ifdef CONFIG_ETRAX_DEBUG_PORT3 - while(!(*R_SERIAL3_STATUS & (1 << 5))) ; + while (!(*R_SERIAL3_STATUS & (1 << 5))) ; *R_SERIAL3_TR_DATA = *s++; #endif } #endif } -void* -memset(void* s, int c, size_t n) +void *memset(void *s, int c, size_t n) { int i; - char *ss = (char*)s; + char *ss = (char *)s; + + for (i = 0; i < n; i++) + ss[i] = c; - for (i=0;i<n;i++) ss[i] = c; + return s; } -void* -memcpy(void* __dest, __const void* __src, - size_t __n) +void *memcpy(void *__dest, __const void *__src, size_t __n) { int i; char *d = (char *)__dest, *s = (char *)__src; - for (i=0;i<__n;i++) d[i] = s[i]; + for (i = 0; i < __n; i++) + d[i] = s[i]; + + return __dest; } /* =========================================================================== @@ -187,46 +200,44 @@ memcpy(void* __dest, __const void* __src, * (Used for the decompressed data only.) */ -static void -flush_window() +static void flush_window(void) { - ulg c = crc; /* temporary variable */ - unsigned n; - uch *in, *out, ch; - - in = window; - out = &output_data[output_ptr]; - for (n = 0; n < outcnt; n++) { - ch = *out++ = *in++; - c = crc_32_tab[((int)c ^ ch) & 0xff] ^ (c >> 8); - } - crc = c; - bytes_out += (ulg)outcnt; - output_ptr += (ulg)outcnt; - outcnt = 0; + ulg c = crc; /* temporary variable */ + unsigned n; + uch *in, *out, ch; + + in = window; + out = &output_data[output_ptr]; + for (n = 0; n < outcnt; n++) { + ch = *out = *in; + out++; + in++; + c = crc_32_tab[((int)c ^ ch) & 0xff] ^ (c >> 8); + } + crc = c; + bytes_out += (ulg)outcnt; + output_ptr += (ulg)outcnt; + outcnt = 0; } -static void -error(char *x) +static void error(char *x) { puts("\n\n"); puts(x); puts("\n\n -- System halted\n"); - while(1); /* Halt */ + while (1); /* Halt */ } -void -setup_normal_output_buffer() +void setup_normal_output_buffer(void) { output_data = (char *)KERNEL_LOAD_ADR; } -void -decompress_kernel() +void decompress_kernel(void) { char revision; - + /* input_data is set in head.S */ inbuf = input_data; @@ -257,11 +268,10 @@ decompress_kernel() makecrc(); - __asm__ volatile ("move vr,%0" : "=rm" (revision)); - if (revision < 10) - { + __asm__ volatile ("move $vr,%0" : "=rm" (revision)); + if (revision < 10) { puts("You need an ETRAX 100LX to run linux 2.6\n"); - while(1); + while (1); } puts("Uncompressing Linux...\n"); diff --git a/arch/cris/arch-v10/boot/rescue/Makefile b/arch/cris/arch-v10/boot/rescue/Makefile index 2e5045b9e19c..07688da92708 100644 --- a/arch/cris/arch-v10/boot/rescue/Makefile +++ b/arch/cris/arch-v10/boot/rescue/Makefile @@ -2,12 +2,9 @@ # Makefile for rescue (bootstrap) code # -CC = gcc-cris -mlinux $(LINUXINCLUDE) -ccflags-y += -O2 -asflags-y += -traditional -LD = gcc-cris -mlinux -nostdlib -ldflags-y += -T $(obj)/rescue.ld -OBJCOPY = objcopy-cris +ccflags-y += -O2 $(LINUXINCLUDE) +asflags-y += $(LINUXINCLUDE) +ldflags-y += -T $(srctree)/$(obj)/rescue.ld OBJCOPYFLAGS = -O binary --remove-section=.bss obj-$(CONFIG_ETRAX_AXISFLASHMAP) = head.o OBJECT := $(obj)/head.o diff --git a/arch/cris/arch-v10/drivers/pcf8563.c b/arch/cris/arch-v10/drivers/pcf8563.c index 52103d16dc6c..8769dc914073 100644 --- a/arch/cris/arch-v10/drivers/pcf8563.c +++ b/arch/cris/arch-v10/drivers/pcf8563.c @@ -233,7 +233,7 @@ int pcf8563_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, if (copy_to_user((struct rtc_time *) arg, &tm, sizeof tm)) { - spin_unlock(&rtc_lock); + mutex_unlock(&rtc_lock); return -EFAULT; } diff --git a/arch/cris/arch-v10/kernel/debugport.c b/arch/cris/arch-v10/kernel/debugport.c index 04d5eee2c90c..3dc6e91ba39e 100644 --- a/arch/cris/arch-v10/kernel/debugport.c +++ b/arch/cris/arch-v10/kernel/debugport.c @@ -426,12 +426,18 @@ static int dummy_write(struct tty_struct * tty, return count; } -static int -dummy_write_room(struct tty_struct *tty) +static int dummy_write_room(struct tty_struct *tty) { return 8192; } +static const struct tty_operations dummy_ops = { + .open = dummy_open, + .close = dummy_close, + .write = dummy_write, + .write_room = dummy_write_room, +}; + void __init init_dummy_console(void) { @@ -444,14 +450,14 @@ init_dummy_console(void) dummy_driver.type = TTY_DRIVER_TYPE_SERIAL; dummy_driver.subtype = SERIAL_TYPE_NORMAL; dummy_driver.init_termios = tty_std_termios; + /* Normally B9600 default... */ dummy_driver.init_termios.c_cflag = - B115200 | CS8 | CREAD | HUPCL | CLOCAL; /* is normally B9600 default... */ + B115200 | CS8 | CREAD | HUPCL | CLOCAL; dummy_driver.flags = TTY_DRIVER_REAL_RAW | TTY_DRIVER_DYNAMIC_DEV; + dummy_driver.init_termios.c_ispeed = 115200; + dummy_driver.init_termios.c_ospeed = 115200; - dummy_driver.open = dummy_open; - dummy_driver.close = dummy_close; - dummy_driver.write = dummy_write; - dummy_driver.write_room = dummy_write_room; + dummy_driver.ops = &dummy_ops; if (tty_register_driver(&dummy_driver)) panic("Couldn't register dummy serial driver\n"); } diff --git a/arch/cris/arch-v32/boot/Makefile b/arch/cris/arch-v32/boot/Makefile index 3f91349c5f12..99896ad60b30 100644 --- a/arch/cris/arch-v32/boot/Makefile +++ b/arch/cris/arch-v32/boot/Makefile @@ -2,7 +2,6 @@ # arch/cris/arch-v32/boot/Makefile # -OBJCOPY = objcopy-cris OBJCOPYFLAGS = -O binary -R .note -R .comment subdir- := compressed rescue diff --git a/arch/cris/arch-v32/boot/compressed/Makefile b/arch/cris/arch-v32/boot/compressed/Makefile index 2c8c2c3039c5..d6335f26083b 100644 --- a/arch/cris/arch-v32/boot/compressed/Makefile +++ b/arch/cris/arch-v32/boot/compressed/Makefile @@ -2,14 +2,10 @@ # arch/cris/arch-v32/boot/compressed/Makefile # -CC = gcc-cris -mlinux -march=v32 $(LINUXINCLUDE) asflags-y += -I $(srctree)/include/asm/mach/ -I $(srctree)/include/asm/arch ccflags-y += -O2 -I $(srctree)/include/asm/mach/ -I $(srctree)/include/asm/arch -LD = gcc-cris -mlinux -march=v32 -nostdlib -ldflags-y += -T $(obj)/decompress.ld -obj-y = head.o misc.o +ldflags-y += -T $(srctree)/$(obj)/decompress.ld OBJECTS = $(obj)/head.o $(obj)/misc.o -OBJCOPY = objcopy-cris OBJCOPYFLAGS = -O binary --remove-section=.bss quiet_cmd_image = BUILD $@ diff --git a/arch/cris/arch-v32/boot/rescue/Makefile b/arch/cris/arch-v32/boot/rescue/Makefile index c0987795dcb7..44ae0ad61f90 100644 --- a/arch/cris/arch-v32/boot/rescue/Makefile +++ b/arch/cris/arch-v32/boot/rescue/Makefile @@ -7,9 +7,8 @@ ccflags-y += -O2 -I $(srctree)/include/asm/arch/mach/ \ -I $(srctree)/include/asm/arch asflags-y += -I $(srctree)/include/asm/arch/mach/ -I $(srctree)/include/asm/arch LD = gcc-cris -mlinux -march=v32 -nostdlib -ldflags-y += -T $(obj)/rescue.ld +ldflags-y += -T $(srctree)/$(obj)/rescue.ld LDPOSTFLAGS = -lgcc -OBJCOPY = objcopy-cris OBJCOPYFLAGS = -O binary --remove-section=.bss obj-$(CONFIG_ETRAX_AXISFLASHMAP) = head.o OBJECT := $(obj)/head.o diff --git a/arch/cris/arch-v32/drivers/pcf8563.c b/arch/cris/arch-v32/drivers/pcf8563.c index 53db3870ba04..f263ab571221 100644 --- a/arch/cris/arch-v32/drivers/pcf8563.c +++ b/arch/cris/arch-v32/drivers/pcf8563.c @@ -229,7 +229,7 @@ int pcf8563_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, if (copy_to_user((struct rtc_time *) arg, &tm, sizeof tm)) { - spin_unlock(&rtc_lock); + mutex_unlock(&rtc_lock); return -EFAULT; } diff --git a/arch/ia64/hp/sim/simserial.c b/arch/ia64/hp/sim/simserial.c index 23cafc80d2a4..24b1ad5334cb 100644 --- a/arch/ia64/hp/sim/simserial.c +++ b/arch/ia64/hp/sim/simserial.c @@ -193,18 +193,6 @@ static irqreturn_t rs_interrupt_single(int irq, void *dev_id) * ------------------------------------------------------------------- */ -#if 0 -/* - * not really used in our situation so keep them commented out for now - */ -static DECLARE_TASK_QUEUE(tq_serial); /* used to be at the top of the file */ -static void do_serial_bh(void) -{ - run_task_queue(&tq_serial); - printk(KERN_ERR "do_serial_bh: called\n"); -} -#endif - static void do_softint(struct work_struct *private_) { printk(KERN_ERR "simserial: do_softint called\n"); @@ -351,11 +339,7 @@ static void rs_flush_buffer(struct tty_struct *tty) info->xmit.head = info->xmit.tail = 0; local_irq_restore(flags); - wake_up_interruptible(&tty->write_wait); - - if ((tty->flags & (1 << TTY_DO_WRITE_WAKEUP)) && - tty->ldisc.write_wakeup) - (tty->ldisc.write_wakeup)(tty); + tty_wakeup(tty); } /* @@ -404,12 +388,6 @@ static void rs_unthrottle(struct tty_struct * tty) printk(KERN_INFO "simrs_unthrottle called\n"); } -/* - * rs_break() --- routine which turns the break handling on or off - */ -static void rs_break(struct tty_struct *tty, int break_state) -{ -} static int rs_ioctl(struct tty_struct *tty, struct file * file, unsigned int cmd, unsigned long arg) @@ -422,14 +400,6 @@ static int rs_ioctl(struct tty_struct *tty, struct file * file, } switch (cmd) { - case TIOCMGET: - printk(KERN_INFO "rs_ioctl: TIOCMGET called\n"); - return -EINVAL; - case TIOCMBIS: - case TIOCMBIC: - case TIOCMSET: - printk(KERN_INFO "rs_ioctl: TIOCMBIS/BIC/SET called\n"); - return -EINVAL; case TIOCGSERIAL: printk(KERN_INFO "simrs_ioctl TIOCGSERIAL called\n"); return 0; @@ -488,14 +458,6 @@ static int rs_ioctl(struct tty_struct *tty, struct file * file, static void rs_set_termios(struct tty_struct *tty, struct ktermios *old_termios) { - unsigned int cflag = tty->termios->c_cflag; - - if ( (cflag == old_termios->c_cflag) - && ( RELEVANT_IFLAG(tty->termios->c_iflag) - == RELEVANT_IFLAG(old_termios->c_iflag))) - return; - - /* Handle turning off CRTSCTS */ if ((old_termios->c_cflag & CRTSCTS) && !(tty->termios->c_cflag & CRTSCTS)) { @@ -623,9 +585,8 @@ static void rs_close(struct tty_struct *tty, struct file * filp) * the line discipline to only process XON/XOFF characters. */ shutdown(info); - if (tty->ops->flush_buffer) - tty->ops->flush_buffer(tty); - if (tty->ldisc.flush_buffer) tty->ldisc.flush_buffer(tty); + rs_flush_buffer(tty); + tty_ldisc_flush(tty); info->event = 0; info->tty = NULL; if (info->blocked_open) { @@ -955,7 +916,6 @@ static const struct tty_operations hp_ops = { .stop = rs_stop, .start = rs_start, .hangup = rs_hangup, - .break_ctl = rs_break, .wait_until_sent = rs_wait_until_sent, .read_proc = rs_read_proc, }; diff --git a/arch/ia64/kvm/Makefile b/arch/ia64/kvm/Makefile index 112791dd2542..bf22fb9e6dcf 100644 --- a/arch/ia64/kvm/Makefile +++ b/arch/ia64/kvm/Makefile @@ -43,7 +43,8 @@ $(obj)/$(offsets-file): arch/ia64/kvm/asm-offsets.s EXTRA_CFLAGS += -Ivirt/kvm -Iarch/ia64/kvm/ EXTRA_AFLAGS += -Ivirt/kvm -Iarch/ia64/kvm/ -common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o) +common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \ + coalesced_mmio.o) kvm-objs := $(common-objs) kvm-ia64.o kvm_fw.o obj-$(CONFIG_KVM) += kvm.o diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c index 68c978be9a51..2672f4d278ac 100644 --- a/arch/ia64/kvm/kvm-ia64.c +++ b/arch/ia64/kvm/kvm-ia64.c @@ -187,6 +187,9 @@ int kvm_dev_ioctl_check_extension(long ext) r = 1; break; + case KVM_CAP_COALESCED_MMIO: + r = KVM_COALESCED_MMIO_PAGE_OFFSET; + break; default: r = 0; } @@ -195,11 +198,11 @@ int kvm_dev_ioctl_check_extension(long ext) } static struct kvm_io_device *vcpu_find_mmio_dev(struct kvm_vcpu *vcpu, - gpa_t addr) + gpa_t addr, int len, int is_write) { struct kvm_io_device *dev; - dev = kvm_io_bus_find_dev(&vcpu->kvm->mmio_bus, addr); + dev = kvm_io_bus_find_dev(&vcpu->kvm->mmio_bus, addr, len, is_write); return dev; } @@ -231,7 +234,7 @@ static int handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) kvm_run->exit_reason = KVM_EXIT_MMIO; return 0; mmio: - mmio_dev = vcpu_find_mmio_dev(vcpu, p->addr); + mmio_dev = vcpu_find_mmio_dev(vcpu, p->addr, p->size, !p->dir); if (mmio_dev) { if (!p->dir) kvm_iodevice_write(mmio_dev, p->addr, p->size, @@ -1035,14 +1038,6 @@ static void kvm_free_vmm_area(void) } } -/* - * Make sure that a cpu that is being hot-unplugged does not have any vcpus - * cached on it. Leave it as blank for IA64. - */ -void decache_vcpus_on_cpu(int cpu) -{ -} - static void vti_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { } @@ -1460,6 +1455,9 @@ int kvm_arch_set_memory_region(struct kvm *kvm, return 0; } +void kvm_arch_flush_shadow(struct kvm *kvm) +{ +} long kvm_arch_dev_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig index 55ea52fe6aca..8c5e1de68fcb 100644 --- a/arch/m68k/Kconfig +++ b/arch/m68k/Kconfig @@ -490,28 +490,6 @@ config ATARI_MFPSER Note for Falcon users: You also have an MFP port, it's just not wired to the outside... But you could use the port under Linux. -config ATARI_SCC - tristate "Atari SCC serial support" - depends on ATARI - ---help--- - If you have serial ports based on a Zilog SCC chip (Modem2, Serial2, - LAN) and like to use them under Linux, say Y. All built-in SCC's are - supported (TT, MegaSTE, Falcon), and also the ST-ESCC. If you have - two connectors for channel A (Serial2 and LAN), they are visible as - two separate devices. - - To compile this driver as a module, choose M here. - -config ATARI_SCC_DMA - bool "Atari SCC serial DMA support" - depends on ATARI_SCC - help - This enables DMA support for receiving data on channel A of the SCC. - If you have a TT you may say Y here and read - drivers/char/atari_SCC.README. All other users should say N here, - because only the TT has SCC-DMA, even if your machine keeps claiming - so at boot time. - config ATARI_MIDI tristate "Atari MIDI serial support" depends on ATARI @@ -578,18 +556,6 @@ config MAC_HID depends on INPUT_ADBHID default y -config ADB_KEYBOARD - bool "Support for ADB keyboard (old driver)" - depends on MAC && !INPUT_ADBHID - help - This option allows you to use an ADB keyboard attached to your - machine. Note that this disables any other (ie. PS/2) keyboard - support, even if your machine is physically capable of using both at - the same time. - - If you use an ADB keyboard (4 pin connector), say Y here. - If you use a PS/2 keyboard (6 pin connector), say N here. - config HPDCA tristate "HP DCA serial support" depends on DIO && SERIAL_8250 @@ -640,7 +606,7 @@ config DN_SERIAL config SERIAL_CONSOLE bool "Support for serial port console" - depends on (AMIGA || ATARI || MAC || SUN3 || SUN3X || VME || APOLLO) && (ATARI_MFPSER=y || ATARI_SCC=y || ATARI_MIDI=y || MAC_SCC=y || AMIGA_BUILTIN_SERIAL=y || GVPIOEXT=y || MULTIFACE_III_TTY=y || SERIAL=y || MVME147_SCC || SERIAL167 || MVME162_SCC || BVME6000_SCC || DN_SERIAL) + depends on (AMIGA || ATARI || MAC || SUN3 || SUN3X || VME || APOLLO) && (ATARI_MFPSER=y || ATARI_MIDI=y || MAC_SCC=y || AMIGA_BUILTIN_SERIAL=y || GVPIOEXT=y || MULTIFACE_III_TTY=y || SERIAL=y || MVME147_SCC || SERIAL167 || MVME162_SCC || BVME6000_SCC || DN_SERIAL) ---help--- If you say Y here, it will be possible to use a serial port as the system console (the system console is the device which receives all diff --git a/arch/m68k/Makefile b/arch/m68k/Makefile index b15173f28a23..8133dbc44964 100644 --- a/arch/m68k/Makefile +++ b/arch/m68k/Makefile @@ -13,7 +13,7 @@ # Copyright (C) 1994 by Hamish Macdonald # -KBUILD_DEFCONFIG := amiga_defconfig +KBUILD_DEFCONFIG := multi_defconfig # override top level makefile AS += -m68020 diff --git a/arch/m68k/amiga/config.c b/arch/m68k/amiga/config.c index 50f5daab46b7..df679d96b1cb 100644 --- a/arch/m68k/amiga/config.c +++ b/arch/m68k/amiga/config.c @@ -36,14 +36,11 @@ #include <asm/machdep.h> #include <asm/io.h> -unsigned long amiga_model; -EXPORT_SYMBOL(amiga_model); +static unsigned long amiga_model; unsigned long amiga_eclock; EXPORT_SYMBOL(amiga_eclock); -unsigned long amiga_masterclock; - unsigned long amiga_colorclock; EXPORT_SYMBOL(amiga_colorclock); @@ -51,7 +48,9 @@ unsigned long amiga_chipset; EXPORT_SYMBOL(amiga_chipset); unsigned char amiga_vblank; -unsigned char amiga_psfreq; +EXPORT_SYMBOL(amiga_vblank); + +static unsigned char amiga_psfreq; struct amiga_hw_present amiga_hw_present; EXPORT_SYMBOL(amiga_hw_present); @@ -92,8 +91,6 @@ static char *amiga_models[] __initdata = { static char amiga_model_name[13] = "Amiga "; static void amiga_sched_init(irq_handler_t handler); -/* amiga specific irq functions */ -extern void amiga_init_IRQ(void); static void amiga_get_model(char *model); static int amiga_get_hardware_list(char *buffer); /* amiga specific timer functions */ @@ -107,8 +104,6 @@ static void amiga_reset(void); extern void amiga_init_sound(void); static void amiga_mem_console_write(struct console *co, const char *b, unsigned int count); -void amiga_serial_console_write(struct console *co, const char *s, - unsigned int count); #ifdef CONFIG_HEARTBEAT static void amiga_heartbeat(int on); #endif @@ -418,8 +413,7 @@ void __init config_amiga(void) mach_heartbeat = amiga_heartbeat; #endif - /* Fill in the clock values (based on the 700 kHz E-Clock) */ - amiga_masterclock = 40*amiga_eclock; /* 28 MHz */ + /* Fill in the clock value (based on the 700 kHz E-Clock) */ amiga_colorclock = 5*amiga_eclock; /* 3.5 MHz */ /* clear all DMA bits */ @@ -817,8 +811,8 @@ static void amiga_serial_putc(char c) ; } -void amiga_serial_console_write(struct console *co, const char *s, - unsigned int count) +static void amiga_serial_console_write(struct console *co, const char *s, + unsigned int count) { while (count--) { if (*s == '\n') @@ -827,7 +821,7 @@ void amiga_serial_console_write(struct console *co, const char *s, } } -#ifdef CONFIG_SERIAL_CONSOLE +#if 0 void amiga_serial_puts(const char *s) { amiga_serial_console_write(NULL, s, strlen(s)); diff --git a/arch/m68k/atari/debug.c b/arch/m68k/atari/debug.c index 043ddbc61c7b..702b15ccfab7 100644 --- a/arch/m68k/atari/debug.c +++ b/arch/m68k/atari/debug.c @@ -20,14 +20,6 @@ #include <asm/atarihw.h> #include <asm/atariints.h> -/* Flag that Modem1 port is already initialized and used */ -int atari_MFP_init_done; -EXPORT_SYMBOL(atari_MFP_init_done); - -/* Flag that Modem1 port is already initialized and used */ -int atari_SCC_init_done; -EXPORT_SYMBOL(atari_SCC_init_done); - /* Can be set somewhere, if a SCC master reset has already be done and should * not be repeated; used by kgdb */ int atari_SCC_reset_done; @@ -47,8 +39,8 @@ static inline void ata_mfp_out(char c) mfp.usart_dta = c; } -void atari_mfp_console_write(struct console *co, const char *str, - unsigned int count) +static void atari_mfp_console_write(struct console *co, const char *str, + unsigned int count) { while (count--) { if (*str == '\n') @@ -66,8 +58,8 @@ static inline void ata_scc_out(char c) scc.cha_b_data = c; } -void atari_scc_console_write(struct console *co, const char *str, - unsigned int count) +static void atari_scc_console_write(struct console *co, const char *str, + unsigned int count) { while (count--) { if (*str == '\n') @@ -83,8 +75,8 @@ static inline void ata_midi_out(char c) acia.mid_data = c; } -void atari_midi_console_write(struct console *co, const char *str, - unsigned int count) +static void atari_midi_console_write(struct console *co, const char *str, + unsigned int count) { while (count--) { if (*str == '\n') @@ -136,7 +128,7 @@ static void atari_par_console_write(struct console *co, const char *str, } } -#ifdef CONFIG_SERIAL_CONSOLE +#if 0 int atari_mfp_console_wait_key(struct console *co) { while (!(mfp.rcv_stat & 0x80)) /* wait for rx buf filled */ @@ -166,11 +158,7 @@ int atari_midi_console_wait_key(struct console *co) * SCC serial ports. They're used by the debugging interface, kgdb, and the * serial console code. */ -#ifndef CONFIG_SERIAL_CONSOLE static void __init atari_init_mfp_port(int cflag) -#else -void atari_init_mfp_port(int cflag) -#endif { /* * timer values for 1200...115200 bps; > 38400 select 110, 134, or 150 @@ -193,8 +181,6 @@ void atari_init_mfp_port(int cflag) mfp.tim_dt_d = baud_table[baud]; mfp.tim_ct_cd |= 0x01; /* start timer D, 1:4 */ mfp.trn_stat |= 0x01; /* enable TX */ - - atari_MFP_init_done = 1; } #define SCC_WRITE(reg, val) \ @@ -214,11 +200,7 @@ void atari_init_mfp_port(int cflag) MFPDELAY(); \ } while (0) -#ifndef CONFIG_SERIAL_CONSOLE static void __init atari_init_scc_port(int cflag) -#else -void atari_init_scc_port(int cflag) -#endif { extern int atari_SCC_reset_done; static int clksrc_table[9] = @@ -277,14 +259,9 @@ void atari_init_scc_port(int cflag) SCC_WRITE(5, reg5 | 8); atari_SCC_reset_done = 1; - atari_SCC_init_done = 1; } -#ifndef CONFIG_SERIAL_CONSOLE static void __init atari_init_midi_port(int cflag) -#else -void atari_init_midi_port(int cflag) -#endif { int baud = cflag & CBAUD; int csize = ((cflag & CSIZE) == CS8) ? 0x10 : 0x00; diff --git a/arch/m68k/fpsp040/Makefile b/arch/m68k/fpsp040/Makefile index 0214d2f6f8b0..9506d883ace5 100644 --- a/arch/m68k/fpsp040/Makefile +++ b/arch/m68k/fpsp040/Makefile @@ -10,7 +10,6 @@ obj-y := bindec.o binstr.o decbin.o do_func.o gen_except.o get_op.o \ x_bsun.o x_fline.o x_operr.o x_ovfl.o x_snan.o x_store.o \ x_unfl.o x_unimp.o x_unsupp.o bugfix.o skeleton.o -EXTRA_AFLAGS := -traditional EXTRA_LDFLAGS := -x $(OS_OBJS): fpsp.h diff --git a/arch/m68k/ifpsp060/Makefile b/arch/m68k/ifpsp060/Makefile index 2fe8472cb5e3..43b435049452 100644 --- a/arch/m68k/ifpsp060/Makefile +++ b/arch/m68k/ifpsp060/Makefile @@ -6,5 +6,4 @@ obj-y := fskeleton.o iskeleton.o os.o -EXTRA_AFLAGS := -traditional EXTRA_LDFLAGS := -x diff --git a/arch/m68k/kernel/Makefile b/arch/m68k/kernel/Makefile index 7a62a718143b..3a7f62225504 100644 --- a/arch/m68k/kernel/Makefile +++ b/arch/m68k/kernel/Makefile @@ -16,5 +16,3 @@ devres-y = ../../../kernel/irq/devres.o obj-$(CONFIG_PCI) += bios32.o obj-y$(CONFIG_MMU_SUN3) += dma.o # no, it's not a typo - -EXTRA_AFLAGS := -traditional diff --git a/arch/m68k/kernel/setup.c b/arch/m68k/kernel/setup.c index a9fb83a8c180..ea1e44da19b9 100644 --- a/arch/m68k/kernel/setup.c +++ b/arch/m68k/kernel/setup.c @@ -26,6 +26,7 @@ #include <asm/bootinfo.h> #include <asm/setup.h> +#include <asm/fpu.h> #include <asm/irq.h> #include <asm/io.h> #include <asm/machdep.h> @@ -40,6 +41,11 @@ #include <asm/dvma.h> #endif +#if !FPSTATESIZE || !NR_IRQS +#warning No CPU/platform type selected, your kernel will not work! +#warning Are you building an allnoconfig kernel? +#endif + unsigned long m68k_machtype; EXPORT_SYMBOL(m68k_machtype); unsigned long m68k_cputype; @@ -116,6 +122,7 @@ extern int bvme6000_parse_bootinfo(const struct bi_record *); extern int mvme16x_parse_bootinfo(const struct bi_record *); extern int mvme147_parse_bootinfo(const struct bi_record *); extern int hp300_parse_bootinfo(const struct bi_record *); +extern int apollo_parse_bootinfo(const struct bi_record *); extern void config_amiga(void); extern void config_atari(void); @@ -183,6 +190,8 @@ static void __init m68k_parse_bootinfo(const struct bi_record *record) unknown = mvme147_parse_bootinfo(record); else if (MACH_IS_HP300) unknown = hp300_parse_bootinfo(record); + else if (MACH_IS_APOLLO) + unknown = apollo_parse_bootinfo(record); else unknown = 1; } diff --git a/arch/m68k/kernel/vmlinux-std.lds b/arch/m68k/kernel/vmlinux-std.lds index 7537cc5e6159..99b0784c0552 100644 --- a/arch/m68k/kernel/vmlinux-std.lds +++ b/arch/m68k/kernel/vmlinux-std.lds @@ -1,6 +1,7 @@ /* ld script to make m68k Linux kernel */ #include <asm-generic/vmlinux.lds.h> +#include <asm/page.h> OUTPUT_FORMAT("elf32-m68k", "elf32-m68k", "elf32-m68k") OUTPUT_ARCH(m68k) @@ -41,7 +42,7 @@ SECTIONS _edata = .; /* End of data section */ /* will be freed after init */ - . = ALIGN(4096); /* Init code and data */ + . = ALIGN(PAGE_SIZE); /* Init code and data */ __init_begin = .; .init.text : { _sinittext = .; diff --git a/arch/m68k/kernel/vmlinux-sun3.lds b/arch/m68k/kernel/vmlinux-sun3.lds index cdc313e7c299..8a4919e4d36a 100644 --- a/arch/m68k/kernel/vmlinux-sun3.lds +++ b/arch/m68k/kernel/vmlinux-sun3.lds @@ -1,6 +1,7 @@ /* ld script to make m68k Linux kernel */ #include <asm-generic/vmlinux.lds.h> +#include <asm/page.h> OUTPUT_FORMAT("elf32-m68k", "elf32-m68k", "elf32-m68k") OUTPUT_ARCH(m68k) @@ -34,7 +35,7 @@ SECTIONS _edata = .; /* will be freed after init */ - . = ALIGN(8192); /* Init code and data */ + . = ALIGN(PAGE_SIZE); /* Init code and data */ __init_begin = .; .init.text : { _sinittext = .; @@ -61,12 +62,12 @@ __init_begin = .; } SECURITY_INIT #ifdef CONFIG_BLK_DEV_INITRD - . = ALIGN(8192); + . = ALIGN(PAGE_SIZE); __initramfs_start = .; .init.ramfs : { *(.init.ramfs) } __initramfs_end = .; #endif - . = ALIGN(8192); + . = ALIGN(PAGE_SIZE); __init_end = .; .data.init.task : { *(.data.init_task) } diff --git a/arch/m68k/lib/Makefile b/arch/m68k/lib/Makefile index a18af095cd7c..af9abf8d9d98 100644 --- a/arch/m68k/lib/Makefile +++ b/arch/m68k/lib/Makefile @@ -2,7 +2,5 @@ # Makefile for m68k-specific library files.. # -EXTRA_AFLAGS := -traditional - lib-y := ashldi3.o ashrdi3.o lshrdi3.o muldi3.o \ checksum.o string.o uaccess.o diff --git a/arch/m68k/mac/Makefile b/arch/m68k/mac/Makefile index 1d265ba365ad..daebd80bdef0 100644 --- a/arch/m68k/mac/Makefile +++ b/arch/m68k/mac/Makefile @@ -2,5 +2,5 @@ # Makefile for Linux arch/m68k/mac source directory # -obj-y := config.o bootparse.o macints.o iop.o via.o oss.o psc.o \ +obj-y := config.o macints.o iop.o via.o oss.o psc.o \ baboon.o macboing.o debug.o misc.o diff --git a/arch/m68k/mac/baboon.c b/arch/m68k/mac/baboon.c index 673a1085984d..dae9c982aa89 100644 --- a/arch/m68k/mac/baboon.c +++ b/arch/m68k/mac/baboon.c @@ -23,9 +23,7 @@ /* #define DEBUG_IRQS */ int baboon_present; -volatile struct baboon *baboon; - -irqreturn_t baboon_irq(int, void *); +static volatile struct baboon *baboon; #if 0 extern int macide_ack_intr(struct ata_channel *); @@ -50,20 +48,10 @@ void __init baboon_init(void) } /* - * Register the Baboon interrupt dispatcher on nubus slot $C. - */ - -void __init baboon_register_interrupts(void) -{ - request_irq(IRQ_NUBUS_C, baboon_irq, IRQ_FLG_LOCK|IRQ_FLG_FAST, - "baboon", (void *) baboon); -} - -/* * Baboon interrupt handler. This works a lot like a VIA. */ -irqreturn_t baboon_irq(int irq, void *dev_id) +static irqreturn_t baboon_irq(int irq, void *dev_id) { int irq_bit, irq_num; unsigned char events; @@ -95,6 +83,16 @@ irqreturn_t baboon_irq(int irq, void *dev_id) return IRQ_HANDLED; } +/* + * Register the Baboon interrupt dispatcher on nubus slot $C. + */ + +void __init baboon_register_interrupts(void) +{ + request_irq(IRQ_NUBUS_C, baboon_irq, IRQ_FLG_LOCK|IRQ_FLG_FAST, + "baboon", (void *) baboon); +} + void baboon_irq_enable(int irq) { #ifdef DEBUG_IRQUSE printk("baboon_irq_enable(%d)\n", irq); diff --git a/arch/m68k/mac/bootparse.c b/arch/m68k/mac/bootparse.c deleted file mode 100644 index 36d223609823..000000000000 --- a/arch/m68k/mac/bootparse.c +++ /dev/null @@ -1,122 +0,0 @@ -#include <linux/string.h> -#include <linux/kernel.h> -#include <linux/sched.h> -#include <asm/irq.h> -#include <asm/setup.h> -#include <asm/bootinfo.h> -#include <asm/macintosh.h> - -/* - * Booter vars - */ - -int boothowto; -int _boothowto; - -/* - * Called early to parse the environment (passed to us from the booter) - * into a bootinfo struct. Will die as soon as we have our own booter - */ - -#define atol(x) simple_strtoul(x,NULL,0) - -void parse_booter(char *env) -{ - char *name; - char *value; -#if 0 - while(0 && *env) -#else - while(*env) -#endif - { - name=env; - value=name; - while(*value!='='&&*value) - value++; - if(*value=='=') - *value++=0; - env=value; - while(*env) - env++; - env++; -#if 0 - if(strcmp(name,"VIDEO_ADDR")==0) - mac_mch.videoaddr=atol(value); - if(strcmp(name,"ROW_BYTES")==0) - mac_mch.videorow=atol(value); - if(strcmp(name,"SCREEN_DEPTH")==0) - mac_mch.videodepth=atol(value); - if(strcmp(name,"DIMENSIONS")==0) - mac_mch.dimensions=atol(value); -#endif - if(strcmp(name,"BOOTTIME")==0) - mac_bi_data.boottime=atol(value); - if(strcmp(name,"GMTBIAS")==0) - mac_bi_data.gmtbias=atol(value); - if(strcmp(name,"BOOTERVER")==0) - mac_bi_data.bootver=atol(value); - if(strcmp(name,"MACOS_VIDEO")==0) - mac_bi_data.videological=atol(value); - if(strcmp(name,"MACOS_SCC")==0) - mac_bi_data.sccbase=atol(value); - if(strcmp(name,"MACHINEID")==0) - mac_bi_data.id=atol(value); - if(strcmp(name,"MEMSIZE")==0) - mac_bi_data.memsize=atol(value); - if(strcmp(name,"SERIAL_MODEM_FLAGS")==0) - mac_bi_data.serialmf=atol(value); - if(strcmp(name,"SERIAL_MODEM_HSKICLK")==0) - mac_bi_data.serialhsk=atol(value); - if(strcmp(name,"SERIAL_MODEM_GPICLK")==0) - mac_bi_data.serialgpi=atol(value); - if(strcmp(name,"SERIAL_PRINT_FLAGS")==0) - mac_bi_data.printmf=atol(value); - if(strcmp(name,"SERIAL_PRINT_HSKICLK")==0) - mac_bi_data.printhsk=atol(value); - if(strcmp(name,"SERIAL_PRINT_GPICLK")==0) - mac_bi_data.printgpi=atol(value); - if(strcmp(name,"PROCESSOR")==0) - mac_bi_data.cpuid=atol(value); - if(strcmp(name,"ROMBASE")==0) - mac_bi_data.rombase=atol(value); - if(strcmp(name,"TIMEDBRA")==0) - mac_bi_data.timedbra=atol(value); - if(strcmp(name,"ADBDELAY")==0) - mac_bi_data.adbdelay=atol(value); - } -#if 0 /* XXX: TODO with m68k_mach_* */ - /* Fill in the base stuff */ - boot_info.machtype=MACH_MAC; - /* Read this from the macinfo we got ! */ -/* boot_info.cputype=CPU_68020|FPUB_68881;*/ -/* boot_info.memory[0].addr=0;*/ -/* boot_info.memory[0].size=((mac_bi_data.id>>7)&31)<<20;*/ - boot_info.num_memory=1; /* On a MacII */ - boot_info.ramdisk_size=0; /* For now */ - *boot_info.command_line=0; -#endif - } - - -void print_booter(char *env) -{ - char *name; - char *value; - while(*env) - { - name=env; - value=name; - while(*value!='='&&*value) - value++; - if(*value=='=') - *value++=0; - env=value; - while(*env) - env++; - env++; - printk("%s=%s\n", name,value); - } - } - - diff --git a/arch/m68k/mac/config.c b/arch/m68k/mac/config.c index ad3e3bacae39..c45e18449f32 100644 --- a/arch/m68k/mac/config.c +++ b/arch/m68k/mac/config.c @@ -46,7 +46,6 @@ /* Mac bootinfo struct */ struct mac_booter_data mac_bi_data; -int mac_bisize = sizeof mac_bi_data; /* New m68k bootinfo stuff and videobase */ @@ -55,10 +54,8 @@ extern struct mem_info m68k_memory[NUM_MEMINFO]; extern struct mem_info m68k_ramdisk; -void *mac_env; /* Loaded by the boot asm */ - /* The phys. video addr. - might be bogus on some machines */ -unsigned long mac_orig_videoaddr; +static unsigned long mac_orig_videoaddr; /* Mac specific timer functions */ extern unsigned long mac_gettimeoffset(void); @@ -79,6 +76,8 @@ extern void mac_mksound(unsigned int, unsigned int); extern void nubus_sweep_video(void); static void mac_get_model(char *str); +static void mac_identify(void); +static void mac_report_hardware(void); static void __init mac_sched_init(irq_handler_t vector) { @@ -765,7 +764,7 @@ static struct mac_model mac_data_table[] = { } }; -void __init mac_identify(void) +static void __init mac_identify(void) { struct mac_model *m; @@ -821,7 +820,7 @@ void __init mac_identify(void) baboon_init(); } -void __init mac_report_hardware(void) +static void __init mac_report_hardware(void) { printk(KERN_INFO "Apple Macintosh %s\n", macintosh_config->name); } diff --git a/arch/m68k/mac/debug.c b/arch/m68k/mac/debug.c index e8a57138b4a6..2165740786a5 100644 --- a/arch/m68k/mac/debug.c +++ b/arch/m68k/mac/debug.c @@ -51,6 +51,8 @@ extern void mac_serial_print(const char *); static int peng, line; #endif +#if 0 + void mac_debugging_short(int pos, short num) { #ifdef DEBUG_SCREEN @@ -125,6 +127,8 @@ void mac_debugging_long(int pos, long addr) #endif } +#endif /* 0 */ + #ifdef DEBUG_SERIAL /* * TODO: serial debug code @@ -142,12 +146,6 @@ struct mac_SCC { # define scc (*((volatile struct mac_SCC*)mac_bi_data.sccbase)) -/* Flag that serial port is already initialized and used */ -int mac_SCC_init_done; -/* Can be set somewhere, if a SCC master reset has already be done and should - * not be repeated; used by kgdb */ -int mac_SCC_reset_done; - static int scc_port = -1; static struct console mac_console_driver = { @@ -171,8 +169,8 @@ static struct console mac_console_driver = { * this driver if Mac. */ -void mac_debug_console_write(struct console *co, const char *str, - unsigned int count) +static void mac_debug_console_write(struct console *co, const char *str, + unsigned int count) { mac_serial_print(str); } @@ -209,8 +207,8 @@ static inline void mac_scca_out(char c) scc.cha_a_data = c; } -void mac_sccb_console_write(struct console *co, const char *str, - unsigned int count) +static void mac_sccb_console_write(struct console *co, const char *str, + unsigned int count) { while (count--) { if (*str == '\n') @@ -219,8 +217,8 @@ void mac_sccb_console_write(struct console *co, const char *str, } } -void mac_scca_console_write(struct console *co, const char *str, - unsigned int count) +static void mac_scca_console_write(struct console *co, const char *str, + unsigned int count) { while (count--) { if (*str == '\n') @@ -265,14 +263,8 @@ void mac_scca_console_write(struct console *co, const char *str, barrier(); \ } while(0) -#ifndef CONFIG_SERIAL_CONSOLE static void __init mac_init_scc_port(int cflag, int port) -#else -void mac_init_scc_port(int cflag, int port) -#endif { - extern int mac_SCC_reset_done; - /* * baud rates: 1200, 1800, 2400, 4800, 9600, 19.2k, 38.4k, 57.6k, 115.2k */ @@ -340,22 +332,9 @@ void mac_init_scc_port(int cflag, int port) SCCA_WRITE(3, reg3 | 1); SCCA_WRITE(5, reg5 | 8); } - - mac_SCC_reset_done = 1; - mac_SCC_init_done = 1; } #endif /* DEBUG_SERIAL */ -void mac_init_scca_port(int cflag) -{ - mac_init_scc_port(cflag, 0); -} - -void mac_init_sccb_port(int cflag) -{ - mac_init_scc_port(cflag, 1); -} - static int __init mac_debug_setup(char *arg) { if (!MACH_IS_MAC) diff --git a/arch/m68k/mac/oss.c b/arch/m68k/mac/oss.c index 3c943d2ec570..43d83e054b8e 100644 --- a/arch/m68k/mac/oss.c +++ b/arch/m68k/mac/oss.c @@ -30,8 +30,8 @@ int oss_present; volatile struct mac_oss *oss; -irqreturn_t oss_irq(int, void *); -irqreturn_t oss_nubus_irq(int, void *); +static irqreturn_t oss_irq(int, void *); +static irqreturn_t oss_nubus_irq(int, void *); extern irqreturn_t via1_irq(int, void *); extern irqreturn_t mac_scc_dispatch(int, void *); @@ -92,7 +92,7 @@ void __init oss_nubus_init(void) * and SCSI; everything else is routed to its own autovector IRQ. */ -irqreturn_t oss_irq(int irq, void *dev_id) +static irqreturn_t oss_irq(int irq, void *dev_id) { int events; @@ -126,7 +126,7 @@ irqreturn_t oss_irq(int irq, void *dev_id) * Unlike the VIA/RBV this is on its own autovector interrupt level. */ -irqreturn_t oss_nubus_irq(int irq, void *dev_id) +static irqreturn_t oss_nubus_irq(int irq, void *dev_id) { int events, irq_bit, i; diff --git a/arch/m68k/mac/psc.c b/arch/m68k/mac/psc.c index d66f723b17c3..f84a4dd64f94 100644 --- a/arch/m68k/mac/psc.c +++ b/arch/m68k/mac/psc.c @@ -36,7 +36,7 @@ irqreturn_t psc_irq(int, void *); * Debugging dump, used in various places to see what's going on. */ -void psc_debug_dump(void) +static void psc_debug_dump(void) { int i; @@ -55,7 +55,7 @@ void psc_debug_dump(void) * expanded to cover what I think are the other 7 channels. */ -void psc_dma_die_die_die(void) +static void psc_dma_die_die_die(void) { int i; diff --git a/arch/m68k/mac/via.c b/arch/m68k/mac/via.c index fa485df4160e..f3b27d04a31f 100644 --- a/arch/m68k/mac/via.c +++ b/arch/m68k/mac/via.c @@ -45,7 +45,7 @@ volatile long *via_memory_bogon=(long *)&via_memory_bogon; int rbv_present; int via_alt_mapping; EXPORT_SYMBOL(via_alt_mapping); -__u8 rbv_clear; +static __u8 rbv_clear; /* * Globals for accessing the VIA chip registers without having to diff --git a/arch/m68k/math-emu/Makefile b/arch/m68k/math-emu/Makefile index 539940401814..a0935bf98362 100644 --- a/arch/m68k/math-emu/Makefile +++ b/arch/m68k/math-emu/Makefile @@ -2,8 +2,6 @@ # Makefile for the linux kernel. # -EXTRA_AFLAGS := -traditional - #EXTRA_AFLAGS += -DFPU_EMU_DEBUG #EXTRA_CFLAGS += -DFPU_EMU_DEBUG diff --git a/arch/m68k/mm/motorola.c b/arch/m68k/mm/motorola.c index 30d34f285024..226795bdf355 100644 --- a/arch/m68k/mm/motorola.c +++ b/arch/m68k/mm/motorola.c @@ -285,7 +285,6 @@ void __init paging_init(void) * to a couple of allocated pages */ empty_zero_page = alloc_bootmem_pages(PAGE_SIZE); - memset(empty_zero_page, 0, PAGE_SIZE); /* * Set up SFC/DFC registers diff --git a/arch/m68k/mm/sun3mmu.c b/arch/m68k/mm/sun3mmu.c index 6a6513aa1ce8..edceefc18870 100644 --- a/arch/m68k/mm/sun3mmu.c +++ b/arch/m68k/mm/sun3mmu.c @@ -53,7 +53,6 @@ void __init paging_init(void) wp_works_ok = 0; #endif empty_zero_page = alloc_bootmem_pages(PAGE_SIZE); - memset(empty_zero_page, 0, PAGE_SIZE); address = PAGE_OFFSET; pg_dir = swapper_pg_dir; diff --git a/arch/m68k/q40/config.c b/arch/m68k/q40/config.c index 476e18eca758..be9de2f3dc48 100644 --- a/arch/m68k/q40/config.c +++ b/arch/m68k/q40/config.c @@ -41,14 +41,12 @@ static void q40_get_model(char *model); static int q40_get_hardware_list(char *buffer); extern void q40_sched_init(irq_handler_t handler); -extern unsigned long q40_gettimeoffset(void); -extern int q40_hwclk(int, struct rtc_time *); -extern unsigned int q40_get_ss(void); -extern int q40_set_clock_mmss(unsigned long); +static unsigned long q40_gettimeoffset(void); +static int q40_hwclk(int, struct rtc_time *); +static unsigned int q40_get_ss(void); +static int q40_set_clock_mmss(unsigned long); static int q40_get_rtc_pll(struct rtc_pll_info *pll); static int q40_set_rtc_pll(struct rtc_pll_info *pll); -extern void q40_reset(void); -void q40_halt(void); extern void q40_waitbut(void); void q40_set_vectors(void); @@ -127,7 +125,7 @@ static void q40_heartbeat(int on) } #endif -void q40_reset(void) +static void q40_reset(void) { halted = 1; printk("\n\n*******************************************\n" @@ -137,7 +135,8 @@ void q40_reset(void) while (1) ; } -void q40_halt(void) + +static void q40_halt(void) { halted = 1; printk("\n\n*******************\n" @@ -165,7 +164,8 @@ static unsigned int serports[] = { 0x3f8,0x2f8,0x3e8,0x2e8,0 }; -void q40_disable_irqs(void) + +static void q40_disable_irqs(void) { unsigned i, j; @@ -227,7 +227,7 @@ static inline unsigned char bin2bcd(unsigned char b) } -unsigned long q40_gettimeoffset(void) +static unsigned long q40_gettimeoffset(void) { return 5000 * (ql_ticks != 0); } @@ -248,7 +248,7 @@ unsigned long q40_gettimeoffset(void) * }; */ -int q40_hwclk(int op, struct rtc_time *t) +static int q40_hwclk(int op, struct rtc_time *t) { if (op) { /* Write.... */ @@ -285,7 +285,7 @@ int q40_hwclk(int op, struct rtc_time *t) return 0; } -unsigned int q40_get_ss(void) +static unsigned int q40_get_ss(void) { return bcd2bin(Q40_RTC_SECS); } @@ -295,7 +295,7 @@ unsigned int q40_get_ss(void) * clock is out by > 30 minutes. Logic lifted from atari code. */ -int q40_set_clock_mmss(unsigned long nowtime) +static int q40_set_clock_mmss(unsigned long nowtime) { int retval = 0; short real_seconds = nowtime % 60, real_minutes = (nowtime / 60) % 60; diff --git a/arch/m68k/sun3/Makefile b/arch/m68k/sun3/Makefile index be1a8470d636..38ba0e0cedad 100644 --- a/arch/m68k/sun3/Makefile +++ b/arch/m68k/sun3/Makefile @@ -2,6 +2,6 @@ # Makefile for Linux arch/m68k/sun3 source directory # -obj-y := sun3ints.o sun3dvma.o sbus.o idprom.o +obj-y := sun3ints.o sun3dvma.o idprom.o obj-$(CONFIG_SUN3) += config.o mmu_emu.o leds.o dvma.o intersil.o diff --git a/arch/m68k/sun3/config.c b/arch/m68k/sun3/config.c index c0fbd278fbb1..732087d0735c 100644 --- a/arch/m68k/sun3/config.c +++ b/arch/m68k/sun3/config.c @@ -36,7 +36,7 @@ extern char _text, _end; char sun3_reserved_pmeg[SUN3_PMEGS_NUM]; extern unsigned long sun3_gettimeoffset(void); -extern void sun3_sched_init(irq_handler_t handler); +static void sun3_sched_init(irq_handler_t handler); extern void sun3_get_model (char* model); extern void idprom_init (void); extern int sun3_hwclk(int set, struct rtc_time *t); @@ -114,7 +114,8 @@ static void sun3_halt (void) /* sun3 bootmem allocation */ -void __init sun3_bootmem_alloc(unsigned long memory_start, unsigned long memory_end) +static void __init sun3_bootmem_alloc(unsigned long memory_start, + unsigned long memory_end) { unsigned long start_page; @@ -164,7 +165,7 @@ void __init config_sun3(void) sun3_bootmem_alloc(memory_start, memory_end); } -void __init sun3_sched_init(irq_handler_t timer_routine) +static void __init sun3_sched_init(irq_handler_t timer_routine) { sun3_disable_interrupts(); intersil_clock->cmd_reg=(INTERSIL_RUN|INTERSIL_INT_DISABLE|INTERSIL_24H_MODE); diff --git a/arch/m68k/sun3/dvma.c b/arch/m68k/sun3/dvma.c index d2b3093f2405..d522eaab4551 100644 --- a/arch/m68k/sun3/dvma.c +++ b/arch/m68k/sun3/dvma.c @@ -19,7 +19,7 @@ static unsigned long ptelist[120]; -inline unsigned long dvma_page(unsigned long kaddr, unsigned long vaddr) +static unsigned long dvma_page(unsigned long kaddr, unsigned long vaddr) { unsigned long pte; unsigned long j; diff --git a/arch/m68k/sun3/idprom.c b/arch/m68k/sun3/idprom.c index dca6ab6a4ede..c86ac37d1983 100644 --- a/arch/m68k/sun3/idprom.c +++ b/arch/m68k/sun3/idprom.c @@ -1,4 +1,4 @@ -/* $Id: idprom.c,v 1.22 1996/11/13 05:09:25 davem Exp $ +/* * idprom.c: Routines to load the idprom into kernel addresses and * interpret the data contained within. * @@ -25,7 +25,7 @@ static struct idprom idprom_buffer; * of the Sparc CPU and have a meaningful IDPROM machtype value that we * know about. See asm-sparc/machines.h for empirical constants. */ -struct Sun_Machine_Models Sun_Machines[NUM_SUN_MACHINES] = { +static struct Sun_Machine_Models Sun_Machines[NUM_SUN_MACHINES] = { /* First, Sun3's */ { .name = "Sun 3/160 Series", .id_machtype = (SM_SUN3 | SM_3_160) }, { .name = "Sun 3/50", .id_machtype = (SM_SUN3 | SM_3_50) }, diff --git a/arch/m68k/sun3/mmu_emu.c b/arch/m68k/sun3/mmu_emu.c index fb0f6a20cc3c..60f9d4500d72 100644 --- a/arch/m68k/sun3/mmu_emu.c +++ b/arch/m68k/sun3/mmu_emu.c @@ -55,7 +55,7 @@ unsigned char pmeg_ctx[PMEGS_NUM]; /* pointers to the mm structs for each task in each context. 0xffffffff is a marker for kernel context */ -struct mm_struct *ctx_alloc[CONTEXTS_NUM] = { +static struct mm_struct *ctx_alloc[CONTEXTS_NUM] = { [0] = (struct mm_struct *)0xffffffff }; diff --git a/arch/m68k/sun3/prom/Makefile b/arch/m68k/sun3/prom/Makefile index 6e48ae2a7175..da7eac06bca0 100644 --- a/arch/m68k/sun3/prom/Makefile +++ b/arch/m68k/sun3/prom/Makefile @@ -1,4 +1,3 @@ -# $Id: Makefile,v 1.5 1995/11/25 00:59:48 davem Exp $ # Makefile for the Sun Boot PROM interface library under # Linux. # diff --git a/arch/m68k/sun3/prom/console.c b/arch/m68k/sun3/prom/console.c index 52c1427863de..2bcb6e4bfe54 100644 --- a/arch/m68k/sun3/prom/console.c +++ b/arch/m68k/sun3/prom/console.c @@ -1,4 +1,4 @@ -/* $Id: console.c,v 1.10 1996/12/18 06:46:54 tridge Exp $ +/* * console.c: Routines that deal with sending and receiving IO * to/from the current console device using the PROM. * @@ -104,8 +104,6 @@ prom_query_input_device() return PROMDEV_ITTYB; } return PROMDEV_I_UNK; - case PROM_AP1000: - return PROMDEV_I_UNK; }; } #endif @@ -166,8 +164,6 @@ prom_query_output_device() }; } break; - case PROM_AP1000: - return PROMDEV_I_UNK; }; return PROMDEV_O_UNK; } diff --git a/arch/m68k/sun3/prom/init.c b/arch/m68k/sun3/prom/init.c index 202adfcc316e..d8e6349336b4 100644 --- a/arch/m68k/sun3/prom/init.c +++ b/arch/m68k/sun3/prom/init.c @@ -1,4 +1,4 @@ -/* $Id: init.c,v 1.9 1996/12/18 06:46:55 tridge Exp $ +/* * init.c: Initialize internal variables used by the PROM * library functions. * @@ -31,11 +31,6 @@ extern void prom_ranges_init(void); void __init prom_init(struct linux_romvec *rp) { -#ifdef CONFIG_AP1000 - extern struct linux_romvec *ap_prom_init(void); - rp = ap_prom_init(); -#endif - romvec = rp; #ifndef CONFIG_SUN3 switch(romvec->pv_romvers) { @@ -53,10 +48,6 @@ void __init prom_init(struct linux_romvec *rp) prom_printf("PROMLIB: Sun IEEE Prom not supported yet\n"); prom_halt(); break; - case 42: /* why not :-) */ - prom_vers = PROM_AP1000; - break; - default: prom_printf("PROMLIB: Bad PROM version %d\n", romvec->pv_romvers); diff --git a/arch/m68k/sun3/prom/misc.c b/arch/m68k/sun3/prom/misc.c index b88716f2c68c..3d60e1337f75 100644 --- a/arch/m68k/sun3/prom/misc.c +++ b/arch/m68k/sun3/prom/misc.c @@ -1,4 +1,4 @@ -/* $Id: misc.c,v 1.15 1997/05/14 20:45:00 davem Exp $ +/* * misc.c: Miscellaneous prom functions that don't belong * anywhere else. * diff --git a/arch/m68k/sun3/prom/printf.c b/arch/m68k/sun3/prom/printf.c index e7bfde377b5e..df85018f487a 100644 --- a/arch/m68k/sun3/prom/printf.c +++ b/arch/m68k/sun3/prom/printf.c @@ -1,4 +1,4 @@ -/* $Id: printf.c,v 1.5 1996/04/04 16:31:07 tridge Exp $ +/* * printf.c: Internal prom library printf facility. * * Copyright (C) 1995 David S. Miller (davem@caip.rutgers.edu) @@ -37,10 +37,6 @@ prom_printf(char *fmt, ...) bptr = ppbuf; -#ifdef CONFIG_AP1000 - ap_write(1,bptr,strlen(bptr)); -#else - #ifdef CONFIG_KGDB if (kgdb_initialized) { printk("kgdb_initialized = %d\n", kgdb_initialized); @@ -54,7 +50,6 @@ prom_printf(char *fmt, ...) prom_putchar(ch); } #endif -#endif va_end(args); return; } diff --git a/arch/m68k/sun3/sbus.c b/arch/m68k/sun3/sbus.c deleted file mode 100644 index babdbfa3cda7..000000000000 --- a/arch/m68k/sun3/sbus.c +++ /dev/null @@ -1,27 +0,0 @@ -/* - * SBus helper functions - * - * Sun3 don't have a sbus, but many of the used devices are also - * used on Sparc machines with sbus. To avoid having a lot of - * duplicate code, we provide necessary glue stuff to make using - * of the sbus driver code possible. - * - * (C) 1999 Thomas Bogendoerfer (tsbogend@alpha.franken.de) - */ - -#include <linux/types.h> -#include <linux/compiler.h> -#include <linux/init.h> - -int __init sbus_init(void) -{ - return 0; -} - -void *sparc_alloc_io (u32 address, void *virtual, int len, char *name, - u32 bus_type, int rdonly) -{ - return (void *)address; -} - -subsys_initcall(sbus_init); diff --git a/arch/m68k/sun3/sun3dvma.c b/arch/m68k/sun3/sun3dvma.c index 8709677fa025..f9277e8b4159 100644 --- a/arch/m68k/sun3/sun3dvma.c +++ b/arch/m68k/sun3/sun3dvma.c @@ -29,7 +29,7 @@ static inline void dvma_unmap_iommu(unsigned long a, int b) extern void sun3_dvma_init(void); #endif -unsigned long iommu_use[IOMMU_TOTAL_ENTRIES]; +static unsigned long iommu_use[IOMMU_TOTAL_ENTRIES]; #define dvma_index(baddr) ((baddr - DVMA_START) >> DVMA_PAGE_SHIFT) diff --git a/arch/m68k/sun3/sun3ints.c b/arch/m68k/sun3/sun3ints.c index cf93481adb1d..7364cd67455e 100644 --- a/arch/m68k/sun3/sun3ints.c +++ b/arch/m68k/sun3/sun3ints.c @@ -30,7 +30,7 @@ void sun3_enable_interrupts(void) sun3_enable_irq(0); } -int led_pattern[8] = { +static int led_pattern[8] = { ~(0x80), ~(0x01), ~(0x40), ~(0x02), ~(0x20), ~(0x04), diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index d21df5f1b1f3..b9c754f4070c 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -330,6 +330,7 @@ config SGI_IP22 select SGI_HAS_DS1286 select SGI_HAS_I8042 select SGI_HAS_INDYDOG + select SGI_HAS_HAL2 select SGI_HAS_SEEQ select SGI_HAS_WD93 select SGI_HAS_ZILOG @@ -386,7 +387,6 @@ config SGI_IP28 select SGI_HAS_I8042 select SGI_HAS_INDYDOG select SGI_HAS_HAL2 - select SGI_HAS_HAL2 select SGI_HAS_SEEQ select SGI_HAS_WD93 select SGI_HAS_ZILOG @@ -558,6 +558,24 @@ config MACH_TX39XX config MACH_TX49XX bool "Toshiba TX49 series based machines" +config MIKROTIK_RB532 + bool "Mikrotik RB532 boards" + select CEVT_R4K + select CSRC_R4K + select DMA_NONCOHERENT + select GENERIC_HARDIRQS_NO__DO_IRQ + select HW_HAS_PCI + select IRQ_CPU + select SYS_HAS_CPU_MIPS32_R1 + select SYS_SUPPORTS_32BIT_KERNEL + select SYS_SUPPORTS_LITTLE_ENDIAN + select SWAP_IO_SPACE + select BOOT_RAW + select GENERIC_GPIO + help + Support the Mikrotik(tm) RouterBoard 532 series, + based on the IDT RC32434 SoC. + config WR_PPMC bool "Wind River PPMC board" select CEVT_R4K @@ -899,7 +917,7 @@ config BOOT_ELF32 config MIPS_L1_CACHE_SHIFT int - default "4" if MACH_DECSTATION + default "4" if MACH_DECSTATION || MIKROTIK_RB532 default "7" if SGI_IP22 || SGI_IP27 || SGI_IP28 || SNI_RM default "4" if PMC_MSP4200_EVAL default "5" diff --git a/arch/mips/Makefile b/arch/mips/Makefile index 356453322b49..9aab51caf16a 100644 --- a/arch/mips/Makefile +++ b/arch/mips/Makefile @@ -560,6 +560,13 @@ load-$(CONFIG_MACH_TX49XX) += 0xffffffff80100000 core-$(CONFIG_TOSHIBA_JMR3927) += arch/mips/txx9/jmr3927/ # +# Routerboard 532 board +# +core-$(CONFIG_MIKROTIK_RB532) += arch/mips/rb532/ +cflags-$(CONFIG_MIKROTIK_RB532) += -Iinclude/asm-mips/mach-rc32434 +load-$(CONFIG_MIKROTIK_RB532) += 0xffffffff80101000 + +# # Toshiba RBTX4927 board or # Toshiba RBTX4937 board # diff --git a/arch/mips/cobalt/setup.c b/arch/mips/cobalt/setup.c index dd23beb8604f..b51644227241 100644 --- a/arch/mips/cobalt/setup.c +++ b/arch/mips/cobalt/setup.c @@ -81,8 +81,8 @@ void __init plat_mem_setup(void) set_io_port_base(CKSEG1ADDR(GT_DEF_PCI0_IO_BASE)); - /* I/O port resource must include LCD/buttons */ - ioport_resource.end = 0x0fffffff; + /* I/O port resource */ + ioport_resource.end = 0x01ffffff; /* These resources have been reserved by VIA SuperI/O chip. */ for (i = 0; i < ARRAY_SIZE(cobalt_reserved_resources); i++) diff --git a/arch/mips/configs/rb532_defconfig b/arch/mips/configs/rb532_defconfig new file mode 100644 index 000000000000..f28dc32974e5 --- /dev/null +++ b/arch/mips/configs/rb532_defconfig @@ -0,0 +1,1314 @@ +# +# Automatically generated make config: don't edit +# Linux kernel version: 2.6.25 +# Mon Apr 28 12:24:17 2008 +# +CONFIG_MIPS=y + +# +# Machine selection +# +# CONFIG_MACH_ALCHEMY is not set +# CONFIG_BASLER_EXCITE is not set +# CONFIG_BCM47XX is not set +# CONFIG_MIPS_COBALT is not set +# CONFIG_MACH_DECSTATION is not set +# CONFIG_MACH_JAZZ is not set +# CONFIG_LASAT is not set +# CONFIG_LEMOTE_FULONG is not set +# CONFIG_MIPS_ATLAS is not set +# CONFIG_MIPS_MALTA is not set +# CONFIG_MIPS_SEAD is not set +# CONFIG_MIPS_SIM is not set +# CONFIG_MARKEINS is not set +# CONFIG_MACH_VR41XX is not set +# CONFIG_PNX8550_JBS is not set +# CONFIG_PNX8550_STB810 is not set +# CONFIG_PMC_MSP is not set +# CONFIG_PMC_YOSEMITE is not set +# CONFIG_SGI_IP22 is not set +# CONFIG_SGI_IP27 is not set +# CONFIG_SGI_IP28 is not set +# CONFIG_SGI_IP32 is not set +# CONFIG_SIBYTE_CRHINE is not set +# CONFIG_SIBYTE_CARMEL is not set +# CONFIG_SIBYTE_CRHONE is not set +# CONFIG_SIBYTE_RHONE is not set +# CONFIG_SIBYTE_SWARM is not set +# CONFIG_SIBYTE_LITTLESUR is not set +# CONFIG_SIBYTE_SENTOSA is not set +# CONFIG_SIBYTE_BIGSUR is not set +# CONFIG_SNI_RM is not set +# CONFIG_TOSHIBA_JMR3927 is not set +CONFIG_MIKROTIK_RB532=y +# CONFIG_TOSHIBA_RBTX4927 is not set +# CONFIG_TOSHIBA_RBTX4938 is not set +# CONFIG_WR_PPMC is not set +CONFIG_RWSEM_GENERIC_SPINLOCK=y +# CONFIG_ARCH_HAS_ILOG2_U32 is not set +# CONFIG_ARCH_HAS_ILOG2_U64 is not set +CONFIG_ARCH_SUPPORTS_OPROFILE=y +CONFIG_GENERIC_FIND_NEXT_BIT=y +CONFIG_GENERIC_HWEIGHT=y +CONFIG_GENERIC_CALIBRATE_DELAY=y +CONFIG_GENERIC_CLOCKEVENTS=y +CONFIG_GENERIC_TIME=y +CONFIG_GENERIC_CMOS_UPDATE=y +CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y +CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ=y +CONFIG_BOOT_RAW=y +CONFIG_CEVT_R4K=y +CONFIG_CSRC_R4K=y +CONFIG_DMA_NONCOHERENT=y +CONFIG_DMA_NEED_PCI_MAP_STATE=y +# CONFIG_HOTPLUG_CPU is not set +# CONFIG_NO_IOPORT is not set +CONFIG_GENERIC_GPIO=y +# CONFIG_CPU_BIG_ENDIAN is not set +CONFIG_CPU_LITTLE_ENDIAN=y +CONFIG_SYS_SUPPORTS_LITTLE_ENDIAN=y +CONFIG_IRQ_CPU=y +CONFIG_SWAP_IO_SPACE=y +CONFIG_MIPS_L1_CACHE_SHIFT=4 + +# +# CPU selection +# +# CONFIG_CPU_LOONGSON2 is not set +CONFIG_CPU_MIPS32_R1=y +# CONFIG_CPU_MIPS32_R2 is not set +# CONFIG_CPU_MIPS64_R1 is not set +# CONFIG_CPU_MIPS64_R2 is not set +# CONFIG_CPU_R3000 is not set +# CONFIG_CPU_TX39XX is not set +# CONFIG_CPU_VR41XX is not set +# CONFIG_CPU_R4300 is not set +# CONFIG_CPU_R4X00 is not set +# CONFIG_CPU_TX49XX is not set +# CONFIG_CPU_R5000 is not set +# CONFIG_CPU_R5432 is not set +# CONFIG_CPU_R6000 is not set +# CONFIG_CPU_NEVADA is not set +# CONFIG_CPU_R8000 is not set +# CONFIG_CPU_R10000 is not set +# CONFIG_CPU_RM7000 is not set +# CONFIG_CPU_RM9000 is not set +# CONFIG_CPU_SB1 is not set +CONFIG_SYS_HAS_CPU_MIPS32_R1=y +CONFIG_CPU_MIPS32=y +CONFIG_CPU_MIPSR1=y +CONFIG_SYS_SUPPORTS_32BIT_KERNEL=y +CONFIG_CPU_SUPPORTS_32BIT_KERNEL=y + +# +# Kernel type +# +CONFIG_32BIT=y +# CONFIG_64BIT is not set +CONFIG_PAGE_SIZE_4KB=y +# CONFIG_PAGE_SIZE_8KB is not set +# CONFIG_PAGE_SIZE_16KB is not set +# CONFIG_PAGE_SIZE_64KB is not set +CONFIG_CPU_HAS_PREFETCH=y +CONFIG_MIPS_MT_DISABLED=y +# CONFIG_MIPS_MT_SMP is not set +# CONFIG_MIPS_MT_SMTC is not set +CONFIG_CPU_HAS_LLSC=y +CONFIG_CPU_HAS_SYNC=y +CONFIG_GENERIC_HARDIRQS=y +CONFIG_GENERIC_IRQ_PROBE=y +CONFIG_CPU_SUPPORTS_HIGHMEM=y +CONFIG_ARCH_FLATMEM_ENABLE=y +CONFIG_ARCH_POPULATES_NODE_MAP=y +CONFIG_SELECT_MEMORY_MODEL=y +CONFIG_FLATMEM_MANUAL=y +# CONFIG_DISCONTIGMEM_MANUAL is not set +# CONFIG_SPARSEMEM_MANUAL is not set +CONFIG_FLATMEM=y +CONFIG_FLAT_NODE_MEM_MAP=y +# CONFIG_SPARSEMEM_STATIC is not set +# CONFIG_SPARSEMEM_VMEMMAP_ENABLE is not set +CONFIG_SPLIT_PTLOCK_CPUS=4 +# CONFIG_RESOURCES_64BIT is not set +CONFIG_ZONE_DMA_FLAG=0 +CONFIG_VIRT_TO_BUS=y +CONFIG_TICK_ONESHOT=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y +CONFIG_GENERIC_CLOCKEVENTS_BUILD=y +# CONFIG_HZ_48 is not set +CONFIG_HZ_100=y +# CONFIG_HZ_128 is not set +# CONFIG_HZ_250 is not set +# CONFIG_HZ_256 is not set +# CONFIG_HZ_1000 is not set +# CONFIG_HZ_1024 is not set +CONFIG_SYS_SUPPORTS_ARBIT_HZ=y +CONFIG_HZ=100 +CONFIG_PREEMPT_NONE=y +# CONFIG_PREEMPT_VOLUNTARY is not set +# CONFIG_PREEMPT is not set +# CONFIG_KEXEC is not set +# CONFIG_SECCOMP is not set +CONFIG_LOCKDEP_SUPPORT=y +CONFIG_STACKTRACE_SUPPORT=y +CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config" + +# +# General setup +# +CONFIG_EXPERIMENTAL=y +CONFIG_BROKEN_ON_SMP=y +CONFIG_INIT_ENV_ARG_LIMIT=32 +CONFIG_LOCALVERSION="" +# CONFIG_LOCALVERSION_AUTO is not set +CONFIG_SWAP=y +CONFIG_SYSVIPC=y +CONFIG_SYSVIPC_SYSCTL=y +# CONFIG_POSIX_MQUEUE is not set +CONFIG_BSD_PROCESS_ACCT=y +# CONFIG_BSD_PROCESS_ACCT_V3 is not set +# CONFIG_TASKSTATS is not set +# CONFIG_AUDIT is not set +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_LOG_BUF_SHIFT=14 +# CONFIG_CGROUPS is not set +CONFIG_GROUP_SCHED=y +CONFIG_FAIR_GROUP_SCHED=y +# CONFIG_RT_GROUP_SCHED is not set +CONFIG_USER_SCHED=y +# CONFIG_CGROUP_SCHED is not set +CONFIG_SYSFS_DEPRECATED=y +CONFIG_SYSFS_DEPRECATED_V2=y +# CONFIG_RELAY is not set +# CONFIG_NAMESPACES is not set +CONFIG_BLK_DEV_INITRD=y +CONFIG_INITRAMFS_SOURCE="" +CONFIG_CC_OPTIMIZE_FOR_SIZE=y +CONFIG_SYSCTL=y +CONFIG_EMBEDDED=y +CONFIG_SYSCTL_SYSCALL=y +# CONFIG_KALLSYMS is not set +CONFIG_HOTPLUG=y +CONFIG_PRINTK=y +CONFIG_BUG=y +# CONFIG_ELF_CORE is not set +CONFIG_COMPAT_BRK=y +CONFIG_BASE_FULL=y +CONFIG_FUTEX=y +CONFIG_ANON_INODES=y +CONFIG_EPOLL=y +CONFIG_SIGNALFD=y +CONFIG_TIMERFD=y +CONFIG_EVENTFD=y +CONFIG_SHMEM=y +# CONFIG_VM_EVENT_COUNTERS is not set +CONFIG_SLAB=y +# CONFIG_SLUB is not set +# CONFIG_SLOB is not set +# CONFIG_PROFILING is not set +# CONFIG_MARKERS is not set +CONFIG_HAVE_OPROFILE=y +# CONFIG_HAVE_KPROBES is not set +# CONFIG_HAVE_KRETPROBES is not set +CONFIG_PROC_PAGE_MONITOR=y +CONFIG_SLABINFO=y +CONFIG_RT_MUTEXES=y +# CONFIG_TINY_SHMEM is not set +CONFIG_BASE_SMALL=0 +CONFIG_MODULES=y +CONFIG_MODULE_UNLOAD=y +# CONFIG_MODULE_FORCE_UNLOAD is not set +# CONFIG_MODVERSIONS is not set +# CONFIG_MODULE_SRCVERSION_ALL is not set +# CONFIG_KMOD is not set +CONFIG_BLOCK=y +# CONFIG_LBD is not set +# CONFIG_BLK_DEV_IO_TRACE is not set +# CONFIG_LSF is not set +# CONFIG_BLK_DEV_BSG is not set + +# +# IO Schedulers +# +CONFIG_IOSCHED_NOOP=y +# CONFIG_IOSCHED_AS is not set +CONFIG_IOSCHED_DEADLINE=y +# CONFIG_IOSCHED_CFQ is not set +# CONFIG_DEFAULT_AS is not set +CONFIG_DEFAULT_DEADLINE=y +# CONFIG_DEFAULT_CFQ is not set +# CONFIG_DEFAULT_NOOP is not set +CONFIG_DEFAULT_IOSCHED="deadline" +CONFIG_CLASSIC_RCU=y + +# +# Bus options (PCI, PCMCIA, EISA, ISA, TC) +# +CONFIG_HW_HAS_PCI=y +CONFIG_PCI=y +CONFIG_PCI_DOMAINS=y +# CONFIG_ARCH_SUPPORTS_MSI is not set +CONFIG_PCI_LEGACY=y +CONFIG_MMU=y +# CONFIG_PCCARD is not set +# CONFIG_HOTPLUG_PCI is not set + +# +# Executable file formats +# +CONFIG_BINFMT_ELF=y +# CONFIG_BINFMT_MISC is not set +CONFIG_TRAD_SIGNALS=y + +# +# Power management options +# +CONFIG_ARCH_SUSPEND_POSSIBLE=y +# CONFIG_PM is not set + +# +# Networking +# +CONFIG_NET=y + +# +# Networking options +# +CONFIG_PACKET=y +CONFIG_PACKET_MMAP=y +CONFIG_UNIX=y +# CONFIG_NET_KEY is not set +CONFIG_INET=y +CONFIG_IP_MULTICAST=y +CONFIG_IP_ADVANCED_ROUTER=y +CONFIG_ASK_IP_FIB_HASH=y +# CONFIG_IP_FIB_TRIE is not set +CONFIG_IP_FIB_HASH=y +CONFIG_IP_MULTIPLE_TABLES=y +CONFIG_IP_ROUTE_MULTIPATH=y +CONFIG_IP_ROUTE_VERBOSE=y +# CONFIG_IP_PNP is not set +# CONFIG_NET_IPIP is not set +# CONFIG_NET_IPGRE is not set +# CONFIG_IP_MROUTE is not set +CONFIG_ARPD=y +CONFIG_SYN_COOKIES=y +# CONFIG_INET_AH is not set +# CONFIG_INET_ESP is not set +# CONFIG_INET_IPCOMP is not set +# CONFIG_INET_XFRM_TUNNEL is not set +# CONFIG_INET_TUNNEL is not set +# CONFIG_INET_XFRM_MODE_TRANSPORT is not set +# CONFIG_INET_XFRM_MODE_TUNNEL is not set +# CONFIG_INET_XFRM_MODE_BEET is not set +# CONFIG_INET_LRO is not set +CONFIG_INET_DIAG=m +CONFIG_INET_TCP_DIAG=m +CONFIG_TCP_CONG_ADVANCED=y +CONFIG_TCP_CONG_BIC=m +CONFIG_TCP_CONG_CUBIC=m +CONFIG_TCP_CONG_WESTWOOD=m +CONFIG_TCP_CONG_HTCP=m +CONFIG_TCP_CONG_HSTCP=m +CONFIG_TCP_CONG_HYBLA=m +CONFIG_TCP_CONG_VEGAS=y +CONFIG_TCP_CONG_SCALABLE=m +CONFIG_TCP_CONG_LP=m +CONFIG_TCP_CONG_VENO=m +CONFIG_TCP_CONG_YEAH=m +CONFIG_TCP_CONG_ILLINOIS=m +# CONFIG_DEFAULT_BIC is not set +# CONFIG_DEFAULT_CUBIC is not set +# CONFIG_DEFAULT_HTCP is not set +CONFIG_DEFAULT_VEGAS=y +# CONFIG_DEFAULT_WESTWOOD is not set +# CONFIG_DEFAULT_RENO is not set +CONFIG_DEFAULT_TCP_CONG="vegas" +# CONFIG_TCP_MD5SIG is not set +# CONFIG_IP_VS is not set +# CONFIG_IPV6 is not set +# CONFIG_NETWORK_SECMARK is not set +CONFIG_NETFILTER=y +# CONFIG_NETFILTER_DEBUG is not set +CONFIG_NETFILTER_ADVANCED=y +# CONFIG_BRIDGE_NETFILTER is not set + +# +# Core Netfilter Configuration +# +# CONFIG_NETFILTER_NETLINK_QUEUE is not set +# CONFIG_NETFILTER_NETLINK_LOG is not set +CONFIG_NF_CONNTRACK=y +CONFIG_NF_CT_ACCT=y +CONFIG_NF_CONNTRACK_MARK=y +# CONFIG_NF_CONNTRACK_EVENTS is not set +# CONFIG_NF_CT_PROTO_DCCP is not set +# CONFIG_NF_CT_PROTO_SCTP is not set +# CONFIG_NF_CT_PROTO_UDPLITE is not set +# CONFIG_NF_CONNTRACK_AMANDA is not set +CONFIG_NF_CONNTRACK_FTP=m +# CONFIG_NF_CONNTRACK_H323 is not set +CONFIG_NF_CONNTRACK_IRC=m +# CONFIG_NF_CONNTRACK_NETBIOS_NS is not set +# CONFIG_NF_CONNTRACK_PPTP is not set +# CONFIG_NF_CONNTRACK_SANE is not set +# CONFIG_NF_CONNTRACK_SIP is not set +CONFIG_NF_CONNTRACK_TFTP=m +# CONFIG_NF_CT_NETLINK is not set +CONFIG_NETFILTER_XTABLES=y +# CONFIG_NETFILTER_XT_TARGET_CLASSIFY is not set +# CONFIG_NETFILTER_XT_TARGET_CONNMARK is not set +# CONFIG_NETFILTER_XT_TARGET_DSCP is not set +# CONFIG_NETFILTER_XT_TARGET_MARK is not set +CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m +CONFIG_NETFILTER_XT_TARGET_NFLOG=m +# CONFIG_NETFILTER_XT_TARGET_NOTRACK is not set +# CONFIG_NETFILTER_XT_TARGET_RATEEST is not set +CONFIG_NETFILTER_XT_TARGET_TRACE=m +# CONFIG_NETFILTER_XT_TARGET_TCPMSS is not set +# CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP is not set +CONFIG_NETFILTER_XT_MATCH_COMMENT=m +# CONFIG_NETFILTER_XT_MATCH_CONNBYTES is not set +CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=m +# CONFIG_NETFILTER_XT_MATCH_CONNMARK is not set +# CONFIG_NETFILTER_XT_MATCH_CONNTRACK is not set +CONFIG_NETFILTER_XT_MATCH_DCCP=m +# CONFIG_NETFILTER_XT_MATCH_DSCP is not set +# CONFIG_NETFILTER_XT_MATCH_ESP is not set +# CONFIG_NETFILTER_XT_MATCH_HELPER is not set +# CONFIG_NETFILTER_XT_MATCH_IPRANGE is not set +# CONFIG_NETFILTER_XT_MATCH_LENGTH is not set +CONFIG_NETFILTER_XT_MATCH_LIMIT=y +# CONFIG_NETFILTER_XT_MATCH_MAC is not set +# CONFIG_NETFILTER_XT_MATCH_MARK is not set +# CONFIG_NETFILTER_XT_MATCH_OWNER is not set +CONFIG_NETFILTER_XT_MATCH_MULTIPORT=y +# CONFIG_NETFILTER_XT_MATCH_PKTTYPE is not set +# CONFIG_NETFILTER_XT_MATCH_QUOTA is not set +# CONFIG_NETFILTER_XT_MATCH_RATEEST is not set +CONFIG_NETFILTER_XT_MATCH_REALM=m +CONFIG_NETFILTER_XT_MATCH_SCTP=m +CONFIG_NETFILTER_XT_MATCH_STATE=y +# CONFIG_NETFILTER_XT_MATCH_STATISTIC is not set +# CONFIG_NETFILTER_XT_MATCH_STRING is not set +# CONFIG_NETFILTER_XT_MATCH_TCPMSS is not set +# CONFIG_NETFILTER_XT_MATCH_TIME is not set +CONFIG_NETFILTER_XT_MATCH_U32=m +CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=m + +# +# IP: Netfilter Configuration +# +CONFIG_NF_CONNTRACK_IPV4=y +CONFIG_NF_CONNTRACK_PROC_COMPAT=y +# CONFIG_IP_NF_QUEUE is not set +CONFIG_IP_NF_IPTABLES=y +# CONFIG_IP_NF_MATCH_RECENT is not set +# CONFIG_IP_NF_MATCH_ECN is not set +# CONFIG_IP_NF_MATCH_AH is not set +# CONFIG_IP_NF_MATCH_TTL is not set +CONFIG_IP_NF_MATCH_ADDRTYPE=m +CONFIG_IP_NF_FILTER=y +CONFIG_IP_NF_TARGET_REJECT=y +# CONFIG_IP_NF_TARGET_LOG is not set +# CONFIG_IP_NF_TARGET_ULOG is not set +CONFIG_NF_NAT=y +CONFIG_NF_NAT_NEEDED=y +CONFIG_IP_NF_TARGET_MASQUERADE=y +# CONFIG_IP_NF_TARGET_REDIRECT is not set +# CONFIG_IP_NF_TARGET_NETMAP is not set +# CONFIG_NF_NAT_SNMP_BASIC is not set +CONFIG_NF_NAT_FTP=m +CONFIG_NF_NAT_IRC=m +CONFIG_NF_NAT_TFTP=m +# CONFIG_NF_NAT_AMANDA is not set +# CONFIG_NF_NAT_PPTP is not set +# CONFIG_NF_NAT_H323 is not set +# CONFIG_NF_NAT_SIP is not set +CONFIG_IP_NF_MANGLE=y +# CONFIG_IP_NF_TARGET_ECN is not set +# CONFIG_IP_NF_TARGET_TTL is not set +# CONFIG_IP_NF_TARGET_CLUSTERIP is not set +CONFIG_IP_NF_RAW=m +# CONFIG_IP_NF_ARPTABLES is not set +# CONFIG_IP_DCCP is not set +# CONFIG_IP_SCTP is not set +# CONFIG_TIPC is not set +# CONFIG_ATM is not set +CONFIG_BRIDGE=y +CONFIG_VLAN_8021Q=y +# CONFIG_DECNET is not set +CONFIG_LLC=y +CONFIG_LLC2=m +# CONFIG_IPX is not set +# CONFIG_ATALK is not set +# CONFIG_X25 is not set +# CONFIG_LAPB is not set +# CONFIG_ECONET is not set +# CONFIG_WAN_ROUTER is not set +CONFIG_NET_SCHED=y + +# +# Queueing/Scheduling +# +CONFIG_NET_SCH_CBQ=m +# CONFIG_NET_SCH_HTB is not set +# CONFIG_NET_SCH_HFSC is not set +CONFIG_NET_SCH_PRIO=m +CONFIG_NET_SCH_RR=m +# CONFIG_NET_SCH_RED is not set +# CONFIG_NET_SCH_SFQ is not set +# CONFIG_NET_SCH_TEQL is not set +# CONFIG_NET_SCH_TBF is not set +# CONFIG_NET_SCH_GRED is not set +# CONFIG_NET_SCH_DSMARK is not set +CONFIG_NET_SCH_NETEM=m +# CONFIG_NET_SCH_INGRESS is not set + +# +# Classification +# +CONFIG_NET_CLS=y +CONFIG_NET_CLS_BASIC=m +CONFIG_NET_CLS_TCINDEX=m +CONFIG_NET_CLS_ROUTE4=m +CONFIG_NET_CLS_ROUTE=y +CONFIG_NET_CLS_FW=m +CONFIG_NET_CLS_U32=m +CONFIG_CLS_U32_PERF=y +CONFIG_CLS_U32_MARK=y +CONFIG_NET_CLS_RSVP=m +CONFIG_NET_CLS_RSVP6=m +# CONFIG_NET_CLS_FLOW is not set +CONFIG_NET_EMATCH=y +CONFIG_NET_EMATCH_STACK=32 +CONFIG_NET_EMATCH_CMP=m +CONFIG_NET_EMATCH_NBYTE=m +CONFIG_NET_EMATCH_U32=m +CONFIG_NET_EMATCH_META=m +CONFIG_NET_EMATCH_TEXT=m +CONFIG_NET_CLS_ACT=y +CONFIG_NET_ACT_POLICE=y +CONFIG_NET_ACT_GACT=m +CONFIG_GACT_PROB=y +CONFIG_NET_ACT_MIRRED=m +CONFIG_NET_ACT_IPT=m +# CONFIG_NET_ACT_NAT is not set +CONFIG_NET_ACT_PEDIT=m +# CONFIG_NET_ACT_SIMP is not set +CONFIG_NET_CLS_IND=y +CONFIG_NET_SCH_FIFO=y + +# +# Network testing +# +# CONFIG_NET_PKTGEN is not set +CONFIG_HAMRADIO=y + +# +# Packet Radio protocols +# +# CONFIG_AX25 is not set +# CONFIG_CAN is not set +# CONFIG_IRDA is not set +# CONFIG_BT is not set +# CONFIG_AF_RXRPC is not set +CONFIG_FIB_RULES=y + +# +# Wireless +# +# CONFIG_CFG80211 is not set +CONFIG_WIRELESS_EXT=y +# CONFIG_MAC80211 is not set +# CONFIG_IEEE80211 is not set +# CONFIG_RFKILL is not set +# CONFIG_NET_9P is not set + +# +# Device Drivers +# + +# +# Generic Driver Options +# +CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" +CONFIG_STANDALONE=y +CONFIG_PREVENT_FIRMWARE_BUILD=y +CONFIG_FW_LOADER=y +# CONFIG_SYS_HYPERVISOR is not set +# CONFIG_CONNECTOR is not set +CONFIG_MTD=y +# CONFIG_MTD_DEBUG is not set +# CONFIG_MTD_CONCAT is not set +CONFIG_MTD_PARTITIONS=y +# CONFIG_MTD_REDBOOT_PARTS is not set +# CONFIG_MTD_CMDLINE_PARTS is not set +# CONFIG_MTD_AR7_PARTS is not set + +# +# User Modules And Translation Layers +# +CONFIG_MTD_CHAR=y +CONFIG_MTD_BLKDEVS=y +CONFIG_MTD_BLOCK=y +# CONFIG_FTL is not set +# CONFIG_NFTL is not set +# CONFIG_INFTL is not set +# CONFIG_RFD_FTL is not set +# CONFIG_SSFDC is not set +# CONFIG_MTD_OOPS is not set + +# +# RAM/ROM/Flash chip drivers +# +# CONFIG_MTD_CFI is not set +# CONFIG_MTD_JEDECPROBE is not set +CONFIG_MTD_MAP_BANK_WIDTH_1=y +CONFIG_MTD_MAP_BANK_WIDTH_2=y +CONFIG_MTD_MAP_BANK_WIDTH_4=y +# CONFIG_MTD_MAP_BANK_WIDTH_8 is not set +# CONFIG_MTD_MAP_BANK_WIDTH_16 is not set +# CONFIG_MTD_MAP_BANK_WIDTH_32 is not set +CONFIG_MTD_CFI_I1=y +CONFIG_MTD_CFI_I2=y +# CONFIG_MTD_CFI_I4 is not set +# CONFIG_MTD_CFI_I8 is not set +# CONFIG_MTD_RAM is not set +# CONFIG_MTD_ROM is not set +# CONFIG_MTD_ABSENT is not set + +# +# Mapping drivers for chip access +# +# CONFIG_MTD_COMPLEX_MAPPINGS is not set +# CONFIG_MTD_INTEL_VR_NOR is not set +# CONFIG_MTD_PLATRAM is not set + +# +# Self-contained MTD device drivers +# +# CONFIG_MTD_PMC551 is not set +# CONFIG_MTD_SLRAM is not set +# CONFIG_MTD_PHRAM is not set +# CONFIG_MTD_MTDRAM is not set +CONFIG_MTD_BLOCK2MTD=y + +# +# Disk-On-Chip Device Drivers +# +# CONFIG_MTD_DOC2000 is not set +# CONFIG_MTD_DOC2001 is not set +# CONFIG_MTD_DOC2001PLUS is not set +CONFIG_MTD_NAND=y +CONFIG_MTD_NAND_VERIFY_WRITE=y +# CONFIG_MTD_NAND_ECC_SMC is not set +# CONFIG_MTD_NAND_MUSEUM_IDS is not set +CONFIG_MTD_NAND_IDS=y +# CONFIG_MTD_NAND_DISKONCHIP is not set +# CONFIG_MTD_NAND_CAFE is not set +# CONFIG_MTD_NAND_NANDSIM is not set +CONFIG_MTD_NAND_PLATFORM=y +# CONFIG_MTD_ONENAND is not set + +# +# UBI - Unsorted block images +# +# CONFIG_MTD_UBI is not set +# CONFIG_PARPORT is not set +CONFIG_BLK_DEV=y +# CONFIG_BLK_CPQ_DA is not set +# CONFIG_BLK_CPQ_CISS_DA is not set +# CONFIG_BLK_DEV_DAC960 is not set +# CONFIG_BLK_DEV_UMEM is not set +# CONFIG_BLK_DEV_COW_COMMON is not set +# CONFIG_BLK_DEV_LOOP is not set +# CONFIG_BLK_DEV_NBD is not set +# CONFIG_BLK_DEV_SX8 is not set +# CONFIG_BLK_DEV_RAM is not set +# CONFIG_CDROM_PKTCDVD is not set +# CONFIG_ATA_OVER_ETH is not set +CONFIG_MISC_DEVICES=y +# CONFIG_PHANTOM is not set +# CONFIG_EEPROM_93CX6 is not set +# CONFIG_SGI_IOC4 is not set +# CONFIG_TIFM_CORE is not set +# CONFIG_ENCLOSURE_SERVICES is not set +CONFIG_HAVE_IDE=y +# CONFIG_IDE is not set + +# +# SCSI device support +# +# CONFIG_RAID_ATTRS is not set +CONFIG_SCSI=y +CONFIG_SCSI_DMA=y +# CONFIG_SCSI_TGT is not set +# CONFIG_SCSI_NETLINK is not set +CONFIG_SCSI_PROC_FS=y + +# +# SCSI support type (disk, tape, CD-ROM) +# +# CONFIG_BLK_DEV_SD is not set +# CONFIG_CHR_DEV_ST is not set +# CONFIG_CHR_DEV_OSST is not set +# CONFIG_BLK_DEV_SR is not set +# CONFIG_CHR_DEV_SG is not set +# CONFIG_CHR_DEV_SCH is not set + +# +# Some SCSI devices (e.g. CD jukebox) support multiple LUNs +# +# CONFIG_SCSI_MULTI_LUN is not set +# CONFIG_SCSI_CONSTANTS is not set +# CONFIG_SCSI_LOGGING is not set +# CONFIG_SCSI_SCAN_ASYNC is not set +CONFIG_SCSI_WAIT_SCAN=m + +# +# SCSI Transports +# +# CONFIG_SCSI_SPI_ATTRS is not set +# CONFIG_SCSI_FC_ATTRS is not set +# CONFIG_SCSI_ISCSI_ATTRS is not set +# CONFIG_SCSI_SAS_LIBSAS is not set +# CONFIG_SCSI_SRP_ATTRS is not set +CONFIG_SCSI_LOWLEVEL=y +# CONFIG_ISCSI_TCP is not set +# CONFIG_BLK_DEV_3W_XXXX_RAID is not set +# CONFIG_SCSI_3W_9XXX is not set +# CONFIG_SCSI_ACARD is not set +# CONFIG_SCSI_AACRAID is not set +# CONFIG_SCSI_AIC7XXX is not set +# CONFIG_SCSI_AIC7XXX_OLD is not set +# CONFIG_SCSI_AIC79XX is not set +# CONFIG_SCSI_AIC94XX is not set +# CONFIG_SCSI_DPT_I2O is not set +# CONFIG_SCSI_ADVANSYS is not set +# CONFIG_SCSI_ARCMSR is not set +# CONFIG_MEGARAID_NEWGEN is not set +# CONFIG_MEGARAID_LEGACY is not set +# CONFIG_MEGARAID_SAS is not set +# CONFIG_SCSI_HPTIOP is not set +# CONFIG_SCSI_DMX3191D is not set +# CONFIG_SCSI_FUTURE_DOMAIN is not set +# CONFIG_SCSI_IPS is not set +# CONFIG_SCSI_INITIO is not set +# CONFIG_SCSI_INIA100 is not set +# CONFIG_SCSI_MVSAS is not set +# CONFIG_SCSI_STEX is not set +# CONFIG_SCSI_SYM53C8XX_2 is not set +# CONFIG_SCSI_IPR is not set +# CONFIG_SCSI_QLOGIC_1280 is not set +# CONFIG_SCSI_QLA_FC is not set +# CONFIG_SCSI_QLA_ISCSI is not set +# CONFIG_SCSI_LPFC is not set +# CONFIG_SCSI_DC395x is not set +# CONFIG_SCSI_DC390T is not set +# CONFIG_SCSI_NSP32 is not set +# CONFIG_SCSI_DEBUG is not set +# CONFIG_SCSI_SRP is not set +CONFIG_ATA=y +# CONFIG_ATA_NONSTANDARD is not set +# CONFIG_SATA_PMP is not set +# CONFIG_SATA_AHCI is not set +# CONFIG_SATA_SIL24 is not set +CONFIG_ATA_SFF=y +# CONFIG_SATA_SVW is not set +# CONFIG_ATA_PIIX is not set +# CONFIG_SATA_MV is not set +# CONFIG_SATA_NV is not set +# CONFIG_PDC_ADMA is not set +# CONFIG_SATA_QSTOR is not set +# CONFIG_SATA_PROMISE is not set +# CONFIG_SATA_SX4 is not set +# CONFIG_SATA_SIL is not set +# CONFIG_SATA_SIS is not set +# CONFIG_SATA_ULI is not set +# CONFIG_SATA_VIA is not set +# CONFIG_SATA_VITESSE is not set +# CONFIG_SATA_INIC162X is not set +# CONFIG_PATA_ALI is not set +# CONFIG_PATA_AMD is not set +# CONFIG_PATA_ARTOP is not set +# CONFIG_PATA_ATIIXP is not set +# CONFIG_PATA_CMD640_PCI is not set +# CONFIG_PATA_CMD64X is not set +# CONFIG_PATA_CS5520 is not set +# CONFIG_PATA_CS5530 is not set +# CONFIG_PATA_CYPRESS is not set +# CONFIG_PATA_EFAR is not set +# CONFIG_ATA_GENERIC is not set +# CONFIG_PATA_HPT366 is not set +# CONFIG_PATA_HPT37X is not set +# CONFIG_PATA_HPT3X2N is not set +# CONFIG_PATA_HPT3X3 is not set +# CONFIG_PATA_IT821X is not set +# CONFIG_PATA_IT8213 is not set +# CONFIG_PATA_JMICRON is not set +# CONFIG_PATA_TRIFLEX is not set +# CONFIG_PATA_MARVELL is not set +# CONFIG_PATA_MPIIX is not set +# CONFIG_PATA_OLDPIIX is not set +# CONFIG_PATA_NETCELL is not set +# CONFIG_PATA_NINJA32 is not set +# CONFIG_PATA_NS87410 is not set +# CONFIG_PATA_NS87415 is not set +# CONFIG_PATA_OPTI is not set +# CONFIG_PATA_OPTIDMA is not set +# CONFIG_PATA_PDC_OLD is not set +# CONFIG_PATA_RADISYS is not set +CONFIG_PATA_RB532=y +# CONFIG_PATA_RZ1000 is not set +# CONFIG_PATA_SC1200 is not set +# CONFIG_PATA_SERVERWORKS is not set +# CONFIG_PATA_PDC2027X is not set +# CONFIG_PATA_SIL680 is not set +# CONFIG_PATA_SIS is not set +# CONFIG_PATA_VIA is not set +# CONFIG_PATA_WINBOND is not set +# CONFIG_PATA_PLATFORM is not set +# CONFIG_MD is not set +# CONFIG_FUSION is not set + +# +# IEEE 1394 (FireWire) support +# +# CONFIG_FIREWIRE is not set +# CONFIG_IEEE1394 is not set +# CONFIG_I2O is not set +CONFIG_NETDEVICES=y +# CONFIG_NETDEVICES_MULTIQUEUE is not set +CONFIG_IFB=m +# CONFIG_DUMMY is not set +# CONFIG_BONDING is not set +# CONFIG_MACVLAN is not set +# CONFIG_EQUALIZER is not set +# CONFIG_TUN is not set +# CONFIG_VETH is not set +# CONFIG_ARCNET is not set +# CONFIG_PHYLIB is not set +CONFIG_NET_ETHERNET=y +CONFIG_MII=y +# CONFIG_AX88796 is not set +CONFIG_KORINA=y +# CONFIG_HAPPYMEAL is not set +# CONFIG_SUNGEM is not set +# CONFIG_CASSINI is not set +# CONFIG_NET_VENDOR_3COM is not set +# CONFIG_DM9000 is not set +# CONFIG_NET_TULIP is not set +# CONFIG_HP100 is not set +# CONFIG_IBM_NEW_EMAC_ZMII is not set +# CONFIG_IBM_NEW_EMAC_RGMII is not set +# CONFIG_IBM_NEW_EMAC_TAH is not set +# CONFIG_IBM_NEW_EMAC_EMAC4 is not set +CONFIG_NET_PCI=y +# CONFIG_PCNET32 is not set +# CONFIG_AMD8111_ETH is not set +# CONFIG_ADAPTEC_STARFIRE is not set +# CONFIG_B44 is not set +# CONFIG_FORCEDETH is not set +# CONFIG_TC35815 is not set +# CONFIG_EEPRO100 is not set +# CONFIG_E100 is not set +# CONFIG_FEALNX is not set +# CONFIG_NATSEMI is not set +# CONFIG_NE2K_PCI is not set +# CONFIG_8139CP is not set +# CONFIG_8139TOO is not set +# CONFIG_R6040 is not set +# CONFIG_SIS900 is not set +# CONFIG_EPIC100 is not set +# CONFIG_SUNDANCE is not set +# CONFIG_TLAN is not set +CONFIG_VIA_RHINE=y +# CONFIG_VIA_RHINE_MMIO is not set +CONFIG_VIA_RHINE_NAPI=y +# CONFIG_SC92031 is not set +# CONFIG_NETDEV_1000 is not set +# CONFIG_NETDEV_10000 is not set +# CONFIG_TR is not set + +# +# Wireless LAN +# +# CONFIG_WLAN_PRE80211 is not set +CONFIG_WLAN_80211=y +# CONFIG_IPW2100 is not set +# CONFIG_IPW2200 is not set +# CONFIG_LIBERTAS is not set +# CONFIG_HERMES is not set +CONFIG_ATMEL=m +# CONFIG_PCI_ATMEL is not set +# CONFIG_PRISM54 is not set +# CONFIG_IWLWIFI_LEDS is not set +# CONFIG_HOSTAP is not set +# CONFIG_WAN is not set +# CONFIG_FDDI is not set +# CONFIG_HIPPI is not set +CONFIG_PPP=m +CONFIG_PPP_MULTILINK=y +CONFIG_PPP_FILTER=y +CONFIG_PPP_ASYNC=m +# CONFIG_PPP_SYNC_TTY is not set +CONFIG_PPP_DEFLATE=m +CONFIG_PPP_BSDCOMP=m +# CONFIG_PPP_MPPE is not set +CONFIG_PPPOE=m +CONFIG_PPPOL2TP=m +# CONFIG_SLIP is not set +CONFIG_SLHC=m +# CONFIG_NET_FC is not set +# CONFIG_NETCONSOLE is not set +# CONFIG_NETPOLL is not set +# CONFIG_NET_POLL_CONTROLLER is not set +# CONFIG_ISDN is not set +# CONFIG_PHONE is not set + +# +# Input device support +# +CONFIG_INPUT=y +# CONFIG_INPUT_FF_MEMLESS is not set +# CONFIG_INPUT_POLLDEV is not set + +# +# Userland interfaces +# +# CONFIG_INPUT_MOUSEDEV is not set +# CONFIG_INPUT_JOYDEV is not set +# CONFIG_INPUT_EVDEV is not set +# CONFIG_INPUT_EVBUG is not set + +# +# Input Device Drivers +# +CONFIG_INPUT_KEYBOARD=y +# CONFIG_KEYBOARD_ATKBD is not set +# CONFIG_KEYBOARD_SUNKBD is not set +# CONFIG_KEYBOARD_LKKBD is not set +# CONFIG_KEYBOARD_XTKBD is not set +# CONFIG_KEYBOARD_NEWTON is not set +# CONFIG_KEYBOARD_STOWAWAY is not set +# CONFIG_KEYBOARD_GPIO is not set +# CONFIG_INPUT_MOUSE is not set +# CONFIG_INPUT_JOYSTICK is not set +# CONFIG_INPUT_TABLET is not set +# CONFIG_INPUT_TOUCHSCREEN is not set +# CONFIG_INPUT_MISC is not set + +# +# Hardware I/O ports +# +# CONFIG_SERIO is not set +# CONFIG_GAMEPORT is not set + +# +# Character devices +# +# CONFIG_VT is not set +# CONFIG_SERIAL_NONSTANDARD is not set +# CONFIG_NOZOMI is not set + +# +# Serial drivers +# +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +# CONFIG_SERIAL_8250_PCI is not set +CONFIG_SERIAL_8250_NR_UARTS=2 +CONFIG_SERIAL_8250_RUNTIME_UARTS=2 +# CONFIG_SERIAL_8250_EXTENDED is not set + +# +# Non-8250 serial port support +# +CONFIG_SERIAL_CORE=y +CONFIG_SERIAL_CORE_CONSOLE=y +# CONFIG_SERIAL_JSM is not set +CONFIG_UNIX98_PTYS=y +# CONFIG_LEGACY_PTYS is not set +# CONFIG_IPMI_HANDLER is not set +CONFIG_HW_RANDOM=y +# CONFIG_RTC is not set +# CONFIG_R3964 is not set +# CONFIG_APPLICOM is not set +# CONFIG_RAW_DRIVER is not set +# CONFIG_TCG_TPM is not set +CONFIG_DEVPORT=y +# CONFIG_I2C is not set + +# +# SPI support +# +# CONFIG_SPI is not set +# CONFIG_SPI_MASTER is not set +# CONFIG_W1 is not set +# CONFIG_POWER_SUPPLY is not set +# CONFIG_HWMON is not set +# CONFIG_THERMAL is not set +CONFIG_WATCHDOG=y +# CONFIG_WATCHDOG_NOWAYOUT is not set + +# +# Watchdog Device Drivers +# +# CONFIG_SOFT_WATCHDOG is not set + +# +# PCI-based Watchdog Cards +# +# CONFIG_PCIPCWATCHDOG is not set +# CONFIG_WDTPCI is not set + +# +# Sonics Silicon Backplane +# +CONFIG_SSB_POSSIBLE=y +# CONFIG_SSB is not set + +# +# Multifunction device drivers +# +# CONFIG_MFD_SM501 is not set +# CONFIG_HTC_PASIC3 is not set + +# +# Multimedia devices +# +CONFIG_VIDEO_DEV=m +CONFIG_VIDEO_V4L2_COMMON=m +CONFIG_VIDEO_ALLOW_V4L1=y +CONFIG_VIDEO_V4L1_COMPAT=y +CONFIG_VIDEO_V4L2=m +CONFIG_VIDEO_V4L1=m +CONFIG_VIDEO_CAPTURE_DRIVERS=y +# CONFIG_VIDEO_ADV_DEBUG is not set +# CONFIG_VIDEO_HELPER_CHIPS_AUTO is not set + +# +# Encoders/decoders and other helper chips +# + +# +# Audio decoders +# + +# +# Video decoders +# + +# +# Video and audio decoders +# + +# +# MPEG video encoders +# +# CONFIG_VIDEO_CX2341X is not set + +# +# Video encoders +# + +# +# Video improvement chips +# +# CONFIG_VIDEO_VIVI is not set +# CONFIG_VIDEO_CPIA is not set +# CONFIG_VIDEO_STRADIS is not set +# CONFIG_SOC_CAMERA is not set +# CONFIG_RADIO_ADAPTERS is not set +# CONFIG_DVB_CORE is not set +# CONFIG_DAB is not set + +# +# Graphics support +# +# CONFIG_DRM is not set +# CONFIG_VGASTATE is not set +# CONFIG_VIDEO_OUTPUT_CONTROL is not set +# CONFIG_FB is not set +# CONFIG_BACKLIGHT_LCD_SUPPORT is not set + +# +# Display device support +# +# CONFIG_DISPLAY_SUPPORT is not set + +# +# Sound +# +# CONFIG_SOUND is not set +CONFIG_HID_SUPPORT=y +# CONFIG_HID is not set +CONFIG_USB_SUPPORT=y +CONFIG_USB_ARCH_HAS_HCD=y +CONFIG_USB_ARCH_HAS_OHCI=y +CONFIG_USB_ARCH_HAS_EHCI=y +# CONFIG_USB is not set +# CONFIG_USB_OTG_WHITELIST is not set +# CONFIG_USB_OTG_BLACKLIST_HUB is not set + +# +# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support' +# +# CONFIG_USB_GADGET is not set +# CONFIG_MMC is not set +# CONFIG_MEMSTICK is not set +CONFIG_NEW_LEDS=y +CONFIG_LEDS_CLASS=y + +# +# LED drivers +# +# CONFIG_LEDS_GPIO is not set + +# +# LED Triggers +# +CONFIG_LEDS_TRIGGERS=y +CONFIG_LEDS_TRIGGER_TIMER=y +CONFIG_LEDS_TRIGGER_HEARTBEAT=y +# CONFIG_LEDS_TRIGGER_DEFAULT_ON is not set +# CONFIG_INFINIBAND is not set +CONFIG_RTC_LIB=y +# CONFIG_RTC_CLASS is not set +# CONFIG_UIO is not set + +# +# File systems +# +CONFIG_EXT2_FS=y +# CONFIG_EXT2_FS_XATTR is not set +# CONFIG_EXT2_FS_XIP is not set +# CONFIG_EXT3_FS is not set +# CONFIG_EXT4DEV_FS is not set +# CONFIG_REISERFS_FS is not set +# CONFIG_JFS_FS is not set +# CONFIG_FS_POSIX_ACL is not set +# CONFIG_XFS_FS is not set +# CONFIG_OCFS2_FS is not set +# CONFIG_DNOTIFY is not set +# CONFIG_INOTIFY is not set +# CONFIG_QUOTA is not set +# CONFIG_AUTOFS_FS is not set +# CONFIG_AUTOFS4_FS is not set +# CONFIG_FUSE_FS is not set + +# +# CD-ROM/DVD Filesystems +# +# CONFIG_ISO9660_FS is not set +# CONFIG_UDF_FS is not set + +# +# DOS/FAT/NT Filesystems +# +# CONFIG_MSDOS_FS is not set +# CONFIG_VFAT_FS is not set +# CONFIG_NTFS_FS is not set + +# +# Pseudo filesystems +# +CONFIG_PROC_FS=y +CONFIG_PROC_KCORE=y +CONFIG_PROC_SYSCTL=y +CONFIG_SYSFS=y +CONFIG_TMPFS=y +# CONFIG_TMPFS_POSIX_ACL is not set +# CONFIG_HUGETLB_PAGE is not set +CONFIG_CONFIGFS_FS=y + +# +# Miscellaneous filesystems +# +# CONFIG_ADFS_FS is not set +# CONFIG_AFFS_FS is not set +# CONFIG_HFS_FS is not set +# CONFIG_HFSPLUS_FS is not set +# CONFIG_BEFS_FS is not set +# CONFIG_BFS_FS is not set +# CONFIG_EFS_FS is not set +CONFIG_JFFS2_FS=y +CONFIG_JFFS2_FS_DEBUG=0 +CONFIG_JFFS2_FS_WRITEBUFFER=y +# CONFIG_JFFS2_FS_WBUF_VERIFY is not set +CONFIG_JFFS2_SUMMARY=y +# CONFIG_JFFS2_FS_XATTR is not set +CONFIG_JFFS2_COMPRESSION_OPTIONS=y +CONFIG_JFFS2_ZLIB=y +# CONFIG_JFFS2_LZO is not set +CONFIG_JFFS2_RTIME=y +# CONFIG_JFFS2_RUBIN is not set +# CONFIG_JFFS2_CMODE_NONE is not set +CONFIG_JFFS2_CMODE_PRIORITY=y +# CONFIG_JFFS2_CMODE_SIZE is not set +# CONFIG_JFFS2_CMODE_FAVOURLZO is not set +# CONFIG_CRAMFS is not set +# CONFIG_VXFS_FS is not set +# CONFIG_MINIX_FS is not set +# CONFIG_HPFS_FS is not set +# CONFIG_QNX4FS_FS is not set +# CONFIG_ROMFS_FS is not set +# CONFIG_SYSV_FS is not set +# CONFIG_UFS_FS is not set +CONFIG_NETWORK_FILESYSTEMS=y +# CONFIG_NFS_FS is not set +# CONFIG_NFSD is not set +# CONFIG_SMB_FS is not set +# CONFIG_CIFS is not set +# CONFIG_NCP_FS is not set +# CONFIG_CODA_FS is not set +# CONFIG_AFS_FS is not set + +# +# Partition Types +# +CONFIG_PARTITION_ADVANCED=y +# CONFIG_ACORN_PARTITION is not set +# CONFIG_OSF_PARTITION is not set +# CONFIG_AMIGA_PARTITION is not set +# CONFIG_ATARI_PARTITION is not set +CONFIG_MAC_PARTITION=y +CONFIG_MSDOS_PARTITION=y +CONFIG_BSD_DISKLABEL=y +# CONFIG_MINIX_SUBPARTITION is not set +# CONFIG_SOLARIS_X86_PARTITION is not set +# CONFIG_UNIXWARE_DISKLABEL is not set +# CONFIG_LDM_PARTITION is not set +# CONFIG_SGI_PARTITION is not set +# CONFIG_ULTRIX_PARTITION is not set +# CONFIG_SUN_PARTITION is not set +# CONFIG_KARMA_PARTITION is not set +# CONFIG_EFI_PARTITION is not set +# CONFIG_SYSV68_PARTITION is not set +# CONFIG_NLS is not set +# CONFIG_DLM is not set + +# +# Kernel hacking +# +CONFIG_TRACE_IRQFLAGS_SUPPORT=y +# CONFIG_PRINTK_TIME is not set +CONFIG_ENABLE_WARN_DEPRECATED=y +# CONFIG_ENABLE_MUST_CHECK is not set +CONFIG_FRAME_WARN=1024 +# CONFIG_MAGIC_SYSRQ is not set +# CONFIG_UNUSED_SYMBOLS is not set +# CONFIG_DEBUG_FS is not set +# CONFIG_HEADERS_CHECK is not set +# CONFIG_DEBUG_KERNEL is not set +# CONFIG_SAMPLES is not set +CONFIG_CMDLINE="" + +# +# Security options +# +# CONFIG_KEYS is not set +# CONFIG_SECURITY is not set +# CONFIG_SECURITY_FILE_CAPABILITIES is not set +CONFIG_CRYPTO=y + +# +# Crypto core or helper +# +CONFIG_CRYPTO_ALGAPI=m +CONFIG_CRYPTO_AEAD=m +CONFIG_CRYPTO_BLKCIPHER=m +# CONFIG_CRYPTO_MANAGER is not set +# CONFIG_CRYPTO_GF128MUL is not set +# CONFIG_CRYPTO_NULL is not set +# CONFIG_CRYPTO_CRYPTD is not set +# CONFIG_CRYPTO_AUTHENC is not set +CONFIG_CRYPTO_TEST=m + +# +# Authenticated Encryption with Associated Data +# +# CONFIG_CRYPTO_CCM is not set +# CONFIG_CRYPTO_GCM is not set +# CONFIG_CRYPTO_SEQIV is not set + +# +# Block modes +# +# CONFIG_CRYPTO_CBC is not set +# CONFIG_CRYPTO_CTR is not set +# CONFIG_CRYPTO_CTS is not set +# CONFIG_CRYPTO_ECB is not set +# CONFIG_CRYPTO_LRW is not set +# CONFIG_CRYPTO_PCBC is not set +# CONFIG_CRYPTO_XTS is not set + +# +# Hash modes +# +# CONFIG_CRYPTO_HMAC is not set +# CONFIG_CRYPTO_XCBC is not set + +# +# Digest +# +# CONFIG_CRYPTO_CRC32C is not set +# CONFIG_CRYPTO_MD4 is not set +# CONFIG_CRYPTO_MD5 is not set +# CONFIG_CRYPTO_MICHAEL_MIC is not set +# CONFIG_CRYPTO_SHA1 is not set +# CONFIG_CRYPTO_SHA256 is not set +# CONFIG_CRYPTO_SHA512 is not set +# CONFIG_CRYPTO_TGR192 is not set +# CONFIG_CRYPTO_WP512 is not set + +# +# Ciphers +# +# CONFIG_CRYPTO_AES is not set +# CONFIG_CRYPTO_ANUBIS is not set +# CONFIG_CRYPTO_ARC4 is not set +# CONFIG_CRYPTO_BLOWFISH is not set +# CONFIG_CRYPTO_CAMELLIA is not set +# CONFIG_CRYPTO_CAST5 is not set +# CONFIG_CRYPTO_CAST6 is not set +# CONFIG_CRYPTO_DES is not set +# CONFIG_CRYPTO_FCRYPT is not set +# CONFIG_CRYPTO_KHAZAD is not set +# CONFIG_CRYPTO_SALSA20 is not set +# CONFIG_CRYPTO_SEED is not set +# CONFIG_CRYPTO_SERPENT is not set +# CONFIG_CRYPTO_TEA is not set +# CONFIG_CRYPTO_TWOFISH is not set + +# +# Compression +# +# CONFIG_CRYPTO_DEFLATE is not set +# CONFIG_CRYPTO_LZO is not set +# CONFIG_CRYPTO_HW is not set + +# +# Library routines +# +CONFIG_BITREVERSE=y +# CONFIG_GENERIC_FIND_FIRST_BIT is not set +CONFIG_CRC_CCITT=m +CONFIG_CRC16=m +# CONFIG_CRC_ITU_T is not set +CONFIG_CRC32=y +# CONFIG_CRC7 is not set +CONFIG_LIBCRC32C=m +CONFIG_ZLIB_INFLATE=y +CONFIG_ZLIB_DEFLATE=y +CONFIG_TEXTSEARCH=y +CONFIG_TEXTSEARCH_KMP=m +CONFIG_TEXTSEARCH_BM=m +CONFIG_TEXTSEARCH_FSM=m +CONFIG_PLIST=y +CONFIG_HAS_IOMEM=y +CONFIG_HAS_IOPORT=y +CONFIG_HAS_DMA=y diff --git a/arch/mips/kernel/linux32.c b/arch/mips/kernel/linux32.c index 65af3cc90abb..c266211ed653 100644 --- a/arch/mips/kernel/linux32.c +++ b/arch/mips/kernel/linux32.c @@ -129,23 +129,6 @@ out: return error; } - -asmlinkage int sys_truncate64(const char __user *path, unsigned int high, - unsigned int low) -{ - if ((int)high < 0) - return -EINVAL; - return sys_truncate(path, ((long) high << 32) | low); -} - -asmlinkage int sys_ftruncate64(unsigned int fd, unsigned int high, - unsigned int low) -{ - if ((int)high < 0) - return -EINVAL; - return sys_ftruncate(fd, ((long) high << 32) | low); -} - /* * sys_execve() executes a new program. */ diff --git a/arch/mips/kernel/scall32-o32.S b/arch/mips/kernel/scall32-o32.S index c058c0b61a2a..fc4fd4d705e2 100644 --- a/arch/mips/kernel/scall32-o32.S +++ b/arch/mips/kernel/scall32-o32.S @@ -354,7 +354,7 @@ einval: li v0, -EINVAL sys sys_mkdir 2 sys sys_rmdir 1 /* 4040 */ sys sys_dup 1 - sys sys_pipe 0 + sys sysm_pipe 0 sys sys_times 1 sys sys_ni_syscall 0 sys sys_brk 1 /* 4045 */ diff --git a/arch/mips/kernel/scall64-64.S b/arch/mips/kernel/scall64-64.S index dc597b600c68..2b73fd1e4528 100644 --- a/arch/mips/kernel/scall64-64.S +++ b/arch/mips/kernel/scall64-64.S @@ -219,7 +219,7 @@ sys_call_table: PTR sys_readv PTR sys_writev PTR sys_access /* 5020 */ - PTR sys_pipe + PTR sysm_pipe PTR sys_select PTR sys_sched_yield PTR sys_mremap diff --git a/arch/mips/kernel/scall64-n32.S b/arch/mips/kernel/scall64-n32.S index 12940eca7893..2654e75d2fef 100644 --- a/arch/mips/kernel/scall64-n32.S +++ b/arch/mips/kernel/scall64-n32.S @@ -141,7 +141,7 @@ EXPORT(sysn32_call_table) PTR compat_sys_readv PTR compat_sys_writev PTR sys_access /* 6020 */ - PTR sys_pipe + PTR sysm_pipe PTR compat_sys_select PTR sys_sched_yield PTR sys_mremap diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S index 9a275efb4f04..76167bea5a70 100644 --- a/arch/mips/kernel/scall64-o32.S +++ b/arch/mips/kernel/scall64-o32.S @@ -247,7 +247,7 @@ sys_call_table: PTR sys_mkdir PTR sys_rmdir /* 4040 */ PTR sys_dup - PTR sys_pipe + PTR sysm_pipe PTR compat_sys_times PTR sys_ni_syscall PTR sys_brk /* 4045 */ diff --git a/arch/mips/kernel/syscall.c b/arch/mips/kernel/syscall.c index af1bdc897488..3523c8d12eda 100644 --- a/arch/mips/kernel/syscall.c +++ b/arch/mips/kernel/syscall.c @@ -40,7 +40,14 @@ #include <asm/sysmips.h> #include <asm/uaccess.h> -asmlinkage int sys_pipe(nabi_no_regargs volatile struct pt_regs regs) +/* + * For historic reasons the pipe(2) syscall on MIPS has an unusual calling + * convention. It returns results in registers $v0 / $v1 which means there + * is no need for it to do verify the validity of a userspace pointer + * argument. Historically that used to be expensive in Linux. These days + * the performance advantage is negligible. + */ +asmlinkage int sysm_pipe(nabi_no_regargs volatile struct pt_regs regs) { int fd[2]; int error, res; diff --git a/arch/mips/math-emu/kernel_linkage.c b/arch/mips/math-emu/kernel_linkage.c index ed49ef01ac53..52e6c58c8de1 100644 --- a/arch/mips/math-emu/kernel_linkage.c +++ b/arch/mips/math-emu/kernel_linkage.c @@ -24,6 +24,7 @@ #include <asm/signal.h> #include <asm/uaccess.h> +#include <asm/fpu.h> #include <asm/fpu_emulator.h> #define SIGNALLING_NAN 0x7ff800007ff80000LL diff --git a/arch/mips/pci/Makefile b/arch/mips/pci/Makefile index 57e34cafa497..15e01aec37fd 100644 --- a/arch/mips/pci/Makefile +++ b/arch/mips/pci/Makefile @@ -49,3 +49,4 @@ obj-$(CONFIG_TOSHIBA_RBTX4938) += fixup-rbtx4938.o obj-$(CONFIG_VICTOR_MPC30X) += fixup-mpc30x.o obj-$(CONFIG_ZAO_CAPCELLA) += fixup-capcella.o obj-$(CONFIG_WR_PPMC) += fixup-wrppmc.o +obj-$(CONFIG_MIKROTIK_RB532) += pci-rc32434.o ops-rc32434.o fixup-rc32434.o diff --git a/arch/mips/pci/fixup-rc32434.c b/arch/mips/pci/fixup-rc32434.c new file mode 100644 index 000000000000..75b90dcb7a09 --- /dev/null +++ b/arch/mips/pci/fixup-rc32434.c @@ -0,0 +1,69 @@ +/* + * Copyright 2001 MontaVista Software Inc. + * Author: MontaVista Software, Inc. + * stevel@mvista.com or source@mvista.com + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN + * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include <linux/types.h> +#include <linux/pci.h> +#include <linux/kernel.h> +#include <linux/init.h> + +#include <asm/mach-rc32434/rc32434.h> + +static int __devinitdata irq_map[2][12] = { + {0, 0, 2, 3, 2, 3, 0, 0, 0, 0, 0, 1}, + {0, 0, 1, 3, 0, 2, 1, 3, 0, 2, 1, 3} +}; + +int __devinit pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +{ + int irq = 0; + + if (dev->bus->number < 2 && PCI_SLOT(dev->devfn) < 12) + irq = irq_map[dev->bus->number][PCI_SLOT(dev->devfn)]; + + return irq + GROUP4_IRQ_BASE + 4; +} + +static void rc32434_pci_early_fixup(struct pci_dev *dev) +{ + if (PCI_SLOT(dev->devfn) == 6 && dev->bus->number == 0) { + /* disable prefetched memory range */ + pci_write_config_word(dev, PCI_PREF_MEMORY_LIMIT, 0); + pci_write_config_word(dev, PCI_PREF_MEMORY_BASE, 0x10); + + pci_write_config_byte(dev, PCI_CACHE_LINE_SIZE, 4); + } +} + +/* + * The fixup applies to both the IDT and VIA devices present on the board + */ +DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, rc32434_pci_early_fixup); + +/* Do platform specific device initialization at pci_enable_device() time */ +int pcibios_plat_dev_init(struct pci_dev *dev) +{ + return 0; +} diff --git a/arch/mips/pci/ops-rc32434.c b/arch/mips/pci/ops-rc32434.c new file mode 100644 index 000000000000..d1f8fa210ca1 --- /dev/null +++ b/arch/mips/pci/ops-rc32434.c @@ -0,0 +1,207 @@ +/* + * BRIEF MODULE DESCRIPTION + * pci_ops for IDT EB434 board + * + * Copyright 2004 IDT Inc. (rischelp@idt.com) + * Copyright 2006 Felix Fietkau <nbd@openwrt.org> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN + * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 675 Mass Ave, Cambridge, MA 02139, USA. + */ +#include <linux/delay.h> +#include <linux/init.h> +#include <linux/io.h> +#include <linux/pci.h> +#include <linux/types.h> + +#include <asm/cpu.h> +#include <asm/mach-rc32434/rc32434.h> +#include <asm/mach-rc32434/pci.h> + +#define PCI_ACCESS_READ 0 +#define PCI_ACCESS_WRITE 1 + + +#define PCI_CFG_SET(bus, slot, func, off) \ + (rc32434_pci->pcicfga = (0x80000000 | \ + ((bus) << 16) | ((slot)<<11) | \ + ((func)<<8) | (off))) + +static inline int config_access(unsigned char access_type, + struct pci_bus *bus, unsigned int devfn, + unsigned char where, u32 *data) +{ + unsigned int slot = PCI_SLOT(devfn); + u8 func = PCI_FUNC(devfn); + + /* Setup address */ + PCI_CFG_SET(bus->number, slot, func, where); + rc32434_sync(); + + if (access_type == PCI_ACCESS_WRITE) + rc32434_pci->pcicfgd = *data; + else + *data = rc32434_pci->pcicfgd; + + rc32434_sync(); + + return 0; +} + + +/* + * We can't address 8 and 16 bit words directly. Instead we have to + * read/write a 32bit word and mask/modify the data we actually want. + */ +static int read_config_byte(struct pci_bus *bus, unsigned int devfn, + int where, u8 *val) +{ + u32 data; + int ret; + + ret = config_access(PCI_ACCESS_READ, bus, devfn, where, &data); + *val = (data >> ((where & 3) << 3)) & 0xff; + return ret; +} + +static int read_config_word(struct pci_bus *bus, unsigned int devfn, + int where, u16 *val) +{ + u32 data; + int ret; + + ret = config_access(PCI_ACCESS_READ, bus, devfn, where, &data); + *val = (data >> ((where & 3) << 3)) & 0xffff; + return ret; +} + +static int read_config_dword(struct pci_bus *bus, unsigned int devfn, + int where, u32 *val) +{ + int ret; + int delay = 1; + + /* + * Don't scan too far, else there will be errors with plugged in + * daughterboard (rb564). + */ + if (bus->number == 0 && PCI_SLOT(devfn) > 21) + return 0; + +retry: + ret = config_access(PCI_ACCESS_READ, bus, devfn, where, val); + + /* + * Certain devices react delayed at device scan time, this + * gives them time to settle + */ + if (where == PCI_VENDOR_ID) { + if (ret == 0xffffffff || ret == 0x00000000 || + ret == 0x0000ffff || ret == 0xffff0000) { + if (delay > 4) + return 0; + delay *= 2; + msleep(delay); + goto retry; + } + } + + return ret; +} + +static int +write_config_byte(struct pci_bus *bus, unsigned int devfn, int where, + u8 val) +{ + u32 data = 0; + + if (config_access(PCI_ACCESS_READ, bus, devfn, where, &data)) + return -1; + + data = (data & ~(0xff << ((where & 3) << 3))) | + (val << ((where & 3) << 3)); + + if (config_access(PCI_ACCESS_WRITE, bus, devfn, where, &data)) + return -1; + + return PCIBIOS_SUCCESSFUL; +} + + +static int +write_config_word(struct pci_bus *bus, unsigned int devfn, int where, + u16 val) +{ + u32 data = 0; + + if (config_access(PCI_ACCESS_READ, bus, devfn, where, &data)) + return -1; + + data = (data & ~(0xffff << ((where & 3) << 3))) | + (val << ((where & 3) << 3)); + + if (config_access(PCI_ACCESS_WRITE, bus, devfn, where, &data)) + return -1; + + + return PCIBIOS_SUCCESSFUL; +} + + +static int +write_config_dword(struct pci_bus *bus, unsigned int devfn, int where, + u32 val) +{ + if (config_access(PCI_ACCESS_WRITE, bus, devfn, where, &val)) + return -1; + + return PCIBIOS_SUCCESSFUL; +} + +static int pci_config_read(struct pci_bus *bus, unsigned int devfn, + int where, int size, u32 *val) +{ + switch (size) { + case 1: + return read_config_byte(bus, devfn, where, (u8 *) val); + case 2: + return read_config_word(bus, devfn, where, (u16 *) val); + default: + return read_config_dword(bus, devfn, where, val); + } +} + +static int pci_config_write(struct pci_bus *bus, unsigned int devfn, + int where, int size, u32 val) +{ + switch (size) { + case 1: + return write_config_byte(bus, devfn, where, (u8) val); + case 2: + return write_config_word(bus, devfn, where, (u16) val); + default: + return write_config_dword(bus, devfn, where, val); + } +} + +struct pci_ops rc32434_pci_ops = { + .read = pci_config_read, + .write = pci_config_write, +}; diff --git a/arch/mips/pci/pci-rc32434.c b/arch/mips/pci/pci-rc32434.c new file mode 100644 index 000000000000..1c2821e2f494 --- /dev/null +++ b/arch/mips/pci/pci-rc32434.c @@ -0,0 +1,221 @@ +/* + * BRIEF MODULE DESCRIPTION + * PCI initialization for IDT EB434 board + * + * Copyright 2004 IDT Inc. (rischelp@idt.com) + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN + * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include <linux/types.h> +#include <linux/pci.h> +#include <linux/kernel.h> +#include <linux/init.h> + +#include <asm/mach-rc32434/rc32434.h> +#include <asm/mach-rc32434/pci.h> + +#define PCI_ACCESS_READ 0 +#define PCI_ACCESS_WRITE 1 + +/* define an unsigned array for the PCI registers */ +static unsigned int korina_cnfg_regs[25] = { + KORINA_CNFG1, KORINA_CNFG2, KORINA_CNFG3, KORINA_CNFG4, + KORINA_CNFG5, KORINA_CNFG6, KORINA_CNFG7, KORINA_CNFG8, + KORINA_CNFG9, KORINA_CNFG10, KORINA_CNFG11, KORINA_CNFG12, + KORINA_CNFG13, KORINA_CNFG14, KORINA_CNFG15, KORINA_CNFG16, + KORINA_CNFG17, KORINA_CNFG18, KORINA_CNFG19, KORINA_CNFG20, + KORINA_CNFG21, KORINA_CNFG22, KORINA_CNFG23, KORINA_CNFG24 +}; +static struct resource rc32434_res_pci_mem1; +static struct resource rc32434_res_pci_mem2; + +static struct resource rc32434_res_pci_mem1 = { + .name = "PCI MEM1", + .start = 0x50000000, + .end = 0x5FFFFFFF, + .flags = IORESOURCE_MEM, + .parent = &rc32434_res_pci_mem1, + .sibling = NULL, + .child = &rc32434_res_pci_mem2 +}; + +static struct resource rc32434_res_pci_mem2 = { + .name = "PCI Mem2", + .start = 0x60000000, + .end = 0x6FFFFFFF, + .flags = IORESOURCE_MEM, + .parent = &rc32434_res_pci_mem1, + .sibling = NULL, + .child = NULL +}; + +static struct resource rc32434_res_pci_io1 = { + .name = "PCI I/O1", + .start = 0x18800000, + .end = 0x188FFFFF, + .flags = IORESOURCE_IO, +}; + +extern struct pci_ops rc32434_pci_ops; + +#define PCI_MEM1_START PCI_ADDR_START +#define PCI_MEM1_END (PCI_ADDR_START + CPUTOPCI_MEM_WIN - 1) +#define PCI_MEM2_START (PCI_ADDR_START + CPUTOPCI_MEM_WIN) +#define PCI_MEM2_END (PCI_ADDR_START + (2 * CPUTOPCI_MEM_WIN) - 1) +#define PCI_IO1_START (PCI_ADDR_START + (2 * CPUTOPCI_MEM_WIN)) +#define PCI_IO1_END \ + (PCI_ADDR_START + (2 * CPUTOPCI_MEM_WIN) + CPUTOPCI_IO_WIN - 1) +#define PCI_IO2_START \ + (PCI_ADDR_START + (2 * CPUTOPCI_MEM_WIN) + CPUTOPCI_IO_WIN) +#define PCI_IO2_END \ + (PCI_ADDR_START + (2 * CPUTOPCI_MEM_WIN) + (2 * CPUTOPCI_IO_WIN) - 1) + +struct pci_controller rc32434_controller2; + +struct pci_controller rc32434_controller = { + .pci_ops = &rc32434_pci_ops, + .mem_resource = &rc32434_res_pci_mem1, + .io_resource = &rc32434_res_pci_io1, + .mem_offset = 0, + .io_offset = 0, + +}; + +#ifdef __MIPSEB__ +#define PCI_ENDIAN_FLAG PCILBAC_sb_m +#else +#define PCI_ENDIAN_FLAG 0 +#endif + +static int __init rc32434_pcibridge_init(void) +{ + unsigned int pcicvalue, pcicdata = 0; + unsigned int dummyread, pcicntlval; + int loopCount; + unsigned int pci_config_addr; + + pcicvalue = rc32434_pci->pcic; + pcicvalue = (pcicvalue >> PCIM_SHFT) & PCIM_BIT_LEN; + if (!((pcicvalue == PCIM_H_EA) || + (pcicvalue == PCIM_H_IA_FIX) || + (pcicvalue == PCIM_H_IA_RR))) { + pr_err(KERN_ERR "PCI init error!!!\n"); + /* Not in Host Mode, return ERROR */ + return -1; + } + /* Enables the Idle Grant mode, Arbiter Parking */ + pcicdata |= (PCI_CTL_IGM | PCI_CTL_EAP | PCI_CTL_EN); + rc32434_pci->pcic = pcicdata; /* Enable the PCI bus Interface */ + /* Zero out the PCI status & PCI Status Mask */ + for (;;) { + pcicdata = rc32434_pci->pcis; + if (!(pcicdata & PCI_STAT_RIP)) + break; + } + + rc32434_pci->pcis = 0; + rc32434_pci->pcism = 0xFFFFFFFF; + /* Zero out the PCI decoupled registers */ + rc32434_pci->pcidac = 0; /* + * disable PCI decoupled accesses at + * initialization + */ + rc32434_pci->pcidas = 0; /* clear the status */ + rc32434_pci->pcidasm = 0x0000007F; /* Mask all the interrupts */ + /* Mask PCI Messaging Interrupts */ + rc32434_pci_msg->pciiic = 0; + rc32434_pci_msg->pciiim = 0xFFFFFFFF; + rc32434_pci_msg->pciioic = 0; + rc32434_pci_msg->pciioim = 0; + + + /* Setup PCILB0 as Memory Window */ + rc32434_pci->pcilba[0].address = (unsigned int) (PCI_ADDR_START); + + /* setup the PCI map address as same as the local address */ + + rc32434_pci->pcilba[0].mapping = (unsigned int) (PCI_ADDR_START); + + + /* Setup PCILBA1 as MEM */ + rc32434_pci->pcilba[0].control = + (((SIZE_256MB & 0x1f) << PCI_LBAC_SIZE_BIT) | PCI_ENDIAN_FLAG); + dummyread = rc32434_pci->pcilba[0].control; /* flush the CPU write Buffers */ + rc32434_pci->pcilba[1].address = 0x60000000; + rc32434_pci->pcilba[1].mapping = 0x60000000; + + /* setup PCILBA2 as IO Window */ + rc32434_pci->pcilba[1].control = + (((SIZE_256MB & 0x1f) << PCI_LBAC_SIZE_BIT) | PCI_ENDIAN_FLAG); + dummyread = rc32434_pci->pcilba[1].control; /* flush the CPU write Buffers */ + rc32434_pci->pcilba[2].address = 0x18C00000; + rc32434_pci->pcilba[2].mapping = 0x18FFFFFF; + + /* setup PCILBA2 as IO Window */ + rc32434_pci->pcilba[2].control = + (((SIZE_4MB & 0x1f) << PCI_LBAC_SIZE_BIT) | PCI_ENDIAN_FLAG); + dummyread = rc32434_pci->pcilba[2].control; /* flush the CPU write Buffers */ + + /* Setup PCILBA3 as IO Window */ + rc32434_pci->pcilba[3].address = 0x18800000; + rc32434_pci->pcilba[3].mapping = 0x18800000; + rc32434_pci->pcilba[3].control = + ((((SIZE_1MB & 0x1ff) << PCI_LBAC_SIZE_BIT) | PCI_LBAC_MSI) | + PCI_ENDIAN_FLAG); + dummyread = rc32434_pci->pcilba[3].control; /* flush the CPU write Buffers */ + + pci_config_addr = (unsigned int) (0x80000004); + for (loopCount = 0; loopCount < 24; loopCount++) { + rc32434_pci->pcicfga = pci_config_addr; + dummyread = rc32434_pci->pcicfga; + rc32434_pci->pcicfgd = korina_cnfg_regs[loopCount]; + dummyread = rc32434_pci->pcicfgd; + pci_config_addr += 4; + } + rc32434_pci->pcitc = + (unsigned int) ((PCITC_RTIMER_VAL & 0xff) << PCI_TC_RTIMER_BIT) | + ((PCITC_DTIMER_VAL & 0xff) << PCI_TC_DTIMER_BIT); + + pcicntlval = rc32434_pci->pcic; + pcicntlval &= ~PCI_CTL_TNR; + rc32434_pci->pcic = pcicntlval; + pcicntlval = rc32434_pci->pcic; + + return 0; +} + +static int __init rc32434_pci_init(void) +{ + pr_info("PCI: Initializing PCI\n"); + + ioport_resource.start = rc32434_res_pci_io1.start; + ioport_resource.end = rc32434_res_pci_io1.end; + + rc32434_pcibridge_init(); + + register_pci_controller(&rc32434_controller); + rc32434_sync(); + + return 0; +} + +arch_initcall(rc32434_pci_init); diff --git a/arch/mips/pci/pci.c b/arch/mips/pci/pci.c index d7d6cb063d26..77bd5b68dc43 100644 --- a/arch/mips/pci/pci.c +++ b/arch/mips/pci/pci.c @@ -204,7 +204,7 @@ static int pcibios_enable_resources(struct pci_dev *dev, int mask) * If we set up a device for bus mastering, we need to check the latency * timer as certain crappy BIOSes forget to set it properly. */ -unsigned int pcibios_max_latency = 255; +static unsigned int pcibios_max_latency = 255; void pcibios_set_master(struct pci_dev *dev) { diff --git a/arch/mips/rb532/Makefile b/arch/mips/rb532/Makefile new file mode 100644 index 000000000000..8f0b6b6a1625 --- /dev/null +++ b/arch/mips/rb532/Makefile @@ -0,0 +1,7 @@ +# +# Makefile for the RB532 board specific parts of the kernel +# + +obj-y += irq.o time.o setup.o serial.o prom.o gpio.o devices.o + +EXTRA_CFLAGS += -Werror diff --git a/arch/mips/rb532/devices.c b/arch/mips/rb532/devices.c new file mode 100644 index 000000000000..44fb0a62877f --- /dev/null +++ b/arch/mips/rb532/devices.c @@ -0,0 +1,331 @@ +/* + * RouterBoard 500 Platform devices + * + * Copyright (C) 2006 Felix Fietkau <nbd@openwrt.org> + * Copyright (C) 2007 Florian Fainelli <florian@openwrt.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/ctype.h> +#include <linux/string.h> +#include <linux/platform_device.h> +#include <linux/mtd/nand.h> +#include <linux/mtd/mtd.h> +#include <linux/mtd/partitions.h> +#include <linux/gpio_keys.h> +#include <linux/input.h> + +#include <asm/bootinfo.h> + +#include <asm/mach-rc32434/rc32434.h> +#include <asm/mach-rc32434/dma.h> +#include <asm/mach-rc32434/dma_v.h> +#include <asm/mach-rc32434/eth.h> +#include <asm/mach-rc32434/rb.h> +#include <asm/mach-rc32434/integ.h> +#include <asm/mach-rc32434/gpio.h> + +#define ETH0_DMA_RX_IRQ (GROUP1_IRQ_BASE + 0) +#define ETH0_DMA_TX_IRQ (GROUP1_IRQ_BASE + 1) +#define ETH0_RX_OVR_IRQ (GROUP3_IRQ_BASE + 9) +#define ETH0_TX_UND_IRQ (GROUP3_IRQ_BASE + 10) + +#define ETH0_RX_DMA_ADDR (DMA0_BASE_ADDR + 0 * DMA_CHAN_OFFSET) +#define ETH0_TX_DMA_ADDR (DMA0_BASE_ADDR + 1 * DMA_CHAN_OFFSET) + +/* NAND definitions */ +#define GPIO_RDY (1 << 0x08) +#define GPIO_WPX (1 << 0x09) +#define GPIO_ALE (1 << 0x0a) +#define GPIO_CLE (1 << 0x0b) + +extern char *board_type; + +static struct resource korina_dev0_res[] = { + { + .name = "korina_regs", + .start = ETH0_BASE_ADDR, + .end = ETH0_BASE_ADDR + sizeof(struct eth_regs), + .flags = IORESOURCE_MEM, + }, { + .name = "korina_rx", + .start = ETH0_DMA_RX_IRQ, + .end = ETH0_DMA_RX_IRQ, + .flags = IORESOURCE_IRQ + }, { + .name = "korina_tx", + .start = ETH0_DMA_TX_IRQ, + .end = ETH0_DMA_TX_IRQ, + .flags = IORESOURCE_IRQ + }, { + .name = "korina_ovr", + .start = ETH0_RX_OVR_IRQ, + .end = ETH0_RX_OVR_IRQ, + .flags = IORESOURCE_IRQ + }, { + .name = "korina_und", + .start = ETH0_TX_UND_IRQ, + .end = ETH0_TX_UND_IRQ, + .flags = IORESOURCE_IRQ + }, { + .name = "korina_dma_rx", + .start = ETH0_RX_DMA_ADDR, + .end = ETH0_RX_DMA_ADDR + DMA_CHAN_OFFSET - 1, + .flags = IORESOURCE_MEM, + }, { + .name = "korina_dma_tx", + .start = ETH0_TX_DMA_ADDR, + .end = ETH0_TX_DMA_ADDR + DMA_CHAN_OFFSET - 1, + .flags = IORESOURCE_MEM, + } +}; + +static struct korina_device korina_dev0_data = { + .name = "korina0", + .mac = {0xde, 0xca, 0xff, 0xc0, 0xff, 0xee} +}; + +static struct platform_device korina_dev0 = { + .id = 0, + .name = "korina", + .dev.platform_data = &korina_dev0_data, + .resource = korina_dev0_res, + .num_resources = ARRAY_SIZE(korina_dev0_res), +}; + +#define CF_GPIO_NUM 13 + +static struct resource cf_slot0_res[] = { + { + .name = "cf_membase", + .flags = IORESOURCE_MEM + }, { + .name = "cf_irq", + .start = (8 + 4 * 32 + CF_GPIO_NUM), /* 149 */ + .end = (8 + 4 * 32 + CF_GPIO_NUM), + .flags = IORESOURCE_IRQ + } +}; + +static struct cf_device cf_slot0_data = { + .gpio_pin = 13 +}; + +static struct platform_device cf_slot0 = { + .id = 0, + .name = "pata-rb532-cf", + .dev.platform_data = &cf_slot0_data, + .resource = cf_slot0_res, + .num_resources = ARRAY_SIZE(cf_slot0_res), +}; + +/* Resources and device for NAND */ +static int rb532_dev_ready(struct mtd_info *mtd) +{ + return readl(IDT434_REG_BASE + GPIOD) & GPIO_RDY; +} + +static void rb532_cmd_ctrl(struct mtd_info *mtd, int cmd, unsigned int ctrl) +{ + struct nand_chip *chip = mtd->priv; + unsigned char orbits, nandbits; + + if (ctrl & NAND_CTRL_CHANGE) { + orbits = (ctrl & NAND_CLE) << 1; + orbits |= (ctrl & NAND_ALE) >> 1; + + nandbits = (~ctrl & NAND_CLE) << 1; + nandbits |= (~ctrl & NAND_ALE) >> 1; + + set_latch_u5(orbits, nandbits); + } + if (cmd != NAND_CMD_NONE) + writeb(cmd, chip->IO_ADDR_W); +} + +static struct resource nand_slot0_res[] = { + [0] = { + .name = "nand_membase", + .flags = IORESOURCE_MEM + } +}; + +static struct platform_nand_data rb532_nand_data = { + .ctrl.dev_ready = rb532_dev_ready, + .ctrl.cmd_ctrl = rb532_cmd_ctrl, +}; + +static struct platform_device nand_slot0 = { + .name = "gen_nand", + .id = -1, + .resource = nand_slot0_res, + .num_resources = ARRAY_SIZE(nand_slot0_res), + .dev.platform_data = &rb532_nand_data, +}; + +static struct mtd_partition rb532_partition_info[] = { + { + .name = "Routerboard NAND boot", + .offset = 0, + .size = 4 * 1024 * 1024, + }, { + .name = "rootfs", + .offset = MTDPART_OFS_NXTBLK, + .size = MTDPART_SIZ_FULL, + } +}; + +static struct platform_device rb532_led = { + .name = "rb532-led", + .id = 0, +}; + +static struct gpio_keys_button rb532_gpio_btn[] = { + { + .gpio = 1, + .code = BTN_0, + .desc = "S1", + .active_low = 1, + } +}; + +static struct gpio_keys_platform_data rb532_gpio_btn_data = { + .buttons = rb532_gpio_btn, + .nbuttons = ARRAY_SIZE(rb532_gpio_btn), +}; + +static struct platform_device rb532_button = { + .name = "gpio-keys", + .id = -1, + .dev = { + .platform_data = &rb532_gpio_btn_data, + } +}; + +static struct resource rb532_wdt_res[] = { + { + .name = "rb532_wdt_res", + .start = INTEG0_BASE_ADDR, + .end = INTEG0_BASE_ADDR + sizeof(struct integ), + .flags = IORESOURCE_MEM, + } +}; + +static struct platform_device rb532_wdt = { + .name = "rc32434_wdt", + .id = -1, + .resource = rb532_wdt_res, + .num_resources = ARRAY_SIZE(rb532_wdt_res), +}; + +static struct platform_device *rb532_devs[] = { + &korina_dev0, + &nand_slot0, + &cf_slot0, + &rb532_led, + &rb532_button, + &rb532_wdt +}; + +static void __init parse_mac_addr(char *macstr) +{ + int i, j; + unsigned char result, value; + + for (i = 0; i < 6; i++) { + result = 0; + + if (i != 5 && *(macstr + 2) != ':') + return; + + for (j = 0; j < 2; j++) { + if (isxdigit(*macstr) + && (value = + isdigit(*macstr) ? *macstr - + '0' : toupper(*macstr) - 'A' + 10) < 16) { + result = result * 16 + value; + macstr++; + } else + return; + } + + macstr++; + korina_dev0_data.mac[i] = result; + } +} + + +/* DEVICE CONTROLLER 1 */ +#define CFG_DC_DEV1 ((void *)0xb8010010) +#define CFG_DC_DEV2 ((void *)0xb8010020) +#define CFG_DC_DEVBASE 0x0 +#define CFG_DC_DEVMASK 0x4 +#define CFG_DC_DEVC 0x8 +#define CFG_DC_DEVTC 0xC + +/* NAND definitions */ +#define NAND_CHIP_DELAY 25 + +static void __init rb532_nand_setup(void) +{ + switch (mips_machtype) { + case MACH_MIKROTIK_RB532A: + set_latch_u5(LO_FOFF | LO_CEX, + LO_ULED | LO_ALE | LO_CLE | LO_WPX); + break; + default: + set_latch_u5(LO_WPX | LO_FOFF | LO_CEX, + LO_ULED | LO_ALE | LO_CLE); + break; + } + + /* Setup NAND specific settings */ + rb532_nand_data.chip.nr_chips = 1; + rb532_nand_data.chip.nr_partitions = ARRAY_SIZE(rb532_partition_info); + rb532_nand_data.chip.partitions = rb532_partition_info; + rb532_nand_data.chip.chip_delay = NAND_CHIP_DELAY; + rb532_nand_data.chip.options = NAND_NO_AUTOINCR; +} + + +static int __init plat_setup_devices(void) +{ + /* Look for the CF card reader */ + if (!readl(CFG_DC_DEV1 + CFG_DC_DEVMASK)) + rb532_devs[1] = NULL; + else { + cf_slot0_res[0].start = + readl(CFG_DC_DEV1 + CFG_DC_DEVBASE); + cf_slot0_res[0].end = cf_slot0_res[0].start + 0x1000; + } + + /* Read the NAND resources from the device controller */ + nand_slot0_res[0].start = readl(CFG_DC_DEV2 + CFG_DC_DEVBASE); + nand_slot0_res[0].end = nand_slot0_res[0].start + 0x1000; + + /* Initialise the NAND device */ + rb532_nand_setup(); + + return platform_add_devices(rb532_devs, ARRAY_SIZE(rb532_devs)); +} + +static int __init setup_kmac(char *s) +{ + printk(KERN_INFO "korina mac = %s\n", s); + parse_mac_addr(s); + return 0; +} + +__setup("kmac=", setup_kmac); + +arch_initcall(plat_setup_devices); diff --git a/arch/mips/rb532/gpio.c b/arch/mips/rb532/gpio.c new file mode 100644 index 000000000000..b2fe82dba0a5 --- /dev/null +++ b/arch/mips/rb532/gpio.c @@ -0,0 +1,220 @@ +/* + * Miscellaneous functions for IDT EB434 board + * + * Copyright 2004 IDT Inc. (rischelp@idt.com) + * Copyright 2006 Phil Sutter <n0-1@freewrt.org> + * Copyright 2007 Florian Fainelli <florian@openwrt.org> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN + * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include <linux/kernel.h> +#include <linux/gpio.h> +#include <linux/init.h> +#include <linux/types.h> +#include <linux/pci.h> +#include <linux/spinlock.h> +#include <linux/io.h> +#include <linux/platform_device.h> + +#include <asm/addrspace.h> + +#include <asm/mach-rc32434/rb.h> + +struct rb532_gpio_reg __iomem *rb532_gpio_reg0; +EXPORT_SYMBOL(rb532_gpio_reg0); + +struct mpmc_device dev3; + +static struct resource rb532_gpio_reg0_res[] = { + { + .name = "gpio_reg0", + .start = (u32)(IDT434_REG_BASE + GPIOBASE), + .end = (u32)(IDT434_REG_BASE + GPIOBASE + sizeof(struct rb532_gpio_reg)), + .flags = IORESOURCE_MEM, + } +}; + +static struct resource rb532_dev3_ctl_res[] = { + { + .name = "dev3_ctl", + .start = (u32)(IDT434_REG_BASE + DEV3BASE), + .end = (u32)(IDT434_REG_BASE + DEV3BASE + sizeof(struct dev_reg)), + .flags = IORESOURCE_MEM, + } +}; + +void set_434_reg(unsigned reg_offs, unsigned bit, unsigned len, unsigned val) +{ + unsigned flags, data; + unsigned i = 0; + + spin_lock_irqsave(&dev3.lock, flags); + + data = *(volatile unsigned *) (IDT434_REG_BASE + reg_offs); + for (i = 0; i != len; ++i) { + if (val & (1 << i)) + data |= (1 << (i + bit)); + else + data &= ~(1 << (i + bit)); + } + writel(data, (IDT434_REG_BASE + reg_offs)); + + spin_unlock_irqrestore(&dev3.lock, flags); +} +EXPORT_SYMBOL(set_434_reg); + +unsigned get_434_reg(unsigned reg_offs) +{ + return readl(IDT434_REG_BASE + reg_offs); +} +EXPORT_SYMBOL(get_434_reg); + +void set_latch_u5(unsigned char or_mask, unsigned char nand_mask) +{ + unsigned flags; + + spin_lock_irqsave(&dev3.lock, flags); + + dev3.state = (dev3.state | or_mask) & ~nand_mask; + writel(dev3.state, &dev3.base); + + spin_unlock_irqrestore(&dev3.lock, flags); +} +EXPORT_SYMBOL(set_latch_u5); + +unsigned char get_latch_u5(void) +{ + return dev3.state; +} +EXPORT_SYMBOL(get_latch_u5); + +int rb532_gpio_get_value(unsigned gpio) +{ + return readl(&rb532_gpio_reg0->gpiod) & (1 << gpio); +} +EXPORT_SYMBOL(rb532_gpio_get_value); + +void rb532_gpio_set_value(unsigned gpio, int value) +{ + unsigned tmp; + + tmp = readl(&rb532_gpio_reg0->gpiod) & ~(1 << gpio); + if (value) + tmp |= 1 << gpio; + + writel(tmp, (void *)&rb532_gpio_reg0->gpiod); +} +EXPORT_SYMBOL(rb532_gpio_set_value); + +int rb532_gpio_direction_input(unsigned gpio) +{ + writel(readl(&rb532_gpio_reg0->gpiocfg) & ~(1 << gpio), + (void *)&rb532_gpio_reg0->gpiocfg); + + return 0; +} +EXPORT_SYMBOL(rb532_gpio_direction_input); + +int rb532_gpio_direction_output(unsigned gpio, int value) +{ + gpio_set_value(gpio, value); + writel(readl(&rb532_gpio_reg0->gpiocfg) | (1 << gpio), + (void *)&rb532_gpio_reg0->gpiocfg); + + return 0; +} +EXPORT_SYMBOL(rb532_gpio_direction_output); + +void rb532_gpio_set_int_level(unsigned gpio, int value) +{ + unsigned tmp; + + tmp = readl(&rb532_gpio_reg0->gpioilevel) & ~(1 << gpio); + if (value) + tmp |= 1 << gpio; + writel(tmp, (void *)&rb532_gpio_reg0->gpioilevel); +} +EXPORT_SYMBOL(rb532_gpio_set_int_level); + +int rb532_gpio_get_int_level(unsigned gpio) +{ + return readl(&rb532_gpio_reg0->gpioilevel) & (1 << gpio); +} +EXPORT_SYMBOL(rb532_gpio_get_int_level); + +void rb532_gpio_set_int_status(unsigned gpio, int value) +{ + unsigned tmp; + + tmp = readl(&rb532_gpio_reg0->gpioistat); + if (value) + tmp |= 1 << gpio; + writel(tmp, (void *)&rb532_gpio_reg0->gpioistat); +} +EXPORT_SYMBOL(rb532_gpio_set_int_status); + +int rb532_gpio_get_int_status(unsigned gpio) +{ + return readl(&rb532_gpio_reg0->gpioistat) & (1 << gpio); +} +EXPORT_SYMBOL(rb532_gpio_get_int_status); + +void rb532_gpio_set_func(unsigned gpio, int value) +{ + unsigned tmp; + + tmp = readl(&rb532_gpio_reg0->gpiofunc); + if (value) + tmp |= 1 << gpio; + writel(tmp, (void *)&rb532_gpio_reg0->gpiofunc); +} +EXPORT_SYMBOL(rb532_gpio_set_func); + +int rb532_gpio_get_func(unsigned gpio) +{ + return readl(&rb532_gpio_reg0->gpiofunc) & (1 << gpio); +} +EXPORT_SYMBOL(rb532_gpio_get_func); + +int __init rb532_gpio_init(void) +{ + rb532_gpio_reg0 = ioremap_nocache(rb532_gpio_reg0_res[0].start, + rb532_gpio_reg0_res[0].end - + rb532_gpio_reg0_res[0].start); + + if (!rb532_gpio_reg0) { + printk(KERN_ERR "rb532: cannot remap GPIO register 0\n"); + return -ENXIO; + } + + dev3.base = ioremap_nocache(rb532_dev3_ctl_res[0].start, + rb532_dev3_ctl_res[0].end - + rb532_dev3_ctl_res[0].start); + + if (!dev3.base) { + printk(KERN_ERR "rb532: cannot remap device controller 3\n"); + return -ENXIO; + } + + return 0; +} +arch_initcall(rb532_gpio_init); diff --git a/arch/mips/rb532/irq.c b/arch/mips/rb532/irq.c new file mode 100644 index 000000000000..c0d0f950caf2 --- /dev/null +++ b/arch/mips/rb532/irq.c @@ -0,0 +1,209 @@ +/* + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN + * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 675 Mass Ave, Cambridge, MA 02139, USA. + * + * Copyright 2002 MontaVista Software Inc. + * Author: MontaVista Software, Inc. + * stevel@mvista.com or source@mvista.com + */ + +#include <linux/bitops.h> +#include <linux/errno.h> +#include <linux/init.h> +#include <linux/io.h> +#include <linux/kernel_stat.h> +#include <linux/module.h> +#include <linux/signal.h> +#include <linux/sched.h> +#include <linux/types.h> +#include <linux/interrupt.h> +#include <linux/ioport.h> +#include <linux/timex.h> +#include <linux/slab.h> +#include <linux/random.h> +#include <linux/delay.h> + +#include <asm/bootinfo.h> +#include <asm/time.h> +#include <asm/mipsregs.h> +#include <asm/system.h> + +#include <asm/mach-rc32434/rc32434.h> + +struct intr_group { + u32 mask; /* mask of valid bits in pending/mask registers */ + volatile u32 *base_addr; +}; + +#define RC32434_NR_IRQS (GROUP4_IRQ_BASE + 32) + +#if (NR_IRQS < RC32434_NR_IRQS) +#error Too little irqs defined. Did you override <asm/irq.h> ? +#endif + +static const struct intr_group intr_group[NUM_INTR_GROUPS] = { + { + .mask = 0x0000efff, + .base_addr = (u32 *) KSEG1ADDR(IC_GROUP0_PEND + 0 * IC_GROUP_OFFSET)}, + { + .mask = 0x00001fff, + .base_addr = (u32 *) KSEG1ADDR(IC_GROUP0_PEND + 1 * IC_GROUP_OFFSET)}, + { + .mask = 0x00000007, + .base_addr = (u32 *) KSEG1ADDR(IC_GROUP0_PEND + 2 * IC_GROUP_OFFSET)}, + { + .mask = 0x0003ffff, + .base_addr = (u32 *) KSEG1ADDR(IC_GROUP0_PEND + 3 * IC_GROUP_OFFSET)}, + { + .mask = 0xffffffff, + .base_addr = (u32 *) KSEG1ADDR(IC_GROUP0_PEND + 4 * IC_GROUP_OFFSET)} +}; + +#define READ_PEND(base) (*(base)) +#define READ_MASK(base) (*(base + 2)) +#define WRITE_MASK(base, val) (*(base + 2) = (val)) + +static inline int irq_to_group(unsigned int irq_nr) +{ + return (irq_nr - GROUP0_IRQ_BASE) >> 5; +} + +static inline int group_to_ip(unsigned int group) +{ + return group + 2; +} + +static inline void enable_local_irq(unsigned int ip) +{ + int ipnum = 0x100 << ip; + + set_c0_status(ipnum); +} + +static inline void disable_local_irq(unsigned int ip) +{ + int ipnum = 0x100 << ip; + + clear_c0_status(ipnum); +} + +static inline void ack_local_irq(unsigned int ip) +{ + int ipnum = 0x100 << ip; + + clear_c0_cause(ipnum); +} + +static void rb532_enable_irq(unsigned int irq_nr) +{ + int ip = irq_nr - GROUP0_IRQ_BASE; + unsigned int group, intr_bit; + volatile unsigned int *addr; + + if (ip < 0) + enable_local_irq(irq_nr); + else { + group = ip >> 5; + + ip &= (1 << 5) - 1; + intr_bit = 1 << ip; + + enable_local_irq(group_to_ip(group)); + + addr = intr_group[group].base_addr; + WRITE_MASK(addr, READ_MASK(addr) & ~intr_bit); + } +} + +static void rb532_disable_irq(unsigned int irq_nr) +{ + int ip = irq_nr - GROUP0_IRQ_BASE; + unsigned int group, intr_bit, mask; + volatile unsigned int *addr; + + if (ip < 0) { + disable_local_irq(irq_nr); + } else { + group = ip >> 5; + + ip &= (1 << 5) - 1; + intr_bit = 1 << ip; + addr = intr_group[group].base_addr; + mask = READ_MASK(addr); + mask |= intr_bit; + WRITE_MASK(addr, mask); + + /* + * if there are no more interrupts enabled in this + * group, disable corresponding IP + */ + if (mask == intr_group[group].mask) + disable_local_irq(group_to_ip(group)); + } +} + +static void rb532_mask_and_ack_irq(unsigned int irq_nr) +{ + rb532_disable_irq(irq_nr); + ack_local_irq(group_to_ip(irq_to_group(irq_nr))); +} + +static struct irq_chip rc32434_irq_type = { + .name = "RB532", + .ack = rb532_disable_irq, + .mask = rb532_disable_irq, + .mask_ack = rb532_mask_and_ack_irq, + .unmask = rb532_enable_irq, +}; + +void __init arch_init_irq(void) +{ + int i; + + pr_info("Initializing IRQ's: %d out of %d\n", RC32434_NR_IRQS, NR_IRQS); + + for (i = 0; i < RC32434_NR_IRQS; i++) + set_irq_chip_and_handler(i, &rc32434_irq_type, + handle_level_irq); +} + +/* Main Interrupt dispatcher */ +asmlinkage void plat_irq_dispatch(void) +{ + unsigned int ip, pend, group; + volatile unsigned int *addr; + unsigned int cp0_cause = read_c0_cause() & read_c0_status(); + + if (cp0_cause & CAUSEF_IP7) { + do_IRQ(7); + } else { + ip = (cp0_cause & 0x7c00); + if (ip) { + group = 21 + (fls(ip) - 32); + + addr = intr_group[group].base_addr; + + pend = READ_PEND(addr); + pend &= ~READ_MASK(addr); /* only unmasked interrupts */ + pend = 39 + (fls(pend) - 32); + do_IRQ((group << 5) + pend); + } + } +} diff --git a/arch/mips/rb532/prom.c b/arch/mips/rb532/prom.c new file mode 100644 index 000000000000..1bc0af8febf4 --- /dev/null +++ b/arch/mips/rb532/prom.c @@ -0,0 +1,158 @@ +/* + * RouterBoard 500 specific prom routines + * + * Copyright (C) 2003, Peter Sadik <peter.sadik@idt.com> + * Copyright (C) 2005-2006, P.Christeas <p_christ@hol.gr> + * Copyright (C) 2007, Gabor Juhos <juhosg@openwrt.org> + * Felix Fietkau <nbd@openwrt.org> + * Florian Fainelli <florian@openwrt.org> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the + * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + +#include <linux/init.h> +#include <linux/mm.h> +#include <linux/module.h> +#include <linux/string.h> +#include <linux/console.h> +#include <linux/bootmem.h> +#include <linux/ioport.h> +#include <linux/blkdev.h> + +#include <asm/bootinfo.h> +#include <asm/mach-rc32434/ddr.h> +#include <asm/mach-rc32434/prom.h> + +extern void __init setup_serial_port(void); + +unsigned int idt_cpu_freq = 132000000; +EXPORT_SYMBOL(idt_cpu_freq); +unsigned int gpio_bootup_state; +EXPORT_SYMBOL(gpio_bootup_state); + +static struct resource ddr_reg[] = { + { + .name = "ddr-reg", + .start = DDR0_PHYS_ADDR, + .end = DDR0_PHYS_ADDR + sizeof(struct ddr_ram), + .flags = IORESOURCE_MEM, + } +}; + +void __init prom_free_prom_memory(void) +{ + /* No prom memory to free */ +} + +static inline int match_tag(char *arg, const char *tag) +{ + return strncmp(arg, tag, strlen(tag)) == 0; +} + +static inline unsigned long tag2ul(char *arg, const char *tag) +{ + char *num; + + num = arg + strlen(tag); + return simple_strtoul(num, 0, 10); +} + +void __init prom_setup_cmdline(void) +{ + char cmd_line[CL_SIZE]; + char *cp, *board; + int prom_argc; + char **prom_argv, **prom_envp; + int i; + + prom_argc = fw_arg0; + prom_argv = (char **) fw_arg1; + prom_envp = (char **) fw_arg2; + + cp = cmd_line; + /* Note: it is common that parameters start + * at argv[1] and not argv[0], + * however, our elf loader starts at [0] */ + for (i = 0; i < prom_argc; i++) { + if (match_tag(prom_argv[i], FREQ_TAG)) { + idt_cpu_freq = tag2ul(prom_argv[i], FREQ_TAG); + continue; + } +#ifdef IGNORE_CMDLINE_MEM + /* parses out the "mem=xx" arg */ + if (match_tag(prom_argv[i], MEM_TAG)) + continue; +#endif + if (i > 0) + *(cp++) = ' '; + if (match_tag(prom_argv[i], BOARD_TAG)) { + board = prom_argv[i] + strlen(BOARD_TAG); + + if (match_tag(board, BOARD_RB532A)) + mips_machtype = MACH_MIKROTIK_RB532A; + else + mips_machtype = MACH_MIKROTIK_RB532; + } + + if (match_tag(prom_argv[i], GPIO_TAG)) + gpio_bootup_state = tag2ul(prom_argv[i], GPIO_TAG); + + strcpy(cp, prom_argv[i]); + cp += strlen(prom_argv[i]); + } + *(cp++) = ' '; + + i = strlen(arcs_cmdline); + if (i > 0) { + *(cp++) = ' '; + strcpy(cp, arcs_cmdline); + cp += strlen(arcs_cmdline); + } + if (gpio_bootup_state & 0x02) + strcpy(cp, GPIO_INIT_NOBUTTON); + else + strcpy(cp, GPIO_INIT_BUTTON); + + cmd_line[CL_SIZE-1] = '\0'; + + strcpy(arcs_cmdline, cmd_line); +} + +void __init prom_init(void) +{ + struct ddr_ram __iomem *ddr; + phys_t memsize; + phys_t ddrbase; + + ddr = ioremap_nocache(ddr_reg[0].start, + ddr_reg[0].end - ddr_reg[0].start); + + if (!ddr) { + printk(KERN_ERR "Unable to remap DDR register\n"); + return; + } + + ddrbase = (phys_t)&ddr->ddrbase; + memsize = (phys_t)&ddr->ddrmask; + memsize = 0 - memsize; + + prom_setup_cmdline(); + + /* give all RAM to boot allocator, + * except for the first 0x400 and the last 0x200 bytes */ + add_memory_region(ddrbase + 0x400, memsize - 0x600, BOOT_MEM_RAM); +} diff --git a/arch/mips/rb532/serial.c b/arch/mips/rb532/serial.c new file mode 100644 index 000000000000..1a05b5ddee09 --- /dev/null +++ b/arch/mips/rb532/serial.c @@ -0,0 +1,53 @@ +/* + * BRIEF MODULE DESCRIPTION + * Serial port initialisation. + * + * Copyright 2004 IDT Inc. (rischelp@idt.com) + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN + * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include <linux/init.h> +#include <linux/tty.h> +#include <linux/serial_core.h> +#include <linux/serial_8250.h> + +#include <asm/serial.h> +#include <asm/mach-rc32434/rc32434.h> + +extern unsigned int idt_cpu_freq; + +static struct uart_port rb532_uart = { + .type = PORT_16550A, + .line = 0, + .irq = RC32434_UART0_IRQ, + .iotype = UPIO_MEM, + .membase = (char *)KSEG1ADDR(RC32434_UART0_BASE), + .regshift = 2 +}; + +int __init setup_serial_port(void) +{ + rb532_uart.uartclk = idt_cpu_freq; + + return early_serial_setup(&rb532_uart); +} +arch_initcall(setup_serial_port); diff --git a/arch/mips/rb532/setup.c b/arch/mips/rb532/setup.c new file mode 100644 index 000000000000..7aafa95ac20b --- /dev/null +++ b/arch/mips/rb532/setup.c @@ -0,0 +1,79 @@ +/* + * setup.c - boot time setup code + */ + +#include <linux/init.h> + +#include <asm/bootinfo.h> +#include <asm/reboot.h> +#include <asm/time.h> +#include <linux/ioport.h> + +#include <asm/mach-rc32434/rc32434.h> +#include <asm/mach-rc32434/pci.h> + +struct pci_reg __iomem *pci_reg; +EXPORT_SYMBOL(pci_reg); + +static struct resource pci0_res[] = { + { + .name = "pci_reg0", + .start = PCI0_BASE_ADDR, + .end = PCI0_BASE_ADDR + sizeof(struct pci_reg), + .flags = IORESOURCE_MEM, + } +}; + +static void rb_machine_restart(char *command) +{ + /* just jump to the reset vector */ + writel(0x80000001, (void *)KSEG1ADDR(RC32434_REG_BASE + RC32434_RST)); + ((void (*)(void)) KSEG1ADDR(0x1FC00000u))(); +} + +static void rb_machine_halt(void) +{ + for (;;) + continue; +} + +void __init plat_mem_setup(void) +{ + u32 val; + + _machine_restart = rb_machine_restart; + _machine_halt = rb_machine_halt; + pm_power_off = rb_machine_halt; + + set_io_port_base(KSEG1); + + pci_reg = ioremap_nocache(pci0_res[0].start, + pci0_res[0].end - pci0_res[0].start); + if (!pci_reg) { + printk(KERN_ERR "Could not remap PCI registers\n"); + return; + } + + val = __raw_readl(&pci_reg->pcic); + val &= 0xFFFFFF7; + __raw_writel(val, (void *)&pci_reg->pcic); + +#ifdef CONFIG_PCI + /* Enable PCI interrupts in EPLD Mask register */ + *epld_mask = 0x0; + *(epld_mask + 1) = 0x0; +#endif + write_c0_wired(0); +} + +const char *get_system_type(void) +{ + switch (mips_machtype) { + case MACH_MIKROTIK_RB532A: + return "Mikrotik RB532A"; + break; + default: + return "Mikrotik RB532"; + break; + } +} diff --git a/arch/mips/rb532/time.c b/arch/mips/rb532/time.c new file mode 100644 index 000000000000..db74edf8cefb --- /dev/null +++ b/arch/mips/rb532/time.c @@ -0,0 +1,67 @@ +/* + * Carsten Langgaard, carstenl@mips.com + * Copyright (C) 1999,2000 MIPS Technologies, Inc. All rights reserved. + * + * This program is free software; you can distribute it and/or modify it + * under the terms of the GNU General Public License (Version 2) as + * published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Setting up the clock on the MIPS boards. + */ + +#include <linux/init.h> +#include <linux/kernel_stat.h> +#include <linux/ptrace.h> +#include <linux/sched.h> +#include <linux/spinlock.h> +#include <linux/mc146818rtc.h> +#include <linux/irq.h> +#include <linux/timex.h> + +#include <asm/mipsregs.h> +#include <asm/debug.h> +#include <asm/time.h> +#include <asm/mach-rc32434/rc32434.h> + +extern unsigned int idt_cpu_freq; + +/* + * Figure out the r4k offset, the amount to increment the compare + * register for each time tick. There is no RTC available. + * + * The RC32434 counts at half the CPU *core* speed. + */ +static unsigned long __init cal_r4koff(void) +{ + mips_hpt_frequency = idt_cpu_freq * IDT_CLOCK_MULT / 2; + + return mips_hpt_frequency / HZ; +} + +void __init plat_time_init(void) +{ + unsigned int est_freq, flags; + unsigned long r4k_offset; + + local_irq_save(flags); + + printk(KERN_INFO "calculating r4koff... "); + r4k_offset = cal_r4koff(); + printk("%08lx(%d)\n", r4k_offset, (int) r4k_offset); + + est_freq = 2 * r4k_offset * HZ; + est_freq += 5000; /* round */ + est_freq -= est_freq % 10000; + printk(KERN_INFO "CPU frequency %d.%02d MHz\n", est_freq / 1000000, + (est_freq % 1000000) * 100 / 1000000); + local_irq_restore(flags); +} diff --git a/arch/mips/sgi-ip22/ip22-platform.c b/arch/mips/sgi-ip22/ip22-platform.c index fc6df96305ed..60141235ec40 100644 --- a/arch/mips/sgi-ip22/ip22-platform.c +++ b/arch/mips/sgi-ip22/ip22-platform.c @@ -188,8 +188,7 @@ static int __init sgi_button_devinit(void) if (ip22_is_fullhouse()) return 0; /* full house has no volume buttons */ - return IS_ERR(platform_device_register_simple("sgiindybtns", - -1, NULL, 0)); + return IS_ERR(platform_device_register_simple("sgibtns", -1, NULL, 0)); } device_initcall(sgi_button_devinit); diff --git a/arch/mips/sgi-ip22/ip28-berr.c b/arch/mips/sgi-ip22/ip28-berr.c index fee7a2e0e538..30e12e2ec4b5 100644 --- a/arch/mips/sgi-ip22/ip28-berr.c +++ b/arch/mips/sgi-ip22/ip28-berr.c @@ -412,7 +412,7 @@ static int ip28_be_interrupt(const struct pt_regs *regs) * Now we have an asynchronous bus error, speculatively or DMA caused. * Need to search all DMA descriptors for the error address. */ - for (i = 0; i < ARRAY_SIZE(hpc3); ++i) { + for (i = 0; i < sizeof(hpc3)/sizeof(struct hpc3_stat); ++i) { struct hpc3_stat *hp = (struct hpc3_stat *)&hpc3 + i; if ((cpu_err_stat & CPU_ERRMASK) && (cpu_err_addr == hp->ndptr || cpu_err_addr == hp->cbp)) @@ -421,7 +421,7 @@ static int ip28_be_interrupt(const struct pt_regs *regs) (gio_err_addr == hp->ndptr || gio_err_addr == hp->cbp)) break; } - if (i < ARRAY_SIZE(hpc3)) { + if (i < sizeof(hpc3)/sizeof(struct hpc3_stat)) { struct hpc3_stat *hp = (struct hpc3_stat *)&hpc3 + i; printk(KERN_ERR "at DMA addresses: HPC3 @ %08lx:" " ctl %08x, ndp %08x, cbp %08x\n", diff --git a/arch/mips/sgi-ip32/ip32-platform.c b/arch/mips/sgi-ip32/ip32-platform.c index 2ee401ba0b25..3d63721e0e80 100644 --- a/arch/mips/sgi-ip32/ip32-platform.c +++ b/arch/mips/sgi-ip32/ip32-platform.c @@ -85,18 +85,7 @@ device_initcall(sgio2audio_devinit); static __init int sgio2btns_devinit(void) { - struct platform_device *pd; - int ret; - - pd = platform_device_alloc("sgio2btns", -1); - if (!pd) - return -ENOMEM; - - ret = platform_device_add(pd); - if (ret) - platform_device_put(pd); - - return ret; + return IS_ERR(platform_device_register_simple("sgibtns", -1, NULL, 0)); } device_initcall(sgio2btns_devinit); diff --git a/arch/mips/txx9/Kconfig b/arch/mips/txx9/Kconfig index b92a134ef124..6de4c5aa92be 100644 --- a/arch/mips/txx9/Kconfig +++ b/arch/mips/txx9/Kconfig @@ -7,6 +7,8 @@ config TOSHIBA_RBTX4927 bool "Toshiba RBTX49[23]7 board" depends on MACH_TX49XX select SOC_TX4927 + # TX4937 is subset of TX4938 + select SOC_TX4938 help This Toshiba board is based on the TX4927 processor. Say Y here to support this machine type diff --git a/arch/mips/txx9/generic/Makefile b/arch/mips/txx9/generic/Makefile index 668fdaad6448..9c120771e65f 100644 --- a/arch/mips/txx9/generic/Makefile +++ b/arch/mips/txx9/generic/Makefile @@ -4,8 +4,8 @@ obj-y += setup.o obj-$(CONFIG_PCI) += pci.o -obj-$(CONFIG_SOC_TX4927) += mem_tx4927.o irq_tx4927.o -obj-$(CONFIG_SOC_TX4938) += mem_tx4938.o irq_tx4938.o +obj-$(CONFIG_SOC_TX4927) += mem_tx4927.o setup_tx4927.o irq_tx4927.o +obj-$(CONFIG_SOC_TX4938) += mem_tx4927.o setup_tx4938.o irq_tx4938.o obj-$(CONFIG_TOSHIBA_FPCIB0) += smsc_fdc37m81x.o obj-$(CONFIG_KGDB) += dbgio.o diff --git a/arch/mips/txx9/generic/irq_tx4927.c b/arch/mips/txx9/generic/irq_tx4927.c index 6377bd8a9050..cbea1fdde82b 100644 --- a/arch/mips/txx9/generic/irq_tx4927.c +++ b/arch/mips/txx9/generic/irq_tx4927.c @@ -31,7 +31,7 @@ void __init tx4927_irq_init(void) { mips_cpu_irq_init(); - txx9_irq_init(TX4927_IRC_REG); + txx9_irq_init(TX4927_IRC_REG & 0xfffffffffULL); set_irq_chained_handler(MIPS_CPU_IRQ_BASE + TX4927_IRC_INT, handle_simple_irq); } diff --git a/arch/mips/txx9/generic/irq_tx4938.c b/arch/mips/txx9/generic/irq_tx4938.c index 5fc86c9c9d2f..6eac684bf190 100644 --- a/arch/mips/txx9/generic/irq_tx4938.c +++ b/arch/mips/txx9/generic/irq_tx4938.c @@ -19,7 +19,7 @@ void __init tx4938_irq_init(void) { mips_cpu_irq_init(); - txx9_irq_init(TX4938_IRC_REG); + txx9_irq_init(TX4938_IRC_REG & 0xfffffffffULL); set_irq_chained_handler(MIPS_CPU_IRQ_BASE + TX4938_IRC_INT, handle_simple_irq); } diff --git a/arch/mips/txx9/generic/mem_tx4927.c b/arch/mips/txx9/generic/mem_tx4927.c index 12dfc377bf2f..ef6ea6e97873 100644 --- a/arch/mips/txx9/generic/mem_tx4927.c +++ b/arch/mips/txx9/generic/mem_tx4927.c @@ -1,5 +1,5 @@ /* - * linux/arch/mips/tx4927/common/tx4927_prom.c + * linux/arch/mips/txx9/generic/mem_tx4927.c * * common tx4927 memory interface * @@ -32,8 +32,9 @@ #include <linux/init.h> #include <linux/types.h> #include <linux/io.h> +#include <asm/txx9/tx4927.h> -static unsigned int __init tx4927_process_sdccr(unsigned long addr) +static unsigned int __init tx4927_process_sdccr(u64 __iomem *addr) { u64 val; unsigned int sdccr_ce; @@ -45,97 +46,32 @@ static unsigned int __init tx4927_process_sdccr(unsigned long addr) unsigned int rs = 0; unsigned int cs = 0; unsigned int mw = 0; - unsigned int msize = 0; - val = __raw_readq((void __iomem *)addr); + val = __raw_readq(addr); /* MVMCP -- need #defs for these bits masks */ sdccr_ce = ((val & (1 << 10)) >> 10); sdccr_bs = ((val & (1 << 8)) >> 8); sdccr_rs = ((val & (3 << 5)) >> 5); - sdccr_cs = ((val & (3 << 2)) >> 2); + sdccr_cs = ((val & (7 << 2)) >> 2); sdccr_mw = ((val & (1 << 0)) >> 0); if (sdccr_ce) { - switch (sdccr_bs) { - case 0:{ - bs = 2; - break; - } - case 1:{ - bs = 4; - break; - } - } - switch (sdccr_rs) { - case 0:{ - rs = 2048; - break; - } - case 1:{ - rs = 4096; - break; - } - case 2:{ - rs = 8192; - break; - } - case 3:{ - rs = 0; - break; - } - } - switch (sdccr_cs) { - case 0:{ - cs = 256; - break; - } - case 1:{ - cs = 512; - break; - } - case 2:{ - cs = 1024; - break; - } - case 3:{ - cs = 2048; - break; - } - } - switch (sdccr_mw) { - case 0:{ - mw = 8; - break; - } /* 8 bytes = 64 bits */ - case 1:{ - mw = 4; - break; - } /* 4 bytes = 32 bits */ - } + bs = 2 << sdccr_bs; + rs = 2048 << sdccr_rs; + cs = 256 << sdccr_cs; + mw = 8 >> sdccr_mw; } - /* bytes per chip MB per chip num chips */ - msize = (((rs * cs * mw) / (1024 * 1024)) * bs); - - return (msize); + return rs * cs * mw * bs; } - unsigned int __init tx4927_get_mem_size(void) { - unsigned int c0; - unsigned int c1; - unsigned int c2; - unsigned int c3; - unsigned int total; - - /* MVMCP -- need #defs for these registers */ - c0 = tx4927_process_sdccr(0xff1f8000); - c1 = tx4927_process_sdccr(0xff1f8008); - c2 = tx4927_process_sdccr(0xff1f8010); - c3 = tx4927_process_sdccr(0xff1f8018); - total = c0 + c1 + c2 + c3; + unsigned int total = 0; + int i; - return (total); + for (i = 0; i < ARRAY_SIZE(tx4927_sdramcptr->cr); i++) + total += tx4927_process_sdccr(&tx4927_sdramcptr->cr[i]); + return total; } diff --git a/arch/mips/txx9/generic/mem_tx4938.c b/arch/mips/txx9/generic/mem_tx4938.c deleted file mode 100644 index 20baeaeba4cd..000000000000 --- a/arch/mips/txx9/generic/mem_tx4938.c +++ /dev/null @@ -1,124 +0,0 @@ -/* - * linux/arch/mips/tx4938/common/prom.c - * - * common tx4938 memory interface - * Copyright (C) 2000-2001 Toshiba Corporation - * - * 2003-2005 (c) MontaVista Software, Inc. This file is licensed under the - * terms of the GNU General Public License version 2. This program is - * licensed "as is" without any warranty of any kind, whether express - * or implied. - * - * Support for TX4938 in 2.6 - Manish Lachwani (mlachwani@mvista.com) - */ - -#include <linux/init.h> -#include <linux/types.h> -#include <linux/io.h> - -static unsigned int __init -tx4938_process_sdccr(u64 * addr) -{ - u64 val; - unsigned int sdccr_ce; - unsigned int sdccr_rs; - unsigned int sdccr_cs; - unsigned int sdccr_mw; - unsigned int rs = 0; - unsigned int cs = 0; - unsigned int mw = 0; - unsigned int bc = 4; - unsigned int msize = 0; - - val = ____raw_readq((void __iomem *)addr); - - /* MVMCP -- need #defs for these bits masks */ - sdccr_ce = ((val & (1 << 10)) >> 10); - sdccr_rs = ((val & (3 << 5)) >> 5); - sdccr_cs = ((val & (7 << 2)) >> 2); - sdccr_mw = ((val & (1 << 0)) >> 0); - - if (sdccr_ce) { - switch (sdccr_rs) { - case 0:{ - rs = 2048; - break; - } - case 1:{ - rs = 4096; - break; - } - case 2:{ - rs = 8192; - break; - } - default:{ - rs = 0; - break; - } - } - switch (sdccr_cs) { - case 0:{ - cs = 256; - break; - } - case 1:{ - cs = 512; - break; - } - case 2:{ - cs = 1024; - break; - } - case 3:{ - cs = 2048; - break; - } - case 4:{ - cs = 4096; - break; - } - default:{ - cs = 0; - break; - } - } - switch (sdccr_mw) { - case 0:{ - mw = 8; - break; - } /* 8 bytes = 64 bits */ - case 1:{ - mw = 4; - break; - } /* 4 bytes = 32 bits */ - } - } - - /* bytes per chip MB per chip bank count */ - msize = (((rs * cs * mw) / (1024 * 1024)) * (bc)); - - /* MVMCP -- bc hard coded to 4 from table 9.3.1 */ - /* boad supports bc=2 but no way to detect */ - - return (msize); -} - -unsigned int __init -tx4938_get_mem_size(void) -{ - unsigned int c0; - unsigned int c1; - unsigned int c2; - unsigned int c3; - unsigned int total; - - /* MVMCP -- need #defs for these registers */ - c0 = tx4938_process_sdccr((u64 *) 0xff1f8000); - c1 = tx4938_process_sdccr((u64 *) 0xff1f8008); - c2 = tx4938_process_sdccr((u64 *) 0xff1f8010); - c3 = tx4938_process_sdccr((u64 *) 0xff1f8018); - total = c0 + c1 + c2 + c3; - - return (total); -} diff --git a/arch/mips/txx9/generic/setup.c b/arch/mips/txx9/generic/setup.c index 5afc5d5cab03..8c60c78b9a9e 100644 --- a/arch/mips/txx9/generic/setup.c +++ b/arch/mips/txx9/generic/setup.c @@ -19,7 +19,9 @@ #include <linux/module.h> #include <linux/clk.h> #include <linux/err.h> +#include <linux/gpio.h> #include <asm/bootinfo.h> +#include <asm/time.h> #include <asm/txx9/generic.h> #ifdef CONFIG_CPU_TX49XX #include <asm/txx9/tx4938.h> @@ -30,6 +32,7 @@ struct resource txx9_ce_res[8]; static char txx9_ce_res_name[8][4]; /* "CEn" */ /* pcode, internal register */ +unsigned int txx9_pcode; char txx9_pcode_str[8]; static struct resource txx9_reg_res = { .name = txx9_pcode_str, @@ -59,15 +62,16 @@ unsigned int txx9_master_clock; unsigned int txx9_cpu_clock; unsigned int txx9_gbus_clock; +int txx9_ccfg_toeon __initdata = 1; /* Minimum CLK support */ struct clk *clk_get(struct device *dev, const char *id) { if (!strcmp(id, "spi-baseclk")) - return (struct clk *)(txx9_gbus_clock / 2 / 4); + return (struct clk *)((unsigned long)txx9_gbus_clock / 2 / 4); if (!strcmp(id, "imbus_clk")) - return (struct clk *)(txx9_gbus_clock / 2); + return (struct clk *)((unsigned long)txx9_gbus_clock / 2); return ERR_PTR(-ENOENT); } EXPORT_SYMBOL(clk_get); @@ -94,6 +98,22 @@ void clk_put(struct clk *clk) } EXPORT_SYMBOL(clk_put); +/* GPIO support */ + +#ifdef CONFIG_GENERIC_GPIO +int gpio_to_irq(unsigned gpio) +{ + return -EINVAL; +} +EXPORT_SYMBOL(gpio_to_irq); + +int irq_to_gpio(unsigned irq) +{ + return -EINVAL; +} +EXPORT_SYMBOL(irq_to_gpio); +#endif + extern struct txx9_board_vec jmr3927_vec; extern struct txx9_board_vec rbtx4927_vec; extern struct txx9_board_vec rbtx4937_vec; @@ -107,6 +127,12 @@ void __init prom_init_cmdline(void) int argc = (int)fw_arg0; char **argv = (char **)fw_arg1; int i; /* Always ignore the "-c" at argv[0] */ +#ifdef CONFIG_64BIT + char *fixed_argv[32]; + for (i = 0; i < argc; i++) + fixed_argv[i] = (char *)(long)(*((__s32 *)argv + i)); + argv = fixed_argv; +#endif /* ignore all built-in args if any f/w args given */ if (argc > 1) @@ -126,15 +152,19 @@ void __init prom_init(void) #endif #ifdef CONFIG_CPU_TX49XX switch (TX4938_REV_PCODE()) { +#ifdef CONFIG_TOSHIBA_RBTX4927 case 0x4927: txx9_board_vec = &rbtx4927_vec; break; case 0x4937: txx9_board_vec = &rbtx4937_vec; break; +#endif +#ifdef CONFIG_TOSHIBA_RBTX4938 case 0x4938: txx9_board_vec = &rbtx4938_vec; break; +#endif } #endif @@ -160,6 +190,10 @@ char * __init prom_getcmdline(void) /* wrappers */ void __init plat_mem_setup(void) { + ioport_resource.start = 0; + ioport_resource.end = ~0UL; /* no limit */ + iomem_resource.start = 0; + iomem_resource.end = ~0UL; /* no limit */ txx9_board_vec->mem_setup(); } diff --git a/arch/mips/txx9/generic/setup_tx4927.c b/arch/mips/txx9/generic/setup_tx4927.c new file mode 100644 index 000000000000..89d6e28add93 --- /dev/null +++ b/arch/mips/txx9/generic/setup_tx4927.c @@ -0,0 +1,194 @@ +/* + * TX4927 setup routines + * Based on linux/arch/mips/txx9/rbtx4938/setup.c, + * and RBTX49xx patch from CELF patch archive. + * + * 2003-2005 (c) MontaVista Software, Inc. + * (C) Copyright TOSHIBA CORPORATION 2000-2001, 2004-2007 + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + */ +#include <linux/init.h> +#include <linux/ioport.h> +#include <linux/delay.h> +#include <linux/serial_core.h> +#include <linux/param.h> +#include <asm/txx9irq.h> +#include <asm/txx9tmr.h> +#include <asm/txx9pio.h> +#include <asm/txx9/generic.h> +#include <asm/txx9/tx4927.h> + +void __init tx4927_wdr_init(void) +{ + /* clear WatchDogReset (W1C) */ + tx4927_ccfg_set(TX4927_CCFG_WDRST); + /* do reset on watchdog */ + tx4927_ccfg_set(TX4927_CCFG_WR); +} + +static struct resource tx4927_sdram_resource[4]; + +void __init tx4927_setup(void) +{ + int i; + __u32 divmode; + int cpuclk = 0; + u64 ccfg; + + txx9_reg_res_init(TX4927_REV_PCODE(), TX4927_REG_BASE, + TX4927_REG_SIZE); + + /* SDRAMC,EBUSC are configured by PROM */ + for (i = 0; i < 8; i++) { + if (!(TX4927_EBUSC_CR(i) & 0x8)) + continue; /* disabled */ + txx9_ce_res[i].start = (unsigned long)TX4927_EBUSC_BA(i); + txx9_ce_res[i].end = + txx9_ce_res[i].start + TX4927_EBUSC_SIZE(i) - 1; + request_resource(&iomem_resource, &txx9_ce_res[i]); + } + + /* clocks */ + ccfg = ____raw_readq(&tx4927_ccfgptr->ccfg); + if (txx9_master_clock) { + /* calculate gbus_clock and cpu_clock from master_clock */ + divmode = (__u32)ccfg & TX4927_CCFG_DIVMODE_MASK; + switch (divmode) { + case TX4927_CCFG_DIVMODE_8: + case TX4927_CCFG_DIVMODE_10: + case TX4927_CCFG_DIVMODE_12: + case TX4927_CCFG_DIVMODE_16: + txx9_gbus_clock = txx9_master_clock * 4; break; + default: + txx9_gbus_clock = txx9_master_clock; + } + switch (divmode) { + case TX4927_CCFG_DIVMODE_2: + case TX4927_CCFG_DIVMODE_8: + cpuclk = txx9_gbus_clock * 2; break; + case TX4927_CCFG_DIVMODE_2_5: + case TX4927_CCFG_DIVMODE_10: + cpuclk = txx9_gbus_clock * 5 / 2; break; + case TX4927_CCFG_DIVMODE_3: + case TX4927_CCFG_DIVMODE_12: + cpuclk = txx9_gbus_clock * 3; break; + case TX4927_CCFG_DIVMODE_4: + case TX4927_CCFG_DIVMODE_16: + cpuclk = txx9_gbus_clock * 4; break; + } + txx9_cpu_clock = cpuclk; + } else { + if (txx9_cpu_clock == 0) + txx9_cpu_clock = 200000000; /* 200MHz */ + /* calculate gbus_clock and master_clock from cpu_clock */ + cpuclk = txx9_cpu_clock; + divmode = (__u32)ccfg & TX4927_CCFG_DIVMODE_MASK; + switch (divmode) { + case TX4927_CCFG_DIVMODE_2: + case TX4927_CCFG_DIVMODE_8: + txx9_gbus_clock = cpuclk / 2; break; + case TX4927_CCFG_DIVMODE_2_5: + case TX4927_CCFG_DIVMODE_10: + txx9_gbus_clock = cpuclk * 2 / 5; break; + case TX4927_CCFG_DIVMODE_3: + case TX4927_CCFG_DIVMODE_12: + txx9_gbus_clock = cpuclk / 3; break; + case TX4927_CCFG_DIVMODE_4: + case TX4927_CCFG_DIVMODE_16: + txx9_gbus_clock = cpuclk / 4; break; + } + switch (divmode) { + case TX4927_CCFG_DIVMODE_8: + case TX4927_CCFG_DIVMODE_10: + case TX4927_CCFG_DIVMODE_12: + case TX4927_CCFG_DIVMODE_16: + txx9_master_clock = txx9_gbus_clock / 4; break; + default: + txx9_master_clock = txx9_gbus_clock; + } + } + /* change default value to udelay/mdelay take reasonable time */ + loops_per_jiffy = txx9_cpu_clock / HZ / 2; + + /* CCFG */ + tx4927_wdr_init(); + /* clear BusErrorOnWrite flag (W1C) */ + tx4927_ccfg_set(TX4927_CCFG_BEOW); + /* enable Timeout BusError */ + if (txx9_ccfg_toeon) + tx4927_ccfg_set(TX4927_CCFG_TOE); + + /* DMA selection */ + txx9_clear64(&tx4927_ccfgptr->pcfg, TX4927_PCFG_DMASEL_ALL); + + /* Use external clock for external arbiter */ + if (!(____raw_readq(&tx4927_ccfgptr->ccfg) & TX4927_CCFG_PCIARB)) + txx9_clear64(&tx4927_ccfgptr->pcfg, TX4927_PCFG_PCICLKEN_ALL); + + printk(KERN_INFO "%s -- %dMHz(M%dMHz) CRIR:%08x CCFG:%llx PCFG:%llx\n", + txx9_pcode_str, + (cpuclk + 500000) / 1000000, + (txx9_master_clock + 500000) / 1000000, + (__u32)____raw_readq(&tx4927_ccfgptr->crir), + (unsigned long long)____raw_readq(&tx4927_ccfgptr->ccfg), + (unsigned long long)____raw_readq(&tx4927_ccfgptr->pcfg)); + + printk(KERN_INFO "%s SDRAMC --", txx9_pcode_str); + for (i = 0; i < 4; i++) { + __u64 cr = TX4927_SDRAMC_CR(i); + unsigned long base, size; + if (!((__u32)cr & 0x00000400)) + continue; /* disabled */ + base = (unsigned long)(cr >> 49) << 21; + size = (((unsigned long)(cr >> 33) & 0x7fff) + 1) << 21; + printk(" CR%d:%016llx", i, (unsigned long long)cr); + tx4927_sdram_resource[i].name = "SDRAM"; + tx4927_sdram_resource[i].start = base; + tx4927_sdram_resource[i].end = base + size - 1; + tx4927_sdram_resource[i].flags = IORESOURCE_MEM; + request_resource(&iomem_resource, &tx4927_sdram_resource[i]); + } + printk(" TR:%09llx\n", + (unsigned long long)____raw_readq(&tx4927_sdramcptr->tr)); + + /* TMR */ + /* disable all timers */ + for (i = 0; i < TX4927_NR_TMR; i++) + txx9_tmr_init(TX4927_TMR_REG(i) & 0xfffffffffULL); + + /* PIO */ + txx9_gpio_init(TX4927_PIO_REG & 0xfffffffffULL, 0, TX4927_NUM_PIO); + __raw_writel(0, &tx4927_pioptr->maskcpu); + __raw_writel(0, &tx4927_pioptr->maskext); +} + +void __init tx4927_time_init(unsigned int tmrnr) +{ + if (____raw_readq(&tx4927_ccfgptr->ccfg) & TX4927_CCFG_TINTDIS) + txx9_clockevent_init(TX4927_TMR_REG(tmrnr) & 0xfffffffffULL, + TXX9_IRQ_BASE + TX4927_IR_TMR(tmrnr), + TXX9_IMCLK); +} + +void __init tx4927_setup_serial(void) +{ +#ifdef CONFIG_SERIAL_TXX9 + int i; + struct uart_port req; + + for (i = 0; i < 2; i++) { + memset(&req, 0, sizeof(req)); + req.line = i; + req.iotype = UPIO_MEM; + req.membase = (unsigned char __iomem *)TX4927_SIO_REG(i); + req.mapbase = TX4927_SIO_REG(i) & 0xfffffffffULL; + req.irq = TXX9_IRQ_BASE + TX4927_IR_SIO(i); + req.flags |= UPF_BUGGY_UART /*HAVE_CTS_LINE*/; + req.uartclk = TXX9_IMCLK; + early_serial_txx9_setup(&req); + } +#endif /* CONFIG_SERIAL_TXX9 */ +} diff --git a/arch/mips/txx9/generic/setup_tx4938.c b/arch/mips/txx9/generic/setup_tx4938.c new file mode 100644 index 000000000000..317378d8579d --- /dev/null +++ b/arch/mips/txx9/generic/setup_tx4938.c @@ -0,0 +1,259 @@ +/* + * TX4938/4937 setup routines + * Based on linux/arch/mips/txx9/rbtx4938/setup.c, + * and RBTX49xx patch from CELF patch archive. + * + * 2003-2005 (c) MontaVista Software, Inc. + * (C) Copyright TOSHIBA CORPORATION 2000-2001, 2004-2007 + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + */ +#include <linux/init.h> +#include <linux/ioport.h> +#include <linux/delay.h> +#include <linux/serial_core.h> +#include <linux/param.h> +#include <asm/txx9irq.h> +#include <asm/txx9tmr.h> +#include <asm/txx9pio.h> +#include <asm/txx9/generic.h> +#include <asm/txx9/tx4938.h> + +void __init tx4938_wdr_init(void) +{ + /* clear WatchDogReset (W1C) */ + tx4938_ccfg_set(TX4938_CCFG_WDRST); + /* do reset on watchdog */ + tx4938_ccfg_set(TX4938_CCFG_WR); +} + +static struct resource tx4938_sdram_resource[4]; +static struct resource tx4938_sram_resource; + +#define TX4938_SRAM_SIZE 0x800 + +void __init tx4938_setup(void) +{ + int i; + __u32 divmode; + int cpuclk = 0; + u64 ccfg; + + txx9_reg_res_init(TX4938_REV_PCODE(), TX4938_REG_BASE, + TX4938_REG_SIZE); + + /* SDRAMC,EBUSC are configured by PROM */ + for (i = 0; i < 8; i++) { + if (!(TX4938_EBUSC_CR(i) & 0x8)) + continue; /* disabled */ + txx9_ce_res[i].start = (unsigned long)TX4938_EBUSC_BA(i); + txx9_ce_res[i].end = + txx9_ce_res[i].start + TX4938_EBUSC_SIZE(i) - 1; + request_resource(&iomem_resource, &txx9_ce_res[i]); + } + + /* clocks */ + ccfg = ____raw_readq(&tx4938_ccfgptr->ccfg); + if (txx9_master_clock) { + /* calculate gbus_clock and cpu_clock from master_clock */ + divmode = (__u32)ccfg & TX4938_CCFG_DIVMODE_MASK; + switch (divmode) { + case TX4938_CCFG_DIVMODE_8: + case TX4938_CCFG_DIVMODE_10: + case TX4938_CCFG_DIVMODE_12: + case TX4938_CCFG_DIVMODE_16: + case TX4938_CCFG_DIVMODE_18: + txx9_gbus_clock = txx9_master_clock * 4; break; + default: + txx9_gbus_clock = txx9_master_clock; + } + switch (divmode) { + case TX4938_CCFG_DIVMODE_2: + case TX4938_CCFG_DIVMODE_8: + cpuclk = txx9_gbus_clock * 2; break; + case TX4938_CCFG_DIVMODE_2_5: + case TX4938_CCFG_DIVMODE_10: + cpuclk = txx9_gbus_clock * 5 / 2; break; + case TX4938_CCFG_DIVMODE_3: + case TX4938_CCFG_DIVMODE_12: + cpuclk = txx9_gbus_clock * 3; break; + case TX4938_CCFG_DIVMODE_4: + case TX4938_CCFG_DIVMODE_16: + cpuclk = txx9_gbus_clock * 4; break; + case TX4938_CCFG_DIVMODE_4_5: + case TX4938_CCFG_DIVMODE_18: + cpuclk = txx9_gbus_clock * 9 / 2; break; + } + txx9_cpu_clock = cpuclk; + } else { + if (txx9_cpu_clock == 0) + txx9_cpu_clock = 300000000; /* 300MHz */ + /* calculate gbus_clock and master_clock from cpu_clock */ + cpuclk = txx9_cpu_clock; + divmode = (__u32)ccfg & TX4938_CCFG_DIVMODE_MASK; + switch (divmode) { + case TX4938_CCFG_DIVMODE_2: + case TX4938_CCFG_DIVMODE_8: + txx9_gbus_clock = cpuclk / 2; break; + case TX4938_CCFG_DIVMODE_2_5: + case TX4938_CCFG_DIVMODE_10: + txx9_gbus_clock = cpuclk * 2 / 5; break; + case TX4938_CCFG_DIVMODE_3: + case TX4938_CCFG_DIVMODE_12: + txx9_gbus_clock = cpuclk / 3; break; + case TX4938_CCFG_DIVMODE_4: + case TX4938_CCFG_DIVMODE_16: + txx9_gbus_clock = cpuclk / 4; break; + case TX4938_CCFG_DIVMODE_4_5: + case TX4938_CCFG_DIVMODE_18: + txx9_gbus_clock = cpuclk * 2 / 9; break; + } + switch (divmode) { + case TX4938_CCFG_DIVMODE_8: + case TX4938_CCFG_DIVMODE_10: + case TX4938_CCFG_DIVMODE_12: + case TX4938_CCFG_DIVMODE_16: + case TX4938_CCFG_DIVMODE_18: + txx9_master_clock = txx9_gbus_clock / 4; break; + default: + txx9_master_clock = txx9_gbus_clock; + } + } + /* change default value to udelay/mdelay take reasonable time */ + loops_per_jiffy = txx9_cpu_clock / HZ / 2; + + /* CCFG */ + tx4938_wdr_init(); + /* clear BusErrorOnWrite flag (W1C) */ + tx4938_ccfg_set(TX4938_CCFG_BEOW); + /* enable Timeout BusError */ + if (txx9_ccfg_toeon) + tx4938_ccfg_set(TX4938_CCFG_TOE); + + /* DMA selection */ + txx9_clear64(&tx4938_ccfgptr->pcfg, TX4938_PCFG_DMASEL_ALL); + + /* Use external clock for external arbiter */ + if (!(____raw_readq(&tx4938_ccfgptr->ccfg) & TX4938_CCFG_PCIARB)) + txx9_clear64(&tx4938_ccfgptr->pcfg, TX4938_PCFG_PCICLKEN_ALL); + + printk(KERN_INFO "%s -- %dMHz(M%dMHz) CRIR:%08x CCFG:%llx PCFG:%llx\n", + txx9_pcode_str, + (cpuclk + 500000) / 1000000, + (txx9_master_clock + 500000) / 1000000, + (__u32)____raw_readq(&tx4938_ccfgptr->crir), + (unsigned long long)____raw_readq(&tx4938_ccfgptr->ccfg), + (unsigned long long)____raw_readq(&tx4938_ccfgptr->pcfg)); + + printk(KERN_INFO "%s SDRAMC --", txx9_pcode_str); + for (i = 0; i < 4; i++) { + __u64 cr = TX4938_SDRAMC_CR(i); + unsigned long base, size; + if (!((__u32)cr & 0x00000400)) + continue; /* disabled */ + base = (unsigned long)(cr >> 49) << 21; + size = (((unsigned long)(cr >> 33) & 0x7fff) + 1) << 21; + printk(" CR%d:%016llx", i, (unsigned long long)cr); + tx4938_sdram_resource[i].name = "SDRAM"; + tx4938_sdram_resource[i].start = base; + tx4938_sdram_resource[i].end = base + size - 1; + tx4938_sdram_resource[i].flags = IORESOURCE_MEM; + request_resource(&iomem_resource, &tx4938_sdram_resource[i]); + } + printk(" TR:%09llx\n", + (unsigned long long)____raw_readq(&tx4938_sdramcptr->tr)); + + /* SRAM */ + if (txx9_pcode == 0x4938 && ____raw_readq(&tx4938_sramcptr->cr) & 1) { + unsigned int size = TX4938_SRAM_SIZE; + tx4938_sram_resource.name = "SRAM"; + tx4938_sram_resource.start = + (____raw_readq(&tx4938_sramcptr->cr) >> (39-11)) + & ~(size - 1); + tx4938_sram_resource.end = + tx4938_sram_resource.start + TX4938_SRAM_SIZE - 1; + tx4938_sram_resource.flags = IORESOURCE_MEM; + request_resource(&iomem_resource, &tx4938_sram_resource); + } + + /* TMR */ + /* disable all timers */ + for (i = 0; i < TX4938_NR_TMR; i++) + txx9_tmr_init(TX4938_TMR_REG(i) & 0xfffffffffULL); + + /* DMA */ + for (i = 0; i < 2; i++) + ____raw_writeq(TX4938_DMA_MCR_MSTEN, + (void __iomem *)(TX4938_DMA_REG(i) + 0x50)); + + /* PIO */ + txx9_gpio_init(TX4938_PIO_REG & 0xfffffffffULL, 0, TX4938_NUM_PIO); + __raw_writel(0, &tx4938_pioptr->maskcpu); + __raw_writel(0, &tx4938_pioptr->maskext); + + if (txx9_pcode == 0x4938) { + __u64 pcfg = ____raw_readq(&tx4938_ccfgptr->pcfg); + /* set PCIC1 reset */ + txx9_set64(&tx4938_ccfgptr->clkctr, TX4938_CLKCTR_PCIC1RST); + if (pcfg & (TX4938_PCFG_ETH0_SEL | TX4938_PCFG_ETH1_SEL)) { + mdelay(1); /* at least 128 cpu clock */ + /* clear PCIC1 reset */ + txx9_clear64(&tx4938_ccfgptr->clkctr, + TX4938_CLKCTR_PCIC1RST); + } else { + printk(KERN_INFO "%s: stop PCIC1\n", txx9_pcode_str); + /* stop PCIC1 */ + txx9_set64(&tx4938_ccfgptr->clkctr, + TX4938_CLKCTR_PCIC1CKD); + } + if (!(pcfg & TX4938_PCFG_ETH0_SEL)) { + printk(KERN_INFO "%s: stop ETH0\n", txx9_pcode_str); + txx9_set64(&tx4938_ccfgptr->clkctr, + TX4938_CLKCTR_ETH0RST); + txx9_set64(&tx4938_ccfgptr->clkctr, + TX4938_CLKCTR_ETH0CKD); + } + if (!(pcfg & TX4938_PCFG_ETH1_SEL)) { + printk(KERN_INFO "%s: stop ETH1\n", txx9_pcode_str); + txx9_set64(&tx4938_ccfgptr->clkctr, + TX4938_CLKCTR_ETH1RST); + txx9_set64(&tx4938_ccfgptr->clkctr, + TX4938_CLKCTR_ETH1CKD); + } + } +} + +void __init tx4938_time_init(unsigned int tmrnr) +{ + if (____raw_readq(&tx4938_ccfgptr->ccfg) & TX4938_CCFG_TINTDIS) + txx9_clockevent_init(TX4938_TMR_REG(tmrnr) & 0xfffffffffULL, + TXX9_IRQ_BASE + TX4938_IR_TMR(tmrnr), + TXX9_IMCLK); +} + +void __init tx4938_setup_serial(void) +{ +#ifdef CONFIG_SERIAL_TXX9 + int i; + struct uart_port req; + unsigned int ch_mask = 0; + + if (__raw_readq(&tx4938_ccfgptr->pcfg) & TX4938_PCFG_ETH0_SEL) + ch_mask |= 1 << 1; /* disable SIO1 by PCFG setting */ + for (i = 0; i < 2; i++) { + if ((1 << i) & ch_mask) + continue; + memset(&req, 0, sizeof(req)); + req.line = i; + req.iotype = UPIO_MEM; + req.membase = (unsigned char __iomem *)TX4938_SIO_REG(i); + req.mapbase = TX4938_SIO_REG(i) & 0xfffffffffULL; + req.irq = TXX9_IRQ_BASE + TX4938_IR_SIO(i); + req.flags |= UPF_BUGGY_UART /*HAVE_CTS_LINE*/; + req.uartclk = TXX9_IMCLK; + early_serial_txx9_setup(&req); + } +#endif /* CONFIG_SERIAL_TXX9 */ +} diff --git a/arch/mips/txx9/jmr3927/setup.c b/arch/mips/txx9/jmr3927/setup.c index 5e35ef73c5a5..03647ebe4130 100644 --- a/arch/mips/txx9/jmr3927/setup.c +++ b/arch/mips/txx9/jmr3927/setup.c @@ -105,14 +105,6 @@ static void __init jmr3927_mem_setup(void) _machine_halt = jmr3927_machine_halt; pm_power_off = jmr3927_machine_power_off; - /* - * IO/MEM resources. - */ - ioport_resource.start = 0; - ioport_resource.end = 0xffffffff; - iomem_resource.start = 0; - iomem_resource.end = 0xffffffff; - /* Reboot on panic */ panic_timeout = 180; diff --git a/arch/mips/txx9/rbtx4927/irq.c b/arch/mips/txx9/rbtx4927/irq.c index 70f13211bc2a..cd748a930328 100644 --- a/arch/mips/txx9/rbtx4927/irq.c +++ b/arch/mips/txx9/rbtx4927/irq.c @@ -126,14 +126,12 @@ static struct irq_chip toshiba_rbtx4927_irq_ioc_type = { .mask_ack = toshiba_rbtx4927_irq_ioc_disable, .unmask = toshiba_rbtx4927_irq_ioc_enable, }; -#define TOSHIBA_RBTX4927_IOC_INTR_ENAB (void __iomem *)0xbc002000UL -#define TOSHIBA_RBTX4927_IOC_INTR_STAT (void __iomem *)0xbc002006UL static int toshiba_rbtx4927_irq_nested(int sw_irq) { u8 level3; - level3 = readb(TOSHIBA_RBTX4927_IOC_INTR_STAT) & 0x1f; + level3 = readb(rbtx4927_imstat_addr) & 0x1f; if (level3) sw_irq = RBTX4927_IRQ_IOC + fls(level3) - 1; return (sw_irq); @@ -154,18 +152,18 @@ static void toshiba_rbtx4927_irq_ioc_enable(unsigned int irq) { unsigned char v; - v = readb(TOSHIBA_RBTX4927_IOC_INTR_ENAB); + v = readb(rbtx4927_imask_addr); v |= (1 << (irq - RBTX4927_IRQ_IOC)); - writeb(v, TOSHIBA_RBTX4927_IOC_INTR_ENAB); + writeb(v, rbtx4927_imask_addr); } static void toshiba_rbtx4927_irq_ioc_disable(unsigned int irq) { unsigned char v; - v = readb(TOSHIBA_RBTX4927_IOC_INTR_ENAB); + v = readb(rbtx4927_imask_addr); v &= ~(1 << (irq - RBTX4927_IRQ_IOC)); - writeb(v, TOSHIBA_RBTX4927_IOC_INTR_ENAB); + writeb(v, rbtx4927_imask_addr); mmiowb(); } diff --git a/arch/mips/txx9/rbtx4927/prom.c b/arch/mips/txx9/rbtx4927/prom.c index 942e627d2dc1..5c0de54ebdd2 100644 --- a/arch/mips/txx9/rbtx4927/prom.c +++ b/arch/mips/txx9/rbtx4927/prom.c @@ -36,10 +36,6 @@ void __init rbtx4927_prom_init(void) { - extern int tx4927_get_mem_size(void); - int msize; - prom_init_cmdline(); - msize = tx4927_get_mem_size(); - add_memory_region(0, msize << 20, BOOT_MEM_RAM); + add_memory_region(0, tx4927_get_mem_size(), BOOT_MEM_RAM); } diff --git a/arch/mips/txx9/rbtx4927/setup.c b/arch/mips/txx9/rbtx4927/setup.c index 1657fd935da8..3da20ea3e55c 100644 --- a/arch/mips/txx9/rbtx4927/setup.c +++ b/arch/mips/txx9/rbtx4927/setup.c @@ -53,17 +53,10 @@ #include <asm/io.h> #include <asm/processor.h> #include <asm/reboot.h> -#include <asm/time.h> -#include <asm/txx9tmr.h> #include <asm/txx9/generic.h> #include <asm/txx9/pci.h> #include <asm/txx9/rbtx4927.h> #include <asm/txx9/tx4938.h> /* for TX4937 */ -#ifdef CONFIG_SERIAL_TXX9 -#include <linux/serial_core.h> -#endif - -static int tx4927_ccfg_toeon = 1; #ifdef CONFIG_PCI static void __init tx4927_pci_setup(void) @@ -184,14 +177,14 @@ static void toshiba_rbtx4927_restart(char *command) printk(KERN_NOTICE "System Rebooting...\n"); /* enable the s/w reset register */ - writeb(RBTX4927_SW_RESET_ENABLE_SET, RBTX4927_SW_RESET_ENABLE); + writeb(1, rbtx4927_softresetlock_addr); /* wait for enable to be seen */ - while ((readb(RBTX4927_SW_RESET_ENABLE) & - RBTX4927_SW_RESET_ENABLE_SET) == 0x00); + while (!(readb(rbtx4927_softresetlock_addr) & 1)) + ; /* do a s/w reset */ - writeb(RBTX4927_SW_RESET_DO_SET, RBTX4927_SW_RESET_DO); + writeb(1, rbtx4927_softreset_addr); /* do something passive while waiting for reset */ local_irq_disable(); @@ -213,9 +206,11 @@ static void toshiba_rbtx4927_power_off(void) /* no return */ } +static void __init rbtx4927_clock_init(void); +static void __init rbtx4937_clock_init(void); + static void __init rbtx4927_mem_setup(void) { - int i; u32 cp0_config; char *argptr; @@ -227,16 +222,18 @@ static void __init rbtx4927_mem_setup(void) cp0_config = cp0_config & ~(TX49_CONF_IC | TX49_CONF_DC); write_c0_config(cp0_config); - ioport_resource.end = 0xffffffff; - iomem_resource.end = 0xffffffff; + if (TX4927_REV_PCODE() == 0x4927) { + rbtx4927_clock_init(); + tx4927_setup(); + } else { + rbtx4937_clock_init(); + tx4938_setup(); + } _machine_restart = toshiba_rbtx4927_restart; _machine_halt = toshiba_rbtx4927_halt; pm_power_off = toshiba_rbtx4927_power_off; - for (i = 0; i < TX4927_NR_TMR; i++) - txx9_tmr_init(TX4927_TMR_REG(0) & 0xfffffffffULL); - #ifdef CONFIG_PCI txx9_alloc_pci_controller(&txx9_primary_pcic, RBTX4927_PCIMEM, RBTX4927_PCIMEM_SIZE, @@ -245,36 +242,13 @@ static void __init rbtx4927_mem_setup(void) set_io_port_base(KSEG1 + RBTX4927_ISA_IO_OFFSET); #endif - /* CCFG */ - /* do reset on watchdog */ - tx4927_ccfg_set(TX4927_CCFG_WR); - /* enable Timeout BusError */ - if (tx4927_ccfg_toeon) - tx4927_ccfg_set(TX4927_CCFG_TOE); - -#ifdef CONFIG_SERIAL_TXX9 - { - extern int early_serial_txx9_setup(struct uart_port *port); - struct uart_port req; - for(i = 0; i < 2; i++) { - memset(&req, 0, sizeof(req)); - req.line = i; - req.iotype = UPIO_MEM; - req.membase = (char *)(0xff1ff300 + i * 0x100); - req.mapbase = 0xff1ff300 + i * 0x100; - req.irq = TXX9_IRQ_BASE + TX4927_IR_SIO(i); - req.flags |= UPF_BUGGY_UART /*HAVE_CTS_LINE*/; - req.uartclk = 50000000; - early_serial_txx9_setup(&req); - } - } + tx4927_setup_serial(); #ifdef CONFIG_SERIAL_TXX9_CONSOLE argptr = prom_getcmdline(); if (strstr(argptr, "console=") == NULL) { strcat(argptr, " console=ttyS0,38400"); } #endif -#endif #ifdef CONFIG_ROOT_NFS argptr = prom_getcmdline(); @@ -291,19 +265,7 @@ static void __init rbtx4927_mem_setup(void) #endif } -static void __init rbtx49x7_common_time_init(void) -{ - /* change default value to udelay/mdelay take reasonable time */ - loops_per_jiffy = txx9_cpu_clock / HZ / 2; - - mips_hpt_frequency = txx9_cpu_clock / 2; - if (____raw_readq(&tx4927_ccfgptr->ccfg) & TX4927_CCFG_TINTDIS) - txx9_clockevent_init(TX4927_TMR_REG(0) & 0xfffffffffULL, - TXX9_IRQ_BASE + 17, - 50000000); -} - -static void __init rbtx4927_time_init(void) +static void __init rbtx4927_clock_init(void) { /* * ASSUMPTION: PCIDIVMODE is configured for PCI 33MHz or 66MHz. @@ -325,11 +287,9 @@ static void __init rbtx4927_time_init(void) default: txx9_cpu_clock = 200000000; /* 200MHz */ } - - rbtx49x7_common_time_init(); } -static void __init rbtx4937_time_init(void) +static void __init rbtx4937_clock_init(void) { /* * ASSUMPTION: PCIDIVMODE is configured for PCI 33MHz or 66MHz. @@ -357,15 +317,18 @@ static void __init rbtx4937_time_init(void) default: txx9_cpu_clock = 333333333; /* 333MHz */ } +} - rbtx49x7_common_time_init(); +static void __init rbtx4927_time_init(void) +{ + tx4927_time_init(0); } static int __init toshiba_rbtx4927_rtc_init(void) { - static struct resource __initdata res = { - .start = 0x1c010000, - .end = 0x1c010000 + 0x800 - 1, + struct resource res = { + .start = RBTX4927_BRAMRTC_BASE - IO_BASE, + .end = RBTX4927_BRAMRTC_BASE - IO_BASE + 0x800 - 1, .flags = IORESOURCE_MEM, }; struct platform_device *dev = @@ -375,7 +338,7 @@ static int __init toshiba_rbtx4927_rtc_init(void) static int __init rbtx4927_ne_init(void) { - static struct resource __initdata res[] = { + struct resource res[] = { { .start = RBTX4927_RTL_8019_BASE, .end = RBTX4927_RTL_8019_BASE + 0x20 - 1, @@ -434,7 +397,7 @@ struct txx9_board_vec rbtx4937_vec __initdata = { .prom_init = rbtx4927_prom_init, .mem_setup = rbtx4927_mem_setup, .irq_setup = rbtx4927_irq_setup, - .time_init = rbtx4937_time_init, + .time_init = rbtx4927_time_init, .device_init = rbtx4927_device_init, .arch_init = rbtx4937_arch_init, #ifdef CONFIG_PCI diff --git a/arch/mips/txx9/rbtx4938/prom.c b/arch/mips/txx9/rbtx4938/prom.c index fbb37458ddb2..ee189519ce5a 100644 --- a/arch/mips/txx9/rbtx4938/prom.c +++ b/arch/mips/txx9/rbtx4938/prom.c @@ -18,12 +18,8 @@ void __init rbtx4938_prom_init(void) { - extern int tx4938_get_mem_size(void); - int msize; #ifndef CONFIG_TX4938_NAND_BOOT prom_init_cmdline(); #endif - - msize = tx4938_get_mem_size(); - add_memory_region(0, msize << 20, BOOT_MEM_RAM); + add_memory_region(0, tx4938_get_mem_size(), BOOT_MEM_RAM); } diff --git a/arch/mips/txx9/rbtx4938/setup.c b/arch/mips/txx9/rbtx4938/setup.c index aaa987ae0f83..6c2b99bb8af6 100644 --- a/arch/mips/txx9/rbtx4938/setup.c +++ b/arch/mips/txx9/rbtx4938/setup.c @@ -20,21 +20,14 @@ #include <linux/gpio.h> #include <asm/reboot.h> -#include <asm/time.h> -#include <asm/txx9tmr.h> #include <asm/io.h> #include <asm/txx9/generic.h> #include <asm/txx9/pci.h> #include <asm/txx9/rbtx4938.h> -#ifdef CONFIG_SERIAL_TXX9 -#include <linux/serial_core.h> -#endif #include <linux/spi/spi.h> #include <asm/txx9/spi.h> #include <asm/txx9pio.h> -static int tx4938_ccfg_toeon = 1; - static void rbtx4938_machine_halt(void) { printk(KERN_NOTICE "System Halted\n"); @@ -182,188 +175,10 @@ static void __init rbtx4938_spi_setup(void) } static struct resource rbtx4938_fpga_resource; -static struct resource tx4938_sdram_resource[4]; -static struct resource tx4938_sram_resource; - -void __init tx4938_board_setup(void) -{ - int i; - unsigned long divmode; - int cpuclk = 0; - unsigned long pcode = TX4938_REV_PCODE(); - - ioport_resource.start = 0; - ioport_resource.end = 0xffffffff; - iomem_resource.start = 0; - iomem_resource.end = 0xffffffff; /* expand to 4GB */ - - txx9_reg_res_init(pcode, TX4938_REG_BASE, - TX4938_REG_SIZE); - /* SDRAMC,EBUSC are configured by PROM */ - for (i = 0; i < 8; i++) { - if (!(TX4938_EBUSC_CR(i) & 0x8)) - continue; /* disabled */ - txx9_ce_res[i].start = (unsigned long)TX4938_EBUSC_BA(i); - txx9_ce_res[i].end = - txx9_ce_res[i].start + TX4938_EBUSC_SIZE(i) - 1; - request_resource(&iomem_resource, &txx9_ce_res[i]); - } - - /* clocks */ - if (txx9_master_clock) { - u64 ccfg = ____raw_readq(&tx4938_ccfgptr->ccfg); - /* calculate gbus_clock and cpu_clock_freq from master_clock */ - divmode = (__u32)ccfg & TX4938_CCFG_DIVMODE_MASK; - switch (divmode) { - case TX4938_CCFG_DIVMODE_8: - case TX4938_CCFG_DIVMODE_10: - case TX4938_CCFG_DIVMODE_12: - case TX4938_CCFG_DIVMODE_16: - case TX4938_CCFG_DIVMODE_18: - txx9_gbus_clock = txx9_master_clock * 4; break; - default: - txx9_gbus_clock = txx9_master_clock; - } - switch (divmode) { - case TX4938_CCFG_DIVMODE_2: - case TX4938_CCFG_DIVMODE_8: - cpuclk = txx9_gbus_clock * 2; break; - case TX4938_CCFG_DIVMODE_2_5: - case TX4938_CCFG_DIVMODE_10: - cpuclk = txx9_gbus_clock * 5 / 2; break; - case TX4938_CCFG_DIVMODE_3: - case TX4938_CCFG_DIVMODE_12: - cpuclk = txx9_gbus_clock * 3; break; - case TX4938_CCFG_DIVMODE_4: - case TX4938_CCFG_DIVMODE_16: - cpuclk = txx9_gbus_clock * 4; break; - case TX4938_CCFG_DIVMODE_4_5: - case TX4938_CCFG_DIVMODE_18: - cpuclk = txx9_gbus_clock * 9 / 2; break; - } - txx9_cpu_clock = cpuclk; - } else { - u64 ccfg = ____raw_readq(&tx4938_ccfgptr->ccfg); - if (txx9_cpu_clock == 0) { - txx9_cpu_clock = 300000000; /* 300MHz */ - } - /* calculate gbus_clock and master_clock from cpu_clock_freq */ - cpuclk = txx9_cpu_clock; - divmode = (__u32)ccfg & TX4938_CCFG_DIVMODE_MASK; - switch (divmode) { - case TX4938_CCFG_DIVMODE_2: - case TX4938_CCFG_DIVMODE_8: - txx9_gbus_clock = cpuclk / 2; break; - case TX4938_CCFG_DIVMODE_2_5: - case TX4938_CCFG_DIVMODE_10: - txx9_gbus_clock = cpuclk * 2 / 5; break; - case TX4938_CCFG_DIVMODE_3: - case TX4938_CCFG_DIVMODE_12: - txx9_gbus_clock = cpuclk / 3; break; - case TX4938_CCFG_DIVMODE_4: - case TX4938_CCFG_DIVMODE_16: - txx9_gbus_clock = cpuclk / 4; break; - case TX4938_CCFG_DIVMODE_4_5: - case TX4938_CCFG_DIVMODE_18: - txx9_gbus_clock = cpuclk * 2 / 9; break; - } - switch (divmode) { - case TX4938_CCFG_DIVMODE_8: - case TX4938_CCFG_DIVMODE_10: - case TX4938_CCFG_DIVMODE_12: - case TX4938_CCFG_DIVMODE_16: - case TX4938_CCFG_DIVMODE_18: - txx9_master_clock = txx9_gbus_clock / 4; break; - default: - txx9_master_clock = txx9_gbus_clock; - } - } - /* change default value to udelay/mdelay take reasonable time */ - loops_per_jiffy = txx9_cpu_clock / HZ / 2; - - /* CCFG */ - /* clear WatchDogReset,BusErrorOnWrite flag (W1C) */ - tx4938_ccfg_set(TX4938_CCFG_WDRST | TX4938_CCFG_BEOW); - /* do reset on watchdog */ - tx4938_ccfg_set(TX4938_CCFG_WR); - /* clear PCIC1 reset */ - txx9_clear64(&tx4938_ccfgptr->clkctr, TX4938_CLKCTR_PCIC1RST); - - /* enable Timeout BusError */ - if (tx4938_ccfg_toeon) - tx4938_ccfg_set(TX4938_CCFG_TOE); - - /* DMA selection */ - txx9_clear64(&tx4938_ccfgptr->pcfg, TX4938_PCFG_DMASEL_ALL); - - /* Use external clock for external arbiter */ - if (!(____raw_readq(&tx4938_ccfgptr->ccfg) & TX4938_CCFG_PCIARB)) - txx9_clear64(&tx4938_ccfgptr->pcfg, TX4938_PCFG_PCICLKEN_ALL); - - printk(KERN_INFO "%s -- %dMHz(M%dMHz) CRIR:%08x CCFG:%llx PCFG:%llx\n", - txx9_pcode_str, - (cpuclk + 500000) / 1000000, - (txx9_master_clock + 500000) / 1000000, - (__u32)____raw_readq(&tx4938_ccfgptr->crir), - (unsigned long long)____raw_readq(&tx4938_ccfgptr->ccfg), - (unsigned long long)____raw_readq(&tx4938_ccfgptr->pcfg)); - - printk(KERN_INFO "%s SDRAMC --", txx9_pcode_str); - for (i = 0; i < 4; i++) { - unsigned long long cr = tx4938_sdramcptr->cr[i]; - unsigned long ram_base, ram_size; - if (!((unsigned long)cr & 0x00000400)) - continue; /* disabled */ - ram_base = (unsigned long)(cr >> 49) << 21; - ram_size = ((unsigned long)(cr >> 33) + 1) << 21; - if (ram_base >= 0x20000000) - continue; /* high memory (ignore) */ - printk(" CR%d:%016Lx", i, cr); - tx4938_sdram_resource[i].name = "SDRAM"; - tx4938_sdram_resource[i].start = ram_base; - tx4938_sdram_resource[i].end = ram_base + ram_size - 1; - tx4938_sdram_resource[i].flags = IORESOURCE_MEM; - request_resource(&iomem_resource, &tx4938_sdram_resource[i]); - } - printk(" TR:%09Lx\n", tx4938_sdramcptr->tr); - - /* SRAM */ - if (tx4938_sramcptr->cr & 1) { - unsigned int size = 0x800; - unsigned long base = - (tx4938_sramcptr->cr >> (39-11)) & ~(size - 1); - tx4938_sram_resource.name = "SRAM"; - tx4938_sram_resource.start = base; - tx4938_sram_resource.end = base + size - 1; - tx4938_sram_resource.flags = IORESOURCE_MEM; - request_resource(&iomem_resource, &tx4938_sram_resource); - } - - /* TMR */ - for (i = 0; i < TX4938_NR_TMR; i++) - txx9_tmr_init(TX4938_TMR_REG(i) & 0xfffffffffULL); - - /* enable DMA */ - for (i = 0; i < 2; i++) - ____raw_writeq(TX4938_DMA_MCR_MSTEN, - (void __iomem *)(TX4938_DMA_REG(i) + 0x50)); - - /* PIO */ - __raw_writel(0, &tx4938_pioptr->maskcpu); - __raw_writel(0, &tx4938_pioptr->maskext); - -#ifdef CONFIG_PCI - txx9_alloc_pci_controller(&txx9_primary_pcic, 0, 0, 0, 0); -#endif -} static void __init rbtx4938_time_init(void) { - mips_hpt_frequency = txx9_cpu_clock / 2; - if (____raw_readq(&tx4938_ccfgptr->ccfg) & TX4938_CCFG_TINTDIS) - txx9_clockevent_init(TX4938_TMR_REG(0) & 0xfffffffffULL, - TXX9_IRQ_BASE + TX4938_IR_TMR(0), - txx9_gbus_clock / 2); + tx4938_time_init(0); } static void __init rbtx4938_mem_setup(void) @@ -371,39 +186,24 @@ static void __init rbtx4938_mem_setup(void) unsigned long long pcfg; char *argptr; - iomem_resource.end = 0xffffffff; /* 4GB */ - if (txx9_master_clock == 0) txx9_master_clock = 25000000; /* 25MHz */ - tx4938_board_setup(); -#ifndef CONFIG_PCI + + tx4938_setup(); + +#ifdef CONFIG_PCI + txx9_alloc_pci_controller(&txx9_primary_pcic, 0, 0, 0, 0); +#else set_io_port_base(RBTX4938_ETHER_BASE); #endif -#ifdef CONFIG_SERIAL_TXX9 - { - extern int early_serial_txx9_setup(struct uart_port *port); - int i; - struct uart_port req; - for(i = 0; i < 2; i++) { - memset(&req, 0, sizeof(req)); - req.line = i; - req.iotype = UPIO_MEM; - req.membase = (char *)(0xff1ff300 + i * 0x100); - req.mapbase = 0xff1ff300 + i * 0x100; - req.irq = RBTX4938_IRQ_IRC_SIO(i); - req.flags |= UPF_BUGGY_UART /*HAVE_CTS_LINE*/; - req.uartclk = 50000000; - early_serial_txx9_setup(&req); - } - } + tx4938_setup_serial(); #ifdef CONFIG_SERIAL_TXX9_CONSOLE argptr = prom_getcmdline(); if (strstr(argptr, "console=") == NULL) { strcat(argptr, " console=ttyS0,38400"); } #endif -#endif #ifdef CONFIG_TOSHIBA_RBTX4938_MPLEX_PIO58_61 printk("PIOSEL: disabling both ata and nand selection\n"); @@ -457,7 +257,7 @@ static void __init rbtx4938_mem_setup(void) rbtx4938_fpga_resource.start = CPHYSADDR(RBTX4938_FPGA_REG_ADDR); rbtx4938_fpga_resource.end = CPHYSADDR(RBTX4938_FPGA_REG_ADDR) + 0xffff; rbtx4938_fpga_resource.flags = IORESOURCE_MEM | IORESOURCE_BUSY; - if (request_resource(&iomem_resource, &rbtx4938_fpga_resource)) + if (request_resource(&txx9_ce_res[2], &rbtx4938_fpga_resource)) printk("request resource for fpga failed\n"); _machine_restart = rbtx4938_machine_restart; @@ -488,18 +288,6 @@ static int __init rbtx4938_ne_init(void) return IS_ERR(dev) ? PTR_ERR(dev) : 0; } -/* GPIO support */ - -int gpio_to_irq(unsigned gpio) -{ - return -EINVAL; -} - -int irq_to_gpio(unsigned irq) -{ - return -EINVAL; -} - static DEFINE_SPINLOCK(rbtx4938_spi_gpio_lock); static void rbtx4938_spi_gpio_set(struct gpio_chip *chip, unsigned int offset, @@ -579,7 +367,6 @@ static int __init rbtx4938_spi_init(void) static void __init rbtx4938_arch_init(void) { - txx9_gpio_init(TX4938_PIO_REG & 0xfffffffffULL, 0, 16); gpiochip_add(&rbtx4938_spi_gpio_chip); rbtx4938_pci_setup(); rbtx4938_spi_init(); diff --git a/arch/mn10300/kernel/mn10300-serial.c b/arch/mn10300/kernel/mn10300-serial.c index b9c268c6b2fb..8b054e7a8ae8 100644 --- a/arch/mn10300/kernel/mn10300-serial.c +++ b/arch/mn10300/kernel/mn10300-serial.c @@ -392,7 +392,7 @@ static int mask_test_and_clear(volatile u8 *ptr, u8 mask) static void mn10300_serial_receive_interrupt(struct mn10300_serial_port *port) { struct uart_icount *icount = &port->uart.icount; - struct tty_struct *tty = port->uart.info->tty; + struct tty_struct *tty = port->uart.info->port.tty; unsigned ix; int count; u8 st, ch, push, status, overrun; diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile index d0d358d367ec..04e3449e1f42 100644 --- a/arch/powerpc/kvm/Makefile +++ b/arch/powerpc/kvm/Makefile @@ -4,7 +4,7 @@ EXTRA_CFLAGS += -Ivirt/kvm -Iarch/powerpc/kvm -common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o) +common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o) kvm-objs := $(common-objs) powerpc.o emulate.o booke_guest.o obj-$(CONFIG_KVM) += kvm.o diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 777e0f34e0ea..53826a5f6c06 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -145,6 +145,9 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_USER_MEMORY: r = 1; break; + case KVM_CAP_COALESCED_MMIO: + r = KVM_COALESCED_MMIO_PAGE_OFFSET; + break; default: r = 0; break; @@ -167,6 +170,10 @@ int kvm_arch_set_memory_region(struct kvm *kvm, return 0; } +void kvm_arch_flush_shadow(struct kvm *kvm) +{ +} + struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id) { struct kvm_vcpu *vcpu; @@ -240,10 +247,6 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) { } -void decache_vcpus_on_cpu(int cpu) -{ -} - int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu, struct kvm_debug_guest *dbg) { diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 84a7fed4cd4e..11230b0db957 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -31,7 +31,7 @@ static int psw_interrupts_disabled(struct kvm_vcpu *vcpu) } static int __interrupt_is_deliverable(struct kvm_vcpu *vcpu, - struct interrupt_info *inti) + struct kvm_s390_interrupt_info *inti) { switch (inti->type) { case KVM_S390_INT_EMERGENCY: @@ -91,7 +91,7 @@ static void __set_cpuflag(struct kvm_vcpu *vcpu, u32 flag) } static void __set_intercept_indicator(struct kvm_vcpu *vcpu, - struct interrupt_info *inti) + struct kvm_s390_interrupt_info *inti) { switch (inti->type) { case KVM_S390_INT_EMERGENCY: @@ -111,7 +111,7 @@ static void __set_intercept_indicator(struct kvm_vcpu *vcpu, } static void __do_deliver_interrupt(struct kvm_vcpu *vcpu, - struct interrupt_info *inti) + struct kvm_s390_interrupt_info *inti) { const unsigned short table[] = { 2, 4, 4, 6 }; int rc, exception = 0; @@ -290,9 +290,9 @@ static int __try_deliver_ckc_interrupt(struct kvm_vcpu *vcpu) int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu) { - struct local_interrupt *li = &vcpu->arch.local_int; - struct float_interrupt *fi = vcpu->arch.local_int.float_int; - struct interrupt_info *inti; + struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; + struct kvm_s390_float_interrupt *fi = vcpu->arch.local_int.float_int; + struct kvm_s390_interrupt_info *inti; int rc = 0; if (atomic_read(&li->active)) { @@ -408,9 +408,9 @@ void kvm_s390_idle_wakeup(unsigned long data) void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu) { - struct local_interrupt *li = &vcpu->arch.local_int; - struct float_interrupt *fi = vcpu->arch.local_int.float_int; - struct interrupt_info *n, *inti = NULL; + struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; + struct kvm_s390_float_interrupt *fi = vcpu->arch.local_int.float_int; + struct kvm_s390_interrupt_info *n, *inti = NULL; int deliver; __reset_intercept_indicators(vcpu); @@ -465,8 +465,8 @@ void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu) int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code) { - struct local_interrupt *li = &vcpu->arch.local_int; - struct interrupt_info *inti; + struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; + struct kvm_s390_interrupt_info *inti; inti = kzalloc(sizeof(*inti), GFP_KERNEL); if (!inti) @@ -487,9 +487,9 @@ int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code) int kvm_s390_inject_vm(struct kvm *kvm, struct kvm_s390_interrupt *s390int) { - struct local_interrupt *li; - struct float_interrupt *fi; - struct interrupt_info *inti; + struct kvm_s390_local_interrupt *li; + struct kvm_s390_float_interrupt *fi; + struct kvm_s390_interrupt_info *inti; int sigcpu; inti = kzalloc(sizeof(*inti), GFP_KERNEL); @@ -544,8 +544,8 @@ int kvm_s390_inject_vm(struct kvm *kvm, int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, struct kvm_s390_interrupt *s390int) { - struct local_interrupt *li; - struct interrupt_info *inti; + struct kvm_s390_local_interrupt *li; + struct kvm_s390_interrupt_info *inti; inti = kzalloc(sizeof(*inti), GFP_KERNEL); if (!inti) diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 6558b09ff579..1782cbcd2829 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -79,10 +79,6 @@ void kvm_arch_hardware_disable(void *garbage) { } -void decache_vcpus_on_cpu(int cpu) -{ -} - int kvm_arch_hardware_setup(void) { return 0; @@ -198,6 +194,7 @@ out_nokvm: void kvm_arch_destroy_vm(struct kvm *kvm) { debug_unregister(kvm->arch.dbf); + kvm_free_physmem(kvm); free_page((unsigned long)(kvm->arch.sca)); kfree(kvm); module_put(THIS_MODULE); @@ -250,11 +247,16 @@ static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu) vcpu->arch.sie_block->gbea = 1; } +/* The current code can have up to 256 pages for virtio */ +#define VIRTIODESCSPACE (256ul * 4096ul) + int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) { atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH); - vcpu->arch.sie_block->gmslm = 0xffffffffffUL; - vcpu->arch.sie_block->gmsor = 0x000000000000; + vcpu->arch.sie_block->gmslm = vcpu->kvm->arch.guest_memsize + + vcpu->kvm->arch.guest_origin + + VIRTIODESCSPACE - 1ul; + vcpu->arch.sie_block->gmsor = vcpu->kvm->arch.guest_origin; vcpu->arch.sie_block->ecb = 2; vcpu->arch.sie_block->eca = 0xC1002001U; setup_timer(&vcpu->arch.ckc_timer, kvm_s390_idle_wakeup, @@ -273,7 +275,8 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, if (!vcpu) goto out_nomem; - vcpu->arch.sie_block = (struct sie_block *) get_zeroed_page(GFP_KERNEL); + vcpu->arch.sie_block = (struct kvm_s390_sie_block *) + get_zeroed_page(GFP_KERNEL); if (!vcpu->arch.sie_block) goto out_free_cpu; @@ -672,6 +675,10 @@ int kvm_arch_set_memory_region(struct kvm *kvm, return 0; } +void kvm_arch_flush_shadow(struct kvm *kvm) +{ +} + gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn) { return gfn; diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index c02286c6a931..2e2d2ffb6a07 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -199,7 +199,7 @@ out: static void handle_stsi_3_2_2(struct kvm_vcpu *vcpu, struct sysinfo_3_2_2 *mem) { - struct float_interrupt *fi = &vcpu->kvm->arch.float_int; + struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int; int cpus = 0; int n; diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c index 0a236acfb5f6..5a556114eaa5 100644 --- a/arch/s390/kvm/sigp.c +++ b/arch/s390/kvm/sigp.c @@ -45,7 +45,7 @@ static int __sigp_sense(struct kvm_vcpu *vcpu, u16 cpu_addr, u64 *reg) { - struct float_interrupt *fi = &vcpu->kvm->arch.float_int; + struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int; int rc; if (cpu_addr >= KVM_MAX_VCPUS) @@ -71,9 +71,9 @@ static int __sigp_sense(struct kvm_vcpu *vcpu, u16 cpu_addr, u64 *reg) static int __sigp_emergency(struct kvm_vcpu *vcpu, u16 cpu_addr) { - struct float_interrupt *fi = &vcpu->kvm->arch.float_int; - struct local_interrupt *li; - struct interrupt_info *inti; + struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int; + struct kvm_s390_local_interrupt *li; + struct kvm_s390_interrupt_info *inti; int rc; if (cpu_addr >= KVM_MAX_VCPUS) @@ -108,9 +108,9 @@ unlock: static int __sigp_stop(struct kvm_vcpu *vcpu, u16 cpu_addr, int store) { - struct float_interrupt *fi = &vcpu->kvm->arch.float_int; - struct local_interrupt *li; - struct interrupt_info *inti; + struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int; + struct kvm_s390_local_interrupt *li; + struct kvm_s390_interrupt_info *inti; int rc; if (cpu_addr >= KVM_MAX_VCPUS) @@ -169,9 +169,9 @@ static int __sigp_set_arch(struct kvm_vcpu *vcpu, u32 parameter) static int __sigp_set_prefix(struct kvm_vcpu *vcpu, u16 cpu_addr, u32 address, u64 *reg) { - struct float_interrupt *fi = &vcpu->kvm->arch.float_int; - struct local_interrupt *li; - struct interrupt_info *inti; + struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int; + struct kvm_s390_local_interrupt *li; + struct kvm_s390_interrupt_info *inti; int rc; u8 tmp; diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 96e0c2ebc388..03980cb04291 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -447,7 +447,6 @@ config PARAVIRT_DEBUG config MEMTEST bool "Memtest" - depends on X86_64 help This option adds a kernel parameter 'memtest', which allows memtest to be set. diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu index abff1b84ed5b..54b8c02c71e6 100644 --- a/arch/x86/Kconfig.cpu +++ b/arch/x86/Kconfig.cpu @@ -362,10 +362,6 @@ config X86_ALIGNMENT_16 def_bool y depends on MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCYRIXIII || X86_ELAN || MK6 || M586MMX || M586TSC || M586 || M486 || MVIAC3_2 || MGEODEGX1 -config X86_GOOD_APIC - def_bool y - depends on MK7 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || MK8 || MEFFICEON || MCORE2 || MVIAC7 || X86_64 - config X86_INTEL_USERCOPY def_bool y depends on MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M586MMX || X86_GENERIC || MK8 || MK7 || MEFFICEON || MCORE2 diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index ffd5913b35d1..85a87d2ac0c0 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -5,13 +5,15 @@ config TRACE_IRQFLAGS_SUPPORT source "lib/Kconfig.debug" -config NONPROMISC_DEVMEM +config STRICT_DEVMEM bool "Filter access to /dev/mem" help - If this option is left off, you allow userspace access to all + If this option is left on, you allow userspace (root) access to all of memory, including kernel and userspace memory. Accidental access to this is obviously disastrous, but specific access can - be used by people debugging the kernel. + be used by people debugging the kernel. Note that with PAT support + enabled, even in this case there are restrictions on /dev/mem + use due to the cache aliasing requirements. If this option is switched on, the /dev/mem file only allows userspace access to PCI space and the BIOS code and data regions. diff --git a/arch/x86/boot/edd.c b/arch/x86/boot/edd.c index 03399d64013b..d93cbc6464d0 100644 --- a/arch/x86/boot/edd.c +++ b/arch/x86/boot/edd.c @@ -167,9 +167,8 @@ void query_edd(void) * Scan the BIOS-supported hard disks and query EDD * information... */ - get_edd_info(devno, &ei); - - if (boot_params.eddbuf_entries < EDDMAXNR) { + if (!get_edd_info(devno, &ei) + && boot_params.eddbuf_entries < EDDMAXNR) { memcpy(edp, &ei, sizeof ei); edp++; boot_params.eddbuf_entries++; diff --git a/arch/x86/boot/pm.c b/arch/x86/boot/pm.c index 328956fdb59e..85a1cd8a8ff8 100644 --- a/arch/x86/boot/pm.c +++ b/arch/x86/boot/pm.c @@ -98,12 +98,6 @@ static void reset_coprocessor(void) /* * Set up the GDT */ -#define GDT_ENTRY(flags, base, limit) \ - (((u64)(base & 0xff000000) << 32) | \ - ((u64)flags << 40) | \ - ((u64)(limit & 0x00ff0000) << 32) | \ - ((u64)(base & 0x00ffffff) << 16) | \ - ((u64)(limit & 0x0000ffff))) struct gdt_ptr { u16 len; diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig index 9bc34e2033ec..4d73f53287b6 100644 --- a/arch/x86/configs/i386_defconfig +++ b/arch/x86/configs/i386_defconfig @@ -2047,7 +2047,7 @@ CONFIG_PROVIDE_OHCI1394_DMA_INIT=y # CONFIG_SAMPLES is not set # CONFIG_KGDB is not set CONFIG_HAVE_ARCH_KGDB=y -# CONFIG_NONPROMISC_DEVMEM is not set +# CONFIG_STRICT_DEVMEM is not set CONFIG_EARLY_PRINTK=y CONFIG_DEBUG_STACKOVERFLOW=y CONFIG_DEBUG_STACK_USAGE=y diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig index ae5124e064d4..a40452429625 100644 --- a/arch/x86/configs/x86_64_defconfig +++ b/arch/x86/configs/x86_64_defconfig @@ -2012,7 +2012,7 @@ CONFIG_PROVIDE_OHCI1394_DMA_INIT=y # CONFIG_SAMPLES is not set # CONFIG_KGDB is not set CONFIG_HAVE_ARCH_KGDB=y -# CONFIG_NONPROMISC_DEVMEM is not set +# CONFIG_STRICT_DEVMEM is not set CONFIG_EARLY_PRINTK=y CONFIG_DEBUG_STACKOVERFLOW=y CONFIG_DEBUG_STACK_USAGE=y diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c index cb3856a18c85..20af4c79579a 100644 --- a/arch/x86/ia32/ia32_signal.c +++ b/arch/x86/ia32/ia32_signal.c @@ -36,6 +36,11 @@ #define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) +#define FIX_EFLAGS (X86_EFLAGS_AC | X86_EFLAGS_OF | \ + X86_EFLAGS_DF | X86_EFLAGS_TF | X86_EFLAGS_SF | \ + X86_EFLAGS_ZF | X86_EFLAGS_AF | X86_EFLAGS_PF | \ + X86_EFLAGS_CF) + asmlinkage int do_signal(struct pt_regs *regs, sigset_t *oldset); void signal_fault(struct pt_regs *regs, void __user *frame, char *where); @@ -248,7 +253,7 @@ static int ia32_restore_sigcontext(struct pt_regs *regs, regs->ss |= 3; err |= __get_user(tmpflags, &sc->flags); - regs->flags = (regs->flags & ~0x40DD5) | (tmpflags & 0x40DD5); + regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS); /* disable syscall checks */ regs->orig_ax = -1; @@ -515,7 +520,6 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, compat_sigset_t *set, struct pt_regs *regs) { struct rt_sigframe __user *frame; - struct exec_domain *ed = current_thread_info()->exec_domain; void __user *restorer; int err = 0; @@ -538,8 +542,7 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) goto give_sigsegv; - err |= __put_user((ed && ed->signal_invmap && sig < 32 - ? ed->signal_invmap[sig] : sig), &frame->sig); + err |= __put_user(sig, &frame->sig); err |= __put_user(ptr_to_compat(&frame->info), &frame->pinfo); err |= __put_user(ptr_to_compat(&frame->uc), &frame->puc); err |= copy_siginfo_to_user32(&frame->info, info); diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S index 20371d0635e4..23d146ce676b 100644 --- a/arch/x86/ia32/ia32entry.S +++ b/arch/x86/ia32/ia32entry.S @@ -37,6 +37,11 @@ movq %rax,R8(%rsp) .endm + /* + * Reload arg registers from stack in case ptrace changed them. + * We don't reload %eax because syscall_trace_enter() returned + * the value it wants us to use in the table lookup. + */ .macro LOAD_ARGS32 offset movl \offset(%rsp),%r11d movl \offset+8(%rsp),%r10d @@ -46,7 +51,6 @@ movl \offset+48(%rsp),%edx movl \offset+56(%rsp),%esi movl \offset+64(%rsp),%edi - movl \offset+72(%rsp),%eax .endm .macro CFI_STARTPROC32 simple @@ -137,13 +141,12 @@ ENTRY(ia32_sysenter_target) .previous GET_THREAD_INFO(%r10) orl $TS_COMPAT,TI_status(%r10) - testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP), \ - TI_flags(%r10) + testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10) CFI_REMEMBER_STATE jnz sysenter_tracesys -sysenter_do_call: cmpl $(IA32_NR_syscalls-1),%eax ja ia32_badsys +sysenter_do_call: IA32_ARG_FIXUP 1 call *ia32_sys_call_table(,%rax,8) movq %rax,RAX-ARGOFFSET(%rsp) @@ -242,8 +245,7 @@ ENTRY(ia32_cstar_target) .previous GET_THREAD_INFO(%r10) orl $TS_COMPAT,TI_status(%r10) - testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP), \ - TI_flags(%r10) + testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10) CFI_REMEMBER_STATE jnz cstar_tracesys cstar_do_call: @@ -321,6 +323,7 @@ ENTRY(ia32_syscall) /*CFI_REL_OFFSET rflags,EFLAGS-RIP*/ /*CFI_REL_OFFSET cs,CS-RIP*/ CFI_REL_OFFSET rip,RIP-RIP + PARAVIRT_ADJUST_EXCEPTION_FRAME SWAPGS /* * No need to follow this irqs on/off section: the syscall @@ -336,8 +339,7 @@ ENTRY(ia32_syscall) SAVE_ARGS 0,0,1 GET_THREAD_INFO(%r10) orl $TS_COMPAT,TI_status(%r10) - testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP), \ - TI_flags(%r10) + testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10) jnz ia32_tracesys ia32_do_syscall: cmpl $(IA32_NR_syscalls-1),%eax diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index da140611bb57..b78a17b12810 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -102,6 +102,7 @@ obj-$(CONFIG_OLPC) += olpc.o # 64 bit specific files ifeq ($(CONFIG_X86_64),y) obj-y += genapic_64.o genapic_flat_64.o genx2apic_uv_x.o tlb_uv.o + obj-y += bios_uv.o obj-$(CONFIG_X86_PM_TIMER) += pmtimer_64.o obj-$(CONFIG_AUDIT) += audit_64.o diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c index 868de3d5c39d..a3ddad18aaa3 100644 --- a/arch/x86/kernel/acpi/sleep.c +++ b/arch/x86/kernel/acpi/sleep.c @@ -9,6 +9,7 @@ #include <linux/bootmem.h> #include <linux/dmi.h> #include <linux/cpumask.h> +#include <asm/segment.h> #include "realmode/wakeup.h" #include "sleep.h" @@ -23,15 +24,6 @@ static unsigned long acpi_realmode; static char temp_stack[10240]; #endif -/* XXX: this macro should move to asm-x86/segment.h and be shared with the - boot code... */ -#define GDT_ENTRY(flags, base, limit) \ - (((u64)(base & 0xff000000) << 32) | \ - ((u64)flags << 40) | \ - ((u64)(limit & 0x00ff0000) << 32) | \ - ((u64)(base & 0x00ffffff) << 16) | \ - ((u64)(limit & 0x0000ffff))) - /** * acpi_save_state_mem - save kernel state * diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index f2766d84c7a0..c25210e6ac88 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -23,7 +23,7 @@ #include <linux/scatterlist.h> #include <linux/iommu-helper.h> #include <asm/proto.h> -#include <asm/gart.h> +#include <asm/iommu.h> #include <asm/amd_iommu_types.h> #include <asm/amd_iommu.h> @@ -32,21 +32,37 @@ #define to_pages(addr, size) \ (round_up(((addr) & ~PAGE_MASK) + (size), PAGE_SIZE) >> PAGE_SHIFT) +#define EXIT_LOOP_COUNT 10000000 + static DEFINE_RWLOCK(amd_iommu_devtable_lock); -struct command { +/* + * general struct to manage commands send to an IOMMU + */ +struct iommu_cmd { u32 data[4]; }; static int dma_ops_unity_map(struct dma_ops_domain *dma_dom, struct unity_map_entry *e); +/* returns !0 if the IOMMU is caching non-present entries in its TLB */ static int iommu_has_npcache(struct amd_iommu *iommu) { return iommu->cap & IOMMU_CAP_NPCACHE; } -static int __iommu_queue_command(struct amd_iommu *iommu, struct command *cmd) +/**************************************************************************** + * + * IOMMU command queuing functions + * + ****************************************************************************/ + +/* + * Writes the command to the IOMMUs command buffer and informs the + * hardware about the new command. Must be called with iommu->lock held. + */ +static int __iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd) { u32 tail, head; u8 *target; @@ -63,7 +79,11 @@ static int __iommu_queue_command(struct amd_iommu *iommu, struct command *cmd) return 0; } -static int iommu_queue_command(struct amd_iommu *iommu, struct command *cmd) +/* + * General queuing function for commands. Takes iommu->lock and calls + * __iommu_queue_command(). + */ +static int iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd) { unsigned long flags; int ret; @@ -75,16 +95,24 @@ static int iommu_queue_command(struct amd_iommu *iommu, struct command *cmd) return ret; } +/* + * This function is called whenever we need to ensure that the IOMMU has + * completed execution of all commands we sent. It sends a + * COMPLETION_WAIT command and waits for it to finish. The IOMMU informs + * us about that by writing a value to a physical address we pass with + * the command. + */ static int iommu_completion_wait(struct amd_iommu *iommu) { int ret; - struct command cmd; + struct iommu_cmd cmd; volatile u64 ready = 0; unsigned long ready_phys = virt_to_phys(&ready); + unsigned long i = 0; memset(&cmd, 0, sizeof(cmd)); cmd.data[0] = LOW_U32(ready_phys) | CMD_COMPL_WAIT_STORE_MASK; - cmd.data[1] = HIGH_U32(ready_phys); + cmd.data[1] = upper_32_bits(ready_phys); cmd.data[2] = 1; /* value written to 'ready' */ CMD_SET_TYPE(&cmd, CMD_COMPL_WAIT); @@ -95,15 +123,23 @@ static int iommu_completion_wait(struct amd_iommu *iommu) if (ret) return ret; - while (!ready) + while (!ready && (i < EXIT_LOOP_COUNT)) { + ++i; cpu_relax(); + } + + if (unlikely((i == EXIT_LOOP_COUNT) && printk_ratelimit())) + printk(KERN_WARNING "AMD IOMMU: Completion wait loop failed\n"); return 0; } +/* + * Command send function for invalidating a device table entry + */ static int iommu_queue_inv_dev_entry(struct amd_iommu *iommu, u16 devid) { - struct command cmd; + struct iommu_cmd cmd; BUG_ON(iommu == NULL); @@ -116,20 +152,23 @@ static int iommu_queue_inv_dev_entry(struct amd_iommu *iommu, u16 devid) return iommu_queue_command(iommu, &cmd); } +/* + * Generic command send function for invalidaing TLB entries + */ static int iommu_queue_inv_iommu_pages(struct amd_iommu *iommu, u64 address, u16 domid, int pde, int s) { - struct command cmd; + struct iommu_cmd cmd; memset(&cmd, 0, sizeof(cmd)); address &= PAGE_MASK; CMD_SET_TYPE(&cmd, CMD_INV_IOMMU_PAGES); cmd.data[1] |= domid; cmd.data[2] = LOW_U32(address); - cmd.data[3] = HIGH_U32(address); - if (s) + cmd.data[3] = upper_32_bits(address); + if (s) /* size bit - we flush more than one 4kb page */ cmd.data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK; - if (pde) + if (pde) /* PDE bit - we wan't flush everything not only the PTEs */ cmd.data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK; iommu->need_sync = 1; @@ -137,6 +176,11 @@ static int iommu_queue_inv_iommu_pages(struct amd_iommu *iommu, return iommu_queue_command(iommu, &cmd); } +/* + * TLB invalidation function which is called from the mapping functions. + * It invalidates a single PTE if the range to flush is within a single + * page. Otherwise it flushes the whole TLB of the IOMMU. + */ static int iommu_flush_pages(struct amd_iommu *iommu, u16 domid, u64 address, size_t size) { @@ -159,6 +203,20 @@ static int iommu_flush_pages(struct amd_iommu *iommu, u16 domid, return 0; } +/**************************************************************************** + * + * The functions below are used the create the page table mappings for + * unity mapped regions. + * + ****************************************************************************/ + +/* + * Generic mapping functions. It maps a physical address into a DMA + * address space. It allocates the page table pages if necessary. + * In the future it can be extended to a generic mapping function + * supporting all features of AMD IOMMU page tables like level skipping + * and full 64 bit address spaces. + */ static int iommu_map(struct protection_domain *dom, unsigned long bus_addr, unsigned long phys_addr, @@ -209,6 +267,10 @@ static int iommu_map(struct protection_domain *dom, return 0; } +/* + * This function checks if a specific unity mapping entry is needed for + * this specific IOMMU. + */ static int iommu_for_unity_map(struct amd_iommu *iommu, struct unity_map_entry *entry) { @@ -223,6 +285,12 @@ static int iommu_for_unity_map(struct amd_iommu *iommu, return 0; } +/* + * Init the unity mappings for a specific IOMMU in the system + * + * Basically iterates over all unity mapping entries and applies them to + * the default domain DMA of that IOMMU if necessary. + */ static int iommu_init_unity_mappings(struct amd_iommu *iommu) { struct unity_map_entry *entry; @@ -239,6 +307,10 @@ static int iommu_init_unity_mappings(struct amd_iommu *iommu) return 0; } +/* + * This function actually applies the mapping to the page table of the + * dma_ops domain. + */ static int dma_ops_unity_map(struct dma_ops_domain *dma_dom, struct unity_map_entry *e) { @@ -261,6 +333,9 @@ static int dma_ops_unity_map(struct dma_ops_domain *dma_dom, return 0; } +/* + * Inits the unity mappings required for a specific device + */ static int init_unity_mappings_for_device(struct dma_ops_domain *dma_dom, u16 devid) { @@ -278,12 +353,26 @@ static int init_unity_mappings_for_device(struct dma_ops_domain *dma_dom, return 0; } +/**************************************************************************** + * + * The next functions belong to the address allocator for the dma_ops + * interface functions. They work like the allocators in the other IOMMU + * drivers. Its basically a bitmap which marks the allocated pages in + * the aperture. Maybe it could be enhanced in the future to a more + * efficient allocator. + * + ****************************************************************************/ static unsigned long dma_mask_to_pages(unsigned long mask) { return (mask >> PAGE_SHIFT) + (PAGE_ALIGN(mask & ~PAGE_MASK) >> PAGE_SHIFT); } +/* + * The address allocator core function. + * + * called with domain->lock held + */ static unsigned long dma_ops_alloc_addresses(struct device *dev, struct dma_ops_domain *dom, unsigned int pages) @@ -317,6 +406,11 @@ static unsigned long dma_ops_alloc_addresses(struct device *dev, return address; } +/* + * The address free function. + * + * called with domain->lock held + */ static void dma_ops_free_addresses(struct dma_ops_domain *dom, unsigned long address, unsigned int pages) @@ -325,6 +419,16 @@ static void dma_ops_free_addresses(struct dma_ops_domain *dom, iommu_area_free(dom->bitmap, address, pages); } +/**************************************************************************** + * + * The next functions belong to the domain allocation. A domain is + * allocated for every IOMMU as the default domain. If device isolation + * is enabled, every device get its own domain. The most important thing + * about domains is the page table mapping the DMA address space they + * contain. + * + ****************************************************************************/ + static u16 domain_id_alloc(void) { unsigned long flags; @@ -342,6 +446,10 @@ static u16 domain_id_alloc(void) return id; } +/* + * Used to reserve address ranges in the aperture (e.g. for exclusion + * ranges. + */ static void dma_ops_reserve_addresses(struct dma_ops_domain *dom, unsigned long start_page, unsigned int pages) @@ -382,6 +490,10 @@ static void dma_ops_free_pagetable(struct dma_ops_domain *dma_dom) free_page((unsigned long)p1); } +/* + * Free a domain, only used if something went wrong in the + * allocation path and we need to free an already allocated page table + */ static void dma_ops_domain_free(struct dma_ops_domain *dom) { if (!dom) @@ -396,6 +508,11 @@ static void dma_ops_domain_free(struct dma_ops_domain *dom) kfree(dom); } +/* + * Allocates a new protection domain usable for the dma_ops functions. + * It also intializes the page table and the address allocator data + * structures required for the dma_ops interface + */ static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu, unsigned order) { @@ -436,6 +553,7 @@ static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu, dma_dom->bitmap[0] = 1; dma_dom->next_bit = 0; + /* Intialize the exclusion range if necessary */ if (iommu->exclusion_start && iommu->exclusion_start < dma_dom->aperture_size) { unsigned long startpage = iommu->exclusion_start >> PAGE_SHIFT; @@ -444,6 +562,11 @@ static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu, dma_ops_reserve_addresses(dma_dom, startpage, pages); } + /* + * At the last step, build the page tables so we don't need to + * allocate page table pages in the dma_ops mapping/unmapping + * path. + */ num_pte_pages = dma_dom->aperture_size / (PAGE_SIZE * 512); dma_dom->pte_pages = kzalloc(num_pte_pages * sizeof(void *), GFP_KERNEL); @@ -472,6 +595,10 @@ free_dma_dom: return NULL; } +/* + * Find out the protection domain structure for a given PCI device. This + * will give us the pointer to the page table root for example. + */ static struct protection_domain *domain_for_device(u16 devid) { struct protection_domain *dom; @@ -484,6 +611,10 @@ static struct protection_domain *domain_for_device(u16 devid) return dom; } +/* + * If a device is not yet associated with a domain, this function does + * assigns it visible for the hardware + */ static void set_device_domain(struct amd_iommu *iommu, struct protection_domain *domain, u16 devid) @@ -508,6 +639,19 @@ static void set_device_domain(struct amd_iommu *iommu, iommu->need_sync = 1; } +/***************************************************************************** + * + * The next functions belong to the dma_ops mapping/unmapping code. + * + *****************************************************************************/ + +/* + * In the dma_ops path we only have the struct device. This function + * finds the corresponding IOMMU, the protection domain and the + * requestor id for a given device. + * If the device is not yet associated with a domain this is also done + * in this function. + */ static int get_device_resources(struct device *dev, struct amd_iommu **iommu, struct protection_domain **domain, @@ -520,8 +664,9 @@ static int get_device_resources(struct device *dev, BUG_ON(!dev || dev->bus != &pci_bus_type || !dev->dma_mask); pcidev = to_pci_dev(dev); - _bdf = (pcidev->bus->number << 8) | pcidev->devfn; + _bdf = calc_devid(pcidev->bus->number, pcidev->devfn); + /* device not translated by any IOMMU in the system? */ if (_bdf >= amd_iommu_last_bdf) { *iommu = NULL; *domain = NULL; @@ -547,6 +692,10 @@ static int get_device_resources(struct device *dev, return 1; } +/* + * This is the generic map function. It maps one 4kb page at paddr to + * the given address in the DMA address space for the domain. + */ static dma_addr_t dma_ops_domain_map(struct amd_iommu *iommu, struct dma_ops_domain *dom, unsigned long address, @@ -578,6 +727,9 @@ static dma_addr_t dma_ops_domain_map(struct amd_iommu *iommu, return (dma_addr_t)address; } +/* + * The generic unmapping function for on page in the DMA address space. + */ static void dma_ops_domain_unmap(struct amd_iommu *iommu, struct dma_ops_domain *dom, unsigned long address) @@ -597,6 +749,12 @@ static void dma_ops_domain_unmap(struct amd_iommu *iommu, *pte = 0ULL; } +/* + * This function contains common code for mapping of a physically + * contiguous memory region into DMA address space. It is uses by all + * mapping functions provided by this IOMMU driver. + * Must be called with the domain lock held. + */ static dma_addr_t __map_single(struct device *dev, struct amd_iommu *iommu, struct dma_ops_domain *dma_dom, @@ -628,6 +786,10 @@ out: return address; } +/* + * Does the reverse of the __map_single function. Must be called with + * the domain lock held too + */ static void __unmap_single(struct amd_iommu *iommu, struct dma_ops_domain *dma_dom, dma_addr_t dma_addr, @@ -652,6 +814,9 @@ static void __unmap_single(struct amd_iommu *iommu, dma_ops_free_addresses(dma_dom, dma_addr, pages); } +/* + * The exported map_single function for dma_ops. + */ static dma_addr_t map_single(struct device *dev, phys_addr_t paddr, size_t size, int dir) { @@ -664,6 +829,7 @@ static dma_addr_t map_single(struct device *dev, phys_addr_t paddr, get_device_resources(dev, &iommu, &domain, &devid); if (iommu == NULL || domain == NULL) + /* device not handled by any AMD IOMMU */ return (dma_addr_t)paddr; spin_lock_irqsave(&domain->lock, flags); @@ -683,6 +849,9 @@ out: return addr; } +/* + * The exported unmap_single function for dma_ops. + */ static void unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size, int dir) { @@ -692,6 +861,7 @@ static void unmap_single(struct device *dev, dma_addr_t dma_addr, u16 devid; if (!get_device_resources(dev, &iommu, &domain, &devid)) + /* device not handled by any AMD IOMMU */ return; spin_lock_irqsave(&domain->lock, flags); @@ -706,6 +876,10 @@ static void unmap_single(struct device *dev, dma_addr_t dma_addr, spin_unlock_irqrestore(&domain->lock, flags); } +/* + * This is a special map_sg function which is used if we should map a + * device which is not handled by an AMD IOMMU in the system. + */ static int map_sg_no_iommu(struct device *dev, struct scatterlist *sglist, int nelems, int dir) { @@ -720,6 +894,10 @@ static int map_sg_no_iommu(struct device *dev, struct scatterlist *sglist, return nelems; } +/* + * The exported map_sg function for dma_ops (handles scatter-gather + * lists). + */ static int map_sg(struct device *dev, struct scatterlist *sglist, int nelems, int dir) { @@ -775,6 +953,10 @@ unmap: goto out; } +/* + * The exported map_sg function for dma_ops (handles scatter-gather + * lists). + */ static void unmap_sg(struct device *dev, struct scatterlist *sglist, int nelems, int dir) { @@ -804,6 +986,9 @@ static void unmap_sg(struct device *dev, struct scatterlist *sglist, spin_unlock_irqrestore(&domain->lock, flags); } +/* + * The exported alloc_coherent function for dma_ops. + */ static void *alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr, gfp_t flag) { @@ -851,6 +1036,11 @@ out: return virt_addr; } +/* + * The exported free_coherent function for dma_ops. + * FIXME: fix the generic x86 DMA layer so that it actually calls that + * function. + */ static void free_coherent(struct device *dev, size_t size, void *virt_addr, dma_addr_t dma_addr) { @@ -879,6 +1069,8 @@ free_mem: } /* + * The function for pre-allocating protection domains. + * * If the driver core informs the DMA layer if a driver grabs a device * we don't need to preallocate the protection domains anymore. * For now we have to. @@ -921,12 +1113,20 @@ static struct dma_mapping_ops amd_iommu_dma_ops = { .unmap_sg = unmap_sg, }; +/* + * The function which clues the AMD IOMMU driver into dma_ops. + */ int __init amd_iommu_init_dma_ops(void) { struct amd_iommu *iommu; int order = amd_iommu_aperture_order; int ret; + /* + * first allocate a default protection domain for every IOMMU we + * found in the system. Devices not assigned to any other + * protection domain will be assigned to the default one. + */ list_for_each_entry(iommu, &amd_iommu_list, list) { iommu->default_dom = dma_ops_domain_alloc(iommu, order); if (iommu->default_dom == NULL) @@ -936,6 +1136,10 @@ int __init amd_iommu_init_dma_ops(void) goto free_domains; } + /* + * If device isolation is enabled, pre-allocate the protection + * domains for each device. + */ if (amd_iommu_isolate) prealloc_protection_domains(); @@ -947,6 +1151,7 @@ int __init amd_iommu_init_dma_ops(void) gart_iommu_aperture = 0; #endif + /* Make the driver finally visible to the drivers */ dma_ops = &amd_iommu_dma_ops; return 0; diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index 2a13e430437d..c9d8ff2eb130 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -25,20 +25,13 @@ #include <asm/pci-direct.h> #include <asm/amd_iommu_types.h> #include <asm/amd_iommu.h> -#include <asm/gart.h> +#include <asm/iommu.h> /* * definitions for the ACPI scanning code */ -#define UPDATE_LAST_BDF(x) do {\ - if ((x) > amd_iommu_last_bdf) \ - amd_iommu_last_bdf = (x); \ - } while (0); - -#define DEVID(bus, devfn) (((bus) << 8) | (devfn)) #define PCI_BUS(x) (((x) >> 8) & 0xff) #define IVRS_HEADER_LENGTH 48 -#define TBL_SIZE(x) (1 << (PAGE_SHIFT + get_order(amd_iommu_last_bdf * (x)))) #define ACPI_IVHD_TYPE 0x10 #define ACPI_IVMD_TYPE_ALL 0x20 @@ -71,6 +64,17 @@ #define ACPI_DEVFLAG_LINT1 0x80 #define ACPI_DEVFLAG_ATSDIS 0x10000000 +/* + * ACPI table definitions + * + * These data structures are laid over the table to parse the important values + * out of it. + */ + +/* + * structure describing one IOMMU in the ACPI table. Typically followed by one + * or more ivhd_entrys. + */ struct ivhd_header { u8 type; u8 flags; @@ -83,6 +87,10 @@ struct ivhd_header { u32 reserved; } __attribute__((packed)); +/* + * A device entry describing which devices a specific IOMMU translates and + * which requestor ids they use. + */ struct ivhd_entry { u8 type; u16 devid; @@ -90,6 +98,10 @@ struct ivhd_entry { u32 ext; } __attribute__((packed)); +/* + * An AMD IOMMU memory definition structure. It defines things like exclusion + * ranges for devices and regions that should be unity mapped. + */ struct ivmd_header { u8 type; u8 flags; @@ -103,22 +115,80 @@ struct ivmd_header { static int __initdata amd_iommu_detected; -u16 amd_iommu_last_bdf; -struct list_head amd_iommu_unity_map; -unsigned amd_iommu_aperture_order = 26; -int amd_iommu_isolate; +u16 amd_iommu_last_bdf; /* largest PCI device id we have + to handle */ +LIST_HEAD(amd_iommu_unity_map); /* a list of required unity mappings + we find in ACPI */ +unsigned amd_iommu_aperture_order = 26; /* size of aperture in power of 2 */ +int amd_iommu_isolate; /* if 1, device isolation is enabled */ + +LIST_HEAD(amd_iommu_list); /* list of all AMD IOMMUs in the + system */ -struct list_head amd_iommu_list; +/* + * Pointer to the device table which is shared by all AMD IOMMUs + * it is indexed by the PCI device id or the HT unit id and contains + * information about the domain the device belongs to as well as the + * page table root pointer. + */ struct dev_table_entry *amd_iommu_dev_table; + +/* + * The alias table is a driver specific data structure which contains the + * mappings of the PCI device ids to the actual requestor ids on the IOMMU. + * More than one device can share the same requestor id. + */ u16 *amd_iommu_alias_table; + +/* + * The rlookup table is used to find the IOMMU which is responsible + * for a specific device. It is also indexed by the PCI device id. + */ struct amd_iommu **amd_iommu_rlookup_table; + +/* + * The pd table (protection domain table) is used to find the protection domain + * data structure a device belongs to. Indexed with the PCI device id too. + */ struct protection_domain **amd_iommu_pd_table; + +/* + * AMD IOMMU allows up to 2^16 differend protection domains. This is a bitmap + * to know which ones are already in use. + */ unsigned long *amd_iommu_pd_alloc_bitmap; -static u32 dev_table_size; -static u32 alias_table_size; -static u32 rlookup_table_size; +static u32 dev_table_size; /* size of the device table */ +static u32 alias_table_size; /* size of the alias table */ +static u32 rlookup_table_size; /* size if the rlookup table */ +static inline void update_last_devid(u16 devid) +{ + if (devid > amd_iommu_last_bdf) + amd_iommu_last_bdf = devid; +} + +static inline unsigned long tbl_size(int entry_size) +{ + unsigned shift = PAGE_SHIFT + + get_order(amd_iommu_last_bdf * entry_size); + + return 1UL << shift; +} + +/**************************************************************************** + * + * AMD IOMMU MMIO register space handling functions + * + * These functions are used to program the IOMMU device registers in + * MMIO space required for that driver. + * + ****************************************************************************/ + +/* + * This function set the exclusion range in the IOMMU. DMA accesses to the + * exclusion range are passed through untranslated + */ static void __init iommu_set_exclusion_range(struct amd_iommu *iommu) { u64 start = iommu->exclusion_start & PAGE_MASK; @@ -137,6 +207,7 @@ static void __init iommu_set_exclusion_range(struct amd_iommu *iommu) &entry, sizeof(entry)); } +/* Programs the physical address of the device table into the IOMMU hardware */ static void __init iommu_set_device_table(struct amd_iommu *iommu) { u32 entry; @@ -149,6 +220,7 @@ static void __init iommu_set_device_table(struct amd_iommu *iommu) &entry, sizeof(entry)); } +/* Generic functions to enable/disable certain features of the IOMMU. */ static void __init iommu_feature_enable(struct amd_iommu *iommu, u8 bit) { u32 ctrl; @@ -167,6 +239,7 @@ static void __init iommu_feature_disable(struct amd_iommu *iommu, u8 bit) writel(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET); } +/* Function to enable the hardware */ void __init iommu_enable(struct amd_iommu *iommu) { printk(KERN_INFO "AMD IOMMU: Enabling IOMMU at "); @@ -176,6 +249,10 @@ void __init iommu_enable(struct amd_iommu *iommu) iommu_feature_enable(iommu, CONTROL_IOMMU_EN); } +/* + * mapping and unmapping functions for the IOMMU MMIO space. Each AMD IOMMU in + * the system has one. + */ static u8 * __init iommu_map_mmio_space(u64 address) { u8 *ret; @@ -199,16 +276,33 @@ static void __init iommu_unmap_mmio_space(struct amd_iommu *iommu) release_mem_region(iommu->mmio_phys, MMIO_REGION_LENGTH); } +/**************************************************************************** + * + * The functions below belong to the first pass of AMD IOMMU ACPI table + * parsing. In this pass we try to find out the highest device id this + * code has to handle. Upon this information the size of the shared data + * structures is determined later. + * + ****************************************************************************/ + +/* + * This function reads the last device id the IOMMU has to handle from the PCI + * capability header for this IOMMU + */ static int __init find_last_devid_on_pci(int bus, int dev, int fn, int cap_ptr) { u32 cap; cap = read_pci_config(bus, dev, fn, cap_ptr+MMIO_RANGE_OFFSET); - UPDATE_LAST_BDF(DEVID(MMIO_GET_BUS(cap), MMIO_GET_LD(cap))); + update_last_devid(calc_devid(MMIO_GET_BUS(cap), MMIO_GET_LD(cap))); return 0; } +/* + * After reading the highest device id from the IOMMU PCI capability header + * this function looks if there is a higher device id defined in the ACPI table + */ static int __init find_last_devid_from_ivhd(struct ivhd_header *h) { u8 *p = (void *)h, *end = (void *)h; @@ -229,7 +323,8 @@ static int __init find_last_devid_from_ivhd(struct ivhd_header *h) case IVHD_DEV_RANGE_END: case IVHD_DEV_ALIAS: case IVHD_DEV_EXT_SELECT: - UPDATE_LAST_BDF(dev->devid); + /* all the above subfield types refer to device ids */ + update_last_devid(dev->devid); break; default: break; @@ -242,6 +337,11 @@ static int __init find_last_devid_from_ivhd(struct ivhd_header *h) return 0; } +/* + * Iterate over all IVHD entries in the ACPI table and find the highest device + * id which we need to handle. This is the first of three functions which parse + * the ACPI table. So we check the checksum here. + */ static int __init find_last_devid_acpi(struct acpi_table_header *table) { int i; @@ -277,19 +377,31 @@ static int __init find_last_devid_acpi(struct acpi_table_header *table) return 0; } +/**************************************************************************** + * + * The following functions belong the the code path which parses the ACPI table + * the second time. In this ACPI parsing iteration we allocate IOMMU specific + * data structures, initialize the device/alias/rlookup table and also + * basically initialize the hardware. + * + ****************************************************************************/ + +/* + * Allocates the command buffer. This buffer is per AMD IOMMU. We can + * write commands to that buffer later and the IOMMU will execute them + * asynchronously + */ static u8 * __init alloc_command_buffer(struct amd_iommu *iommu) { - u8 *cmd_buf = (u8 *)__get_free_pages(GFP_KERNEL, + u8 *cmd_buf = (u8 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, get_order(CMD_BUFFER_SIZE)); - u64 entry = 0; + u64 entry; if (cmd_buf == NULL) return NULL; iommu->cmd_buf_size = CMD_BUFFER_SIZE; - memset(cmd_buf, 0, CMD_BUFFER_SIZE); - entry = (u64)virt_to_phys(cmd_buf); entry |= MMIO_CMD_SIZE_512; memcpy_toio(iommu->mmio_base + MMIO_CMD_BUF_OFFSET, @@ -302,11 +414,10 @@ static u8 * __init alloc_command_buffer(struct amd_iommu *iommu) static void __init free_command_buffer(struct amd_iommu *iommu) { - if (iommu->cmd_buf) - free_pages((unsigned long)iommu->cmd_buf, - get_order(CMD_BUFFER_SIZE)); + free_pages((unsigned long)iommu->cmd_buf, get_order(CMD_BUFFER_SIZE)); } +/* sets a specific bit in the device table entry. */ static void set_dev_entry_bit(u16 devid, u8 bit) { int i = (bit >> 5) & 0x07; @@ -315,7 +426,18 @@ static void set_dev_entry_bit(u16 devid, u8 bit) amd_iommu_dev_table[devid].data[i] |= (1 << _bit); } -static void __init set_dev_entry_from_acpi(u16 devid, u32 flags, u32 ext_flags) +/* Writes the specific IOMMU for a device into the rlookup table */ +static void __init set_iommu_for_device(struct amd_iommu *iommu, u16 devid) +{ + amd_iommu_rlookup_table[devid] = iommu; +} + +/* + * This function takes the device specific flags read from the ACPI + * table and sets up the device table entry with that information + */ +static void __init set_dev_entry_from_acpi(struct amd_iommu *iommu, + u16 devid, u32 flags, u32 ext_flags) { if (flags & ACPI_DEVFLAG_INITPASS) set_dev_entry_bit(devid, DEV_ENTRY_INIT_PASS); @@ -331,13 +453,14 @@ static void __init set_dev_entry_from_acpi(u16 devid, u32 flags, u32 ext_flags) set_dev_entry_bit(devid, DEV_ENTRY_LINT0_PASS); if (flags & ACPI_DEVFLAG_LINT1) set_dev_entry_bit(devid, DEV_ENTRY_LINT1_PASS); -} -static void __init set_iommu_for_device(struct amd_iommu *iommu, u16 devid) -{ - amd_iommu_rlookup_table[devid] = iommu; + set_iommu_for_device(iommu, devid); } +/* + * Reads the device exclusion range from ACPI and initialize IOMMU with + * it + */ static void __init set_device_exclusion_range(u16 devid, struct ivmd_header *m) { struct amd_iommu *iommu = amd_iommu_rlookup_table[devid]; @@ -346,12 +469,22 @@ static void __init set_device_exclusion_range(u16 devid, struct ivmd_header *m) return; if (iommu) { + /* + * We only can configure exclusion ranges per IOMMU, not + * per device. But we can enable the exclusion range per + * device. This is done here + */ set_dev_entry_bit(m->devid, DEV_ENTRY_EX); iommu->exclusion_start = m->range_start; iommu->exclusion_length = m->range_length; } } +/* + * This function reads some important data from the IOMMU PCI space and + * initializes the driver data structure with it. It reads the hardware + * capabilities and the first/last device entries + */ static void __init init_iommu_from_pci(struct amd_iommu *iommu) { int bus = PCI_BUS(iommu->devid); @@ -363,10 +496,16 @@ static void __init init_iommu_from_pci(struct amd_iommu *iommu) iommu->cap = read_pci_config(bus, dev, fn, cap_ptr+MMIO_CAP_HDR_OFFSET); range = read_pci_config(bus, dev, fn, cap_ptr+MMIO_RANGE_OFFSET); - iommu->first_device = DEVID(MMIO_GET_BUS(range), MMIO_GET_FD(range)); - iommu->last_device = DEVID(MMIO_GET_BUS(range), MMIO_GET_LD(range)); + iommu->first_device = calc_devid(MMIO_GET_BUS(range), + MMIO_GET_FD(range)); + iommu->last_device = calc_devid(MMIO_GET_BUS(range), + MMIO_GET_LD(range)); } +/* + * Takes a pointer to an AMD IOMMU entry in the ACPI table and + * initializes the hardware and our data structures with it. + */ static void __init init_iommu_from_acpi(struct amd_iommu *iommu, struct ivhd_header *h) { @@ -374,7 +513,7 @@ static void __init init_iommu_from_acpi(struct amd_iommu *iommu, u8 *end = p, flags = 0; u16 dev_i, devid = 0, devid_start = 0, devid_to = 0; u32 ext_flags = 0; - bool alias = 0; + bool alias = false; struct ivhd_entry *e; /* @@ -414,22 +553,23 @@ static void __init init_iommu_from_acpi(struct amd_iommu *iommu, case IVHD_DEV_ALL: for (dev_i = iommu->first_device; dev_i <= iommu->last_device; ++dev_i) - set_dev_entry_from_acpi(dev_i, e->flags, 0); + set_dev_entry_from_acpi(iommu, dev_i, + e->flags, 0); break; case IVHD_DEV_SELECT: devid = e->devid; - set_dev_entry_from_acpi(devid, e->flags, 0); + set_dev_entry_from_acpi(iommu, devid, e->flags, 0); break; case IVHD_DEV_SELECT_RANGE_START: devid_start = e->devid; flags = e->flags; ext_flags = 0; - alias = 0; + alias = false; break; case IVHD_DEV_ALIAS: devid = e->devid; devid_to = e->ext >> 8; - set_dev_entry_from_acpi(devid, e->flags, 0); + set_dev_entry_from_acpi(iommu, devid, e->flags, 0); amd_iommu_alias_table[devid] = devid_to; break; case IVHD_DEV_ALIAS_RANGE: @@ -437,24 +577,25 @@ static void __init init_iommu_from_acpi(struct amd_iommu *iommu, flags = e->flags; devid_to = e->ext >> 8; ext_flags = 0; - alias = 1; + alias = true; break; case IVHD_DEV_EXT_SELECT: devid = e->devid; - set_dev_entry_from_acpi(devid, e->flags, e->ext); + set_dev_entry_from_acpi(iommu, devid, e->flags, + e->ext); break; case IVHD_DEV_EXT_SELECT_RANGE: devid_start = e->devid; flags = e->flags; ext_flags = e->ext; - alias = 0; + alias = false; break; case IVHD_DEV_RANGE_END: devid = e->devid; for (dev_i = devid_start; dev_i <= devid; ++dev_i) { if (alias) amd_iommu_alias_table[dev_i] = devid_to; - set_dev_entry_from_acpi( + set_dev_entry_from_acpi(iommu, amd_iommu_alias_table[dev_i], flags, ext_flags); } @@ -467,6 +608,7 @@ static void __init init_iommu_from_acpi(struct amd_iommu *iommu, } } +/* Initializes the device->iommu mapping for the driver */ static int __init init_iommu_devices(struct amd_iommu *iommu) { u16 i; @@ -494,6 +636,11 @@ static void __init free_iommu_all(void) } } +/* + * This function clues the initialization function for one IOMMU + * together and also allocates the command buffer and programs the + * hardware. It does NOT enable the IOMMU. This is done afterwards. + */ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h) { spin_lock_init(&iommu->lock); @@ -521,6 +668,10 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h) return 0; } +/* + * Iterates over all IOMMU entries in the ACPI table, allocates the + * IOMMU structure and initializes it with init_iommu_one() + */ static int __init init_iommu_all(struct acpi_table_header *table) { u8 *p = (u8 *)table, *end = (u8 *)table; @@ -528,8 +679,6 @@ static int __init init_iommu_all(struct acpi_table_header *table) struct amd_iommu *iommu; int ret; - INIT_LIST_HEAD(&amd_iommu_list); - end += table->length; p += IVRS_HEADER_LENGTH; @@ -555,6 +704,14 @@ static int __init init_iommu_all(struct acpi_table_header *table) return 0; } +/**************************************************************************** + * + * The next functions belong to the third pass of parsing the ACPI + * table. In this last pass the memory mapping requirements are + * gathered (like exclusion and unity mapping reanges). + * + ****************************************************************************/ + static void __init free_unity_maps(void) { struct unity_map_entry *entry, *next; @@ -565,6 +722,7 @@ static void __init free_unity_maps(void) } } +/* called when we find an exclusion range definition in ACPI */ static int __init init_exclusion_range(struct ivmd_header *m) { int i; @@ -588,6 +746,7 @@ static int __init init_exclusion_range(struct ivmd_header *m) return 0; } +/* called for unity map ACPI definition */ static int __init init_unity_map_range(struct ivmd_header *m) { struct unity_map_entry *e = 0; @@ -619,13 +778,12 @@ static int __init init_unity_map_range(struct ivmd_header *m) return 0; } +/* iterates over all memory definitions we find in the ACPI table */ static int __init init_memory_definitions(struct acpi_table_header *table) { u8 *p = (u8 *)table, *end = (u8 *)table; struct ivmd_header *m; - INIT_LIST_HEAD(&amd_iommu_unity_map); - end += table->length; p += IVRS_HEADER_LENGTH; @@ -642,6 +800,10 @@ static int __init init_memory_definitions(struct acpi_table_header *table) return 0; } +/* + * This function finally enables all IOMMUs found in the system after + * they have been initialized + */ static void __init enable_iommus(void) { struct amd_iommu *iommu; @@ -678,6 +840,34 @@ static struct sys_device device_amd_iommu = { .cls = &amd_iommu_sysdev_class, }; +/* + * This is the core init function for AMD IOMMU hardware in the system. + * This function is called from the generic x86 DMA layer initialization + * code. + * + * This function basically parses the ACPI table for AMD IOMMU (IVRS) + * three times: + * + * 1 pass) Find the highest PCI device id the driver has to handle. + * Upon this information the size of the data structures is + * determined that needs to be allocated. + * + * 2 pass) Initialize the data structures just allocated with the + * information in the ACPI table about available AMD IOMMUs + * in the system. It also maps the PCI devices in the + * system to specific IOMMUs + * + * 3 pass) After the basic data structures are allocated and + * initialized we update them with information about memory + * remapping requirements parsed out of the ACPI table in + * this last pass. + * + * After that the hardware is initialized and ready to go. In the last + * step we do some Linux specific things like registering the driver in + * the dma_ops interface and initializing the suspend/resume support + * functions. Finally it prints some information about AMD IOMMUs and + * the driver state and enables the hardware. + */ int __init amd_iommu_init(void) { int i, ret = 0; @@ -699,14 +889,14 @@ int __init amd_iommu_init(void) if (acpi_table_parse("IVRS", find_last_devid_acpi) != 0) return -ENODEV; - dev_table_size = TBL_SIZE(DEV_TABLE_ENTRY_SIZE); - alias_table_size = TBL_SIZE(ALIAS_TABLE_ENTRY_SIZE); - rlookup_table_size = TBL_SIZE(RLOOKUP_TABLE_ENTRY_SIZE); + dev_table_size = tbl_size(DEV_TABLE_ENTRY_SIZE); + alias_table_size = tbl_size(ALIAS_TABLE_ENTRY_SIZE); + rlookup_table_size = tbl_size(RLOOKUP_TABLE_ENTRY_SIZE); ret = -ENOMEM; /* Device table - directly used by all IOMMUs */ - amd_iommu_dev_table = (void *)__get_free_pages(GFP_KERNEL, + amd_iommu_dev_table = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, get_order(dev_table_size)); if (amd_iommu_dev_table == NULL) goto out; @@ -730,27 +920,23 @@ int __init amd_iommu_init(void) * Protection Domain table - maps devices to protection domains * This table has the same size as the rlookup_table */ - amd_iommu_pd_table = (void *)__get_free_pages(GFP_KERNEL, + amd_iommu_pd_table = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, get_order(rlookup_table_size)); if (amd_iommu_pd_table == NULL) goto free; - amd_iommu_pd_alloc_bitmap = (void *)__get_free_pages(GFP_KERNEL, + amd_iommu_pd_alloc_bitmap = (void *)__get_free_pages( + GFP_KERNEL | __GFP_ZERO, get_order(MAX_DOMAIN_ID/8)); if (amd_iommu_pd_alloc_bitmap == NULL) goto free; /* - * memory is allocated now; initialize the device table with all zeroes - * and let all alias entries point to itself + * let all alias entries point to itself */ - memset(amd_iommu_dev_table, 0, dev_table_size); for (i = 0; i < amd_iommu_last_bdf; ++i) amd_iommu_alias_table[i] = i; - memset(amd_iommu_pd_table, 0, rlookup_table_size); - memset(amd_iommu_pd_alloc_bitmap, 0, MAX_DOMAIN_ID / 8); - /* * never allocate domain 0 because its used as the non-allocated and * error value placeholder @@ -795,24 +981,19 @@ out: return ret; free: - if (amd_iommu_pd_alloc_bitmap) - free_pages((unsigned long)amd_iommu_pd_alloc_bitmap, 1); + free_pages((unsigned long)amd_iommu_pd_alloc_bitmap, 1); - if (amd_iommu_pd_table) - free_pages((unsigned long)amd_iommu_pd_table, - get_order(rlookup_table_size)); + free_pages((unsigned long)amd_iommu_pd_table, + get_order(rlookup_table_size)); - if (amd_iommu_rlookup_table) - free_pages((unsigned long)amd_iommu_rlookup_table, - get_order(rlookup_table_size)); + free_pages((unsigned long)amd_iommu_rlookup_table, + get_order(rlookup_table_size)); - if (amd_iommu_alias_table) - free_pages((unsigned long)amd_iommu_alias_table, - get_order(alias_table_size)); + free_pages((unsigned long)amd_iommu_alias_table, + get_order(alias_table_size)); - if (amd_iommu_dev_table) - free_pages((unsigned long)amd_iommu_dev_table, - get_order(dev_table_size)); + free_pages((unsigned long)amd_iommu_dev_table, + get_order(dev_table_size)); free_iommu_all(); @@ -821,6 +1002,13 @@ free: goto out; } +/**************************************************************************** + * + * Early detect code. This code runs at IOMMU detection time in the DMA + * layer. It just looks if there is an IVRS ACPI table to detect AMD + * IOMMUs + * + ****************************************************************************/ static int __init early_amd_iommu_detect(struct acpi_table_header *table) { return 0; @@ -828,7 +1016,7 @@ static int __init early_amd_iommu_detect(struct acpi_table_header *table) void __init amd_iommu_detect(void) { - if (swiotlb || no_iommu || iommu_detected) + if (swiotlb || no_iommu || (iommu_detected && !gart_iommu_aperture)) return; if (acpi_table_parse("IVRS", early_amd_iommu_detect) == 0) { @@ -841,6 +1029,13 @@ void __init amd_iommu_detect(void) } } +/**************************************************************************** + * + * Parsing functions for the AMD IOMMU specific kernel command line + * options. + * + ****************************************************************************/ + static int __init parse_amd_iommu_options(char *str) { for (; *str; ++str) { @@ -853,20 +1048,10 @@ static int __init parse_amd_iommu_options(char *str) static int __init parse_amd_iommu_size_options(char *str) { - for (; *str; ++str) { - if (strcmp(str, "32M") == 0) - amd_iommu_aperture_order = 25; - if (strcmp(str, "64M") == 0) - amd_iommu_aperture_order = 26; - if (strcmp(str, "128M") == 0) - amd_iommu_aperture_order = 27; - if (strcmp(str, "256M") == 0) - amd_iommu_aperture_order = 28; - if (strcmp(str, "512M") == 0) - amd_iommu_aperture_order = 29; - if (strcmp(str, "1G") == 0) - amd_iommu_aperture_order = 30; - } + unsigned order = PAGE_SHIFT + get_order(memparse(str, &str)); + + if ((order > 24) && (order < 31)) + amd_iommu_aperture_order = order; return 1; } diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c index 9f907806c1a5..44e21826db11 100644 --- a/arch/x86/kernel/aperture_64.c +++ b/arch/x86/kernel/aperture_64.c @@ -21,6 +21,7 @@ #include <linux/suspend.h> #include <asm/e820.h> #include <asm/io.h> +#include <asm/iommu.h> #include <asm/gart.h> #include <asm/pci-direct.h> #include <asm/dma.h> diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index e9a00e5074b2..d6c898358371 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -75,7 +75,7 @@ char system_vectors[NR_VECTORS] = { [0 ... NR_VECTORS-1] = SYS_VECTOR_FREE}; /* * Debug level, exported for io_apic.c */ -int apic_verbosity; +unsigned int apic_verbosity; int pic_mode; @@ -177,7 +177,7 @@ void __cpuinit enable_NMI_through_LVT0(void) /* Level triggered for 82489DX */ if (!lapic_is_integrated()) v |= APIC_LVT_LEVEL_TRIGGER; - apic_write_around(APIC_LVT0, v); + apic_write(APIC_LVT0, v); } /** @@ -212,9 +212,6 @@ int lapic_get_maxlvt(void) * this function twice on the boot CPU, once with a bogus timeout * value, second time for real. The other (noncalibrating) CPUs * call this function only once, with the real, calibrated value. - * - * We do reads before writes even if unnecessary, to get around the - * P5 APIC double write bug. */ static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen) { @@ -229,18 +226,18 @@ static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen) if (!irqen) lvtt_value |= APIC_LVT_MASKED; - apic_write_around(APIC_LVTT, lvtt_value); + apic_write(APIC_LVTT, lvtt_value); /* * Divide PICLK by 16 */ tmp_value = apic_read(APIC_TDCR); - apic_write_around(APIC_TDCR, (tmp_value - & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE)) - | APIC_TDR_DIV_16); + apic_write(APIC_TDCR, + (tmp_value & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE)) | + APIC_TDR_DIV_16); if (!oneshot) - apic_write_around(APIC_TMICT, clocks/APIC_DIVISOR); + apic_write(APIC_TMICT, clocks / APIC_DIVISOR); } /* @@ -249,7 +246,7 @@ static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen) static int lapic_next_event(unsigned long delta, struct clock_event_device *evt) { - apic_write_around(APIC_TMICT, delta); + apic_write(APIC_TMICT, delta); return 0; } @@ -278,7 +275,7 @@ static void lapic_timer_setup(enum clock_event_mode mode, case CLOCK_EVT_MODE_SHUTDOWN: v = apic_read(APIC_LVTT); v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR); - apic_write_around(APIC_LVTT, v); + apic_write(APIC_LVTT, v); break; case CLOCK_EVT_MODE_RESUME: /* Nothing to do here */ @@ -372,12 +369,7 @@ static void __init lapic_cal_handler(struct clock_event_device *dev) } } -/* - * Setup the boot APIC - * - * Calibrate and verify the result. - */ -void __init setup_boot_APIC_clock(void) +static int __init calibrate_APIC_clock(void) { struct clock_event_device *levt = &__get_cpu_var(lapic_events); const long pm_100ms = PMTMR_TICKS_PER_SEC/10; @@ -387,24 +379,6 @@ void __init setup_boot_APIC_clock(void) long delta, deltapm; int pm_referenced = 0; - /* - * The local apic timer can be disabled via the kernel - * commandline or from the CPU detection code. Register the lapic - * timer as a dummy clock event source on SMP systems, so the - * broadcast mechanism is used. On UP systems simply ignore it. - */ - if (local_apic_timer_disabled) { - /* No broadcast on UP ! */ - if (num_possible_cpus() > 1) { - lapic_clockevent.mult = 1; - setup_APIC_timer(); - } - return; - } - - apic_printk(APIC_VERBOSE, "Using local APIC timer interrupts.\n" - "calibrating APIC timer ...\n"); - local_irq_disable(); /* Replace the global interrupt handler */ @@ -489,8 +463,6 @@ void __init setup_boot_APIC_clock(void) calibration_result / (1000000 / HZ), calibration_result % (1000000 / HZ)); - local_apic_timer_verify_ok = 1; - /* * Do a sanity check on the APIC calibration result */ @@ -498,12 +470,11 @@ void __init setup_boot_APIC_clock(void) local_irq_enable(); printk(KERN_WARNING "APIC frequency too slow, disabling apic timer\n"); - /* No broadcast on UP ! */ - if (num_possible_cpus() > 1) - setup_APIC_timer(); - return; + return -1; } + local_apic_timer_verify_ok = 1; + /* We trust the pm timer based calibration */ if (!pm_referenced) { apic_printk(APIC_VERBOSE, "... verify APIC timer\n"); @@ -543,22 +514,55 @@ void __init setup_boot_APIC_clock(void) if (!local_apic_timer_verify_ok) { printk(KERN_WARNING "APIC timer disabled due to verification failure.\n"); + return -1; + } + + return 0; +} + +/* + * Setup the boot APIC + * + * Calibrate and verify the result. + */ +void __init setup_boot_APIC_clock(void) +{ + /* + * The local apic timer can be disabled via the kernel + * commandline or from the CPU detection code. Register the lapic + * timer as a dummy clock event source on SMP systems, so the + * broadcast mechanism is used. On UP systems simply ignore it. + */ + if (local_apic_timer_disabled) { /* No broadcast on UP ! */ - if (num_possible_cpus() == 1) - return; - } else { - /* - * If nmi_watchdog is set to IO_APIC, we need the - * PIT/HPET going. Otherwise register lapic as a dummy - * device. - */ - if (nmi_watchdog != NMI_IO_APIC) - lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY; - else - printk(KERN_WARNING "APIC timer registered as dummy," - " due to nmi_watchdog=%d!\n", nmi_watchdog); + if (num_possible_cpus() > 1) { + lapic_clockevent.mult = 1; + setup_APIC_timer(); + } + return; } + apic_printk(APIC_VERBOSE, "Using local APIC timer interrupts.\n" + "calibrating APIC timer ...\n"); + + if (calibrate_APIC_clock()) { + /* No broadcast on UP ! */ + if (num_possible_cpus() > 1) + setup_APIC_timer(); + return; + } + + /* + * If nmi_watchdog is set to IO_APIC, we need the + * PIT/HPET going. Otherwise register lapic as a dummy + * device. + */ + if (nmi_watchdog != NMI_IO_APIC) + lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY; + else + printk(KERN_WARNING "APIC timer registered as dummy," + " due to nmi_watchdog=%d!\n", nmi_watchdog); + /* Setup the lapic or request the broadcast */ setup_APIC_timer(); } @@ -693,44 +697,44 @@ void clear_local_APIC(void) */ if (maxlvt >= 3) { v = ERROR_APIC_VECTOR; /* any non-zero vector will do */ - apic_write_around(APIC_LVTERR, v | APIC_LVT_MASKED); + apic_write(APIC_LVTERR, v | APIC_LVT_MASKED); } /* * Careful: we have to set masks only first to deassert * any level-triggered sources. */ v = apic_read(APIC_LVTT); - apic_write_around(APIC_LVTT, v | APIC_LVT_MASKED); + apic_write(APIC_LVTT, v | APIC_LVT_MASKED); v = apic_read(APIC_LVT0); - apic_write_around(APIC_LVT0, v | APIC_LVT_MASKED); + apic_write(APIC_LVT0, v | APIC_LVT_MASKED); v = apic_read(APIC_LVT1); - apic_write_around(APIC_LVT1, v | APIC_LVT_MASKED); + apic_write(APIC_LVT1, v | APIC_LVT_MASKED); if (maxlvt >= 4) { v = apic_read(APIC_LVTPC); - apic_write_around(APIC_LVTPC, v | APIC_LVT_MASKED); + apic_write(APIC_LVTPC, v | APIC_LVT_MASKED); } /* lets not touch this if we didn't frob it */ #ifdef CONFIG_X86_MCE_P4THERMAL if (maxlvt >= 5) { v = apic_read(APIC_LVTTHMR); - apic_write_around(APIC_LVTTHMR, v | APIC_LVT_MASKED); + apic_write(APIC_LVTTHMR, v | APIC_LVT_MASKED); } #endif /* * Clean APIC state for other OSs: */ - apic_write_around(APIC_LVTT, APIC_LVT_MASKED); - apic_write_around(APIC_LVT0, APIC_LVT_MASKED); - apic_write_around(APIC_LVT1, APIC_LVT_MASKED); + apic_write(APIC_LVTT, APIC_LVT_MASKED); + apic_write(APIC_LVT0, APIC_LVT_MASKED); + apic_write(APIC_LVT1, APIC_LVT_MASKED); if (maxlvt >= 3) - apic_write_around(APIC_LVTERR, APIC_LVT_MASKED); + apic_write(APIC_LVTERR, APIC_LVT_MASKED); if (maxlvt >= 4) - apic_write_around(APIC_LVTPC, APIC_LVT_MASKED); + apic_write(APIC_LVTPC, APIC_LVT_MASKED); #ifdef CONFIG_X86_MCE_P4THERMAL if (maxlvt >= 5) - apic_write_around(APIC_LVTTHMR, APIC_LVT_MASKED); + apic_write(APIC_LVTTHMR, APIC_LVT_MASKED); #endif /* Integrated APIC (!82489DX) ? */ if (lapic_is_integrated()) { @@ -756,7 +760,7 @@ void disable_local_APIC(void) */ value = apic_read(APIC_SPIV); value &= ~APIC_SPIV_APIC_ENABLED; - apic_write_around(APIC_SPIV, value); + apic_write(APIC_SPIV, value); /* * When LAPIC was disabled by the BIOS and enabled by the kernel, @@ -865,8 +869,8 @@ void __init sync_Arb_IDs(void) apic_wait_icr_idle(); apic_printk(APIC_DEBUG, "Synchronizing Arb IDs.\n"); - apic_write_around(APIC_ICR, APIC_DEST_ALLINC | APIC_INT_LEVELTRIG - | APIC_DM_INIT); + apic_write(APIC_ICR, + APIC_DEST_ALLINC | APIC_INT_LEVELTRIG | APIC_DM_INIT); } /* @@ -902,16 +906,16 @@ void __init init_bsp_APIC(void) else value |= APIC_SPIV_FOCUS_DISABLED; value |= SPURIOUS_APIC_VECTOR; - apic_write_around(APIC_SPIV, value); + apic_write(APIC_SPIV, value); /* * Set up the virtual wire mode. */ - apic_write_around(APIC_LVT0, APIC_DM_EXTINT); + apic_write(APIC_LVT0, APIC_DM_EXTINT); value = APIC_DM_NMI; if (!lapic_is_integrated()) /* 82489DX */ value |= APIC_LVT_LEVEL_TRIGGER; - apic_write_around(APIC_LVT1, value); + apic_write(APIC_LVT1, value); } static void __cpuinit lapic_setup_esr(void) @@ -926,7 +930,7 @@ static void __cpuinit lapic_setup_esr(void) /* enables sending errors */ value = ERROR_APIC_VECTOR; - apic_write_around(APIC_LVTERR, value); + apic_write(APIC_LVTERR, value); /* * spec says clear errors after enabling vector. */ @@ -989,7 +993,7 @@ void __cpuinit setup_local_APIC(void) */ value = apic_read(APIC_TASKPRI); value &= ~APIC_TPRI_MASK; - apic_write_around(APIC_TASKPRI, value); + apic_write(APIC_TASKPRI, value); /* * After a crash, we no longer service the interrupts and a pending @@ -1047,7 +1051,7 @@ void __cpuinit setup_local_APIC(void) * Set spurious IRQ vector */ value |= SPURIOUS_APIC_VECTOR; - apic_write_around(APIC_SPIV, value); + apic_write(APIC_SPIV, value); /* * Set up LVT0, LVT1: @@ -1069,7 +1073,7 @@ void __cpuinit setup_local_APIC(void) apic_printk(APIC_VERBOSE, "masked ExtINT on CPU#%d\n", smp_processor_id()); } - apic_write_around(APIC_LVT0, value); + apic_write(APIC_LVT0, value); /* * only the BP should see the LINT1 NMI signal, obviously. @@ -1080,7 +1084,7 @@ void __cpuinit setup_local_APIC(void) value = APIC_DM_NMI | APIC_LVT_MASKED; if (!integrated) /* 82489DX */ value |= APIC_LVT_LEVEL_TRIGGER; - apic_write_around(APIC_LVT1, value); + apic_write(APIC_LVT1, value); } void __cpuinit end_local_APIC_setup(void) @@ -1091,7 +1095,7 @@ void __cpuinit end_local_APIC_setup(void) /* Disable the local apic timer */ value = apic_read(APIC_LVTT); value |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR); - apic_write_around(APIC_LVTT, value); + apic_write(APIC_LVTT, value); setup_apic_nmi_watchdog(NULL); apic_pm_activate(); @@ -1416,7 +1420,7 @@ void disconnect_bsp_APIC(int virt_wire_setup) value &= ~APIC_VECTOR_MASK; value |= APIC_SPIV_APIC_ENABLED; value |= 0xf; - apic_write_around(APIC_SPIV, value); + apic_write(APIC_SPIV, value); if (!virt_wire_setup) { /* @@ -1429,10 +1433,10 @@ void disconnect_bsp_APIC(int virt_wire_setup) APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED); value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING; value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_EXTINT); - apic_write_around(APIC_LVT0, value); + apic_write(APIC_LVT0, value); } else { /* Disable LVT0 */ - apic_write_around(APIC_LVT0, APIC_LVT_MASKED); + apic_write(APIC_LVT0, APIC_LVT_MASKED); } /* @@ -1446,7 +1450,7 @@ void disconnect_bsp_APIC(int virt_wire_setup) APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED); value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING; value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_NMI); - apic_write_around(APIC_LVT1, value); + apic_write(APIC_LVT1, value); } } diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 16e586cacbdc..7f1f030da7ee 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -54,7 +54,7 @@ EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok); /* * Debug level, exported for io_apic.c */ -int apic_verbosity; +unsigned int apic_verbosity; /* Have we found an MP table */ int smp_found_config; @@ -314,7 +314,7 @@ static void setup_APIC_timer(void) #define TICK_COUNT 100000000 -static void __init calibrate_APIC_clock(void) +static int __init calibrate_APIC_clock(void) { unsigned apic, apic_start; unsigned long tsc, tsc_start; @@ -368,6 +368,17 @@ static void __init calibrate_APIC_clock(void) clockevent_delta2ns(0xF, &lapic_clockevent); calibration_result = result / HZ; + + /* + * Do a sanity check on the APIC calibration result + */ + if (calibration_result < (1000000 / HZ)) { + printk(KERN_WARNING + "APIC frequency too slow, disabling apic timer\n"); + return -1; + } + + return 0; } /* @@ -394,14 +405,7 @@ void __init setup_boot_APIC_clock(void) } printk(KERN_INFO "Using local APIC timer interrupts.\n"); - calibrate_APIC_clock(); - - /* - * Do a sanity check on the APIC calibration result - */ - if (calibration_result < (1000000 / HZ)) { - printk(KERN_WARNING - "APIC frequency too slow, disabling apic timer\n"); + if (calibrate_APIC_clock()) { /* No broadcast on UP ! */ if (num_possible_cpus() > 1) setup_APIC_timer(); diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c index bacf5deeec2d..aa89387006fe 100644 --- a/arch/x86/kernel/asm-offsets_64.c +++ b/arch/x86/kernel/asm-offsets_64.c @@ -18,6 +18,8 @@ #include <asm/ia32.h> #include <asm/bootparam.h> +#include <xen/interface/xen.h> + #define __NO_STUBS 1 #undef __SYSCALL #undef _ASM_X86_64_UNISTD_H_ @@ -131,5 +133,14 @@ int main(void) OFFSET(BP_loadflags, boot_params, hdr.loadflags); OFFSET(BP_hardware_subarch, boot_params, hdr.hardware_subarch); OFFSET(BP_version, boot_params, hdr.version); + + BLANK(); + DEFINE(PAGE_SIZE_asm, PAGE_SIZE); +#ifdef CONFIG_XEN + BLANK(); + OFFSET(XEN_vcpu_info_mask, vcpu_info, evtchn_upcall_mask); + OFFSET(XEN_vcpu_info_pending, vcpu_info, evtchn_upcall_pending); +#undef ENTRY +#endif return 0; } diff --git a/arch/x86/kernel/bios_uv.c b/arch/x86/kernel/bios_uv.c new file mode 100644 index 000000000000..c639bd55391c --- /dev/null +++ b/arch/x86/kernel/bios_uv.c @@ -0,0 +1,48 @@ +/* + * BIOS run time interface routines. + * + * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <asm/uv/bios.h> + +const char * +x86_bios_strerror(long status) +{ + const char *str; + switch (status) { + case 0: str = "Call completed without error"; break; + case -1: str = "Not implemented"; break; + case -2: str = "Invalid argument"; break; + case -3: str = "Call completed with error"; break; + default: str = "Unknown BIOS status code"; break; + } + return str; +} + +long +x86_bios_freq_base(unsigned long which, unsigned long *ticks_per_second, + unsigned long *drift_info) +{ + struct uv_bios_retval isrv; + + BIOS_CALL(isrv, BIOS_FREQ_BASE, which, 0, 0, 0, 0, 0, 0); + *ticks_per_second = isrv.v0; + *drift_info = isrv.v1; + return isrv.status; +} +EXPORT_SYMBOL_GPL(x86_bios_freq_base); diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 81a07ca65d44..cae9cabc3031 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -24,8 +24,6 @@ extern void vide(void); __asm__(".align 4\nvide: ret"); -int force_mwait __cpuinitdata; - static void __cpuinit early_init_amd(struct cpuinfo_x86 *c) { if (cpuid_eax(0x80000000) >= 0x80000007) { diff --git a/arch/x86/kernel/cpu/amd_64.c b/arch/x86/kernel/cpu/amd_64.c index 7c36fb8a28d4..d1692b2a41ff 100644 --- a/arch/x86/kernel/cpu/amd_64.c +++ b/arch/x86/kernel/cpu/amd_64.c @@ -115,6 +115,8 @@ static void __cpuinit early_init_amd(struct cpuinfo_x86 *c) /* c->x86_power is 8000_0007 edx. Bit 8 is constant TSC */ if (c->x86_power & (1<<8)) set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); + + set_cpu_cap(c, X86_FEATURE_SYSCALL32); } static void __cpuinit init_amd(struct cpuinfo_x86 *c) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 1b1c56bb338f..c9b58a806e85 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -131,13 +131,7 @@ static void __init check_popad(void) * (for due to lack of "invlpg" and working WP on a i386) * - In order to run on anything without a TSC, we need to be * compiled for a i486. - * - In order to support the local APIC on a buggy Pentium machine, - * we need to be compiled with CONFIG_X86_GOOD_APIC disabled, - * which happens implicitly if compiled for a Pentium or lower - * (unless an advanced selection of CPU features is used) as an - * otherwise config implies a properly working local APIC without - * the need to do extra reads from the APIC. -*/ + */ static void __init check_config(void) { @@ -151,21 +145,6 @@ static void __init check_config(void) if (boot_cpu_data.x86 == 3) panic("Kernel requires i486+ for 'invlpg' and other features"); #endif - -/* - * If we were told we had a good local APIC, check for buggy Pentia, - * i.e. all B steppings and the C2 stepping of P54C when using their - * integrated APIC (see 11AP erratum in "Pentium Processor - * Specification Update"). - */ -#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_GOOD_APIC) - if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL - && cpu_has_apic - && boot_cpu_data.x86 == 5 - && boot_cpu_data.x86_model == 2 - && (boot_cpu_data.x86_mask < 6 || boot_cpu_data.x86_mask == 11)) - panic("Kernel compiled for PMMX+, assumes a local APIC without the read-before-write bug!"); -#endif } diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c index daee611f0140..dd6e3f15017e 100644 --- a/arch/x86/kernel/cpu/common_64.c +++ b/arch/x86/kernel/cpu/common_64.c @@ -7,15 +7,13 @@ #include <linux/module.h> #include <linux/kgdb.h> #include <linux/topology.h> -#include <linux/string.h> #include <linux/delay.h> #include <linux/smp.h> -#include <linux/module.h> #include <linux/percpu.h> -#include <asm/processor.h> #include <asm/i387.h> #include <asm/msr.h> #include <asm/io.h> +#include <asm/linkage.h> #include <asm/mmu_context.h> #include <asm/mtrr.h> #include <asm/mce.h> @@ -315,9 +313,6 @@ static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c) c->x86_phys_bits = eax & 0xff; } - /* Assume all 64-bit CPUs support 32-bit syscall */ - set_cpu_cap(c, X86_FEATURE_SYSCALL32); - if (c->x86_vendor != X86_VENDOR_UNKNOWN && cpu_devs[c->x86_vendor]->c_early_init) cpu_devs[c->x86_vendor]->c_early_init(c); @@ -512,8 +507,7 @@ void pda_init(int cpu) } char boot_exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + - DEBUG_STKSZ] -__attribute__((section(".bss.page_aligned"))); + DEBUG_STKSZ] __page_aligned_bss; extern asmlinkage void ignore_sysret(void); diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 70609efdf1da..b75f2569b8f8 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -227,6 +227,16 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) if (cpu_has_bts) ds_init_intel(c); + /* + * See if we have a good local APIC by checking for buggy Pentia, + * i.e. all B steppings and the C2 stepping of P54C when using their + * integrated APIC (see 11AP erratum in "Pentium Processor + * Specification Update"). + */ + if (cpu_has_apic && (c->x86<<8 | c->x86_model<<4) == 0x520 && + (c->x86_mask < 0x6 || c->x86_mask == 0xb)) + set_cpu_cap(c, X86_FEATURE_11AP); + #ifdef CONFIG_X86_NUMAQ numaq_tsc_disable(); #endif diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index 2c8afafa18e8..ff517f0b8cc4 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -780,15 +780,14 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev) } kobject_put(per_cpu(cache_kobject, cpu)); cpuid4_cache_sysfs_exit(cpu); - break; + return retval; } kobject_uevent(&(this_object->kobj), KOBJ_ADD); } - if (!retval) - cpu_set(cpu, cache_dev_map); + cpu_set(cpu, cache_dev_map); kobject_uevent(per_cpu(cache_kobject, cpu), KOBJ_ADD); - return retval; + return 0; } static void __cpuinit cache_remove_dev(struct sys_device * sys_dev) diff --git a/arch/x86/kernel/cpu/mcheck/p4.c b/arch/x86/kernel/cpu/mcheck/p4.c index eef001ad3bde..9b60fce09f75 100644 --- a/arch/x86/kernel/cpu/mcheck/p4.c +++ b/arch/x86/kernel/cpu/mcheck/p4.c @@ -102,7 +102,7 @@ static void intel_init_thermal(struct cpuinfo_x86 *c) /* The temperature transition interrupt handler setup */ h = THERMAL_APIC_VECTOR; /* our delivery vector */ h |= (APIC_DM_FIXED | APIC_LVT_MASKED); /* we'll mask till we're ready */ - apic_write_around(APIC_LVTTHMR, h); + apic_write(APIC_LVTTHMR, h); rdmsr(MSR_IA32_THERM_INTERRUPT, l, h); wrmsr(MSR_IA32_THERM_INTERRUPT, l | 0x03 , h); @@ -114,7 +114,7 @@ static void intel_init_thermal(struct cpuinfo_x86 *c) wrmsr(MSR_IA32_MISC_ENABLE, l | (1<<3), h); l = apic_read(APIC_LVTTHMR); - apic_write_around(APIC_LVTTHMR, l & ~APIC_LVT_MASKED); + apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED); printk(KERN_INFO "CPU%d: Thermal monitoring enabled\n", cpu); /* enable thermal throttle processing */ diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 28c29180b380..9af89078f7bb 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -877,7 +877,8 @@ void __init early_res_to_bootmem(u64 start, u64 end) for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) count++; - printk(KERN_INFO "(%d early reservations) ==> bootmem\n", count); + printk(KERN_INFO "(%d early reservations) ==> bootmem [%010llx - %010llx]\n", + count, start, end); for (i = 0; i < count; i++) { struct early_res *r = &early_res[i]; printk(KERN_INFO " #%d [%010llx - %010llx] %16s", i, @@ -1298,11 +1299,6 @@ void __init e820_reserve_resources(void) } } -/* - * Non-standard memory setup can be specified via this quirk: - */ -char * (*arch_memory_setup_quirk)(void); - char *__init default_machine_specific_memory_setup(void) { char *who = "BIOS-e820"; @@ -1343,8 +1339,8 @@ char *__init default_machine_specific_memory_setup(void) char *__init __attribute__((weak)) machine_specific_memory_setup(void) { - if (arch_memory_setup_quirk) { - char *who = arch_memory_setup_quirk(); + if (x86_quirks->arch_memory_setup) { + char *who = x86_quirks->arch_memory_setup(); if (who) return who; @@ -1367,24 +1363,3 @@ void __init setup_memory_map(void) printk(KERN_INFO "BIOS-provided physical RAM map:\n"); e820_print_map(who); } - -#ifdef CONFIG_X86_64 -int __init arch_get_ram_range(int slot, u64 *addr, u64 *size) -{ - int i; - - if (slot < 0 || slot >= e820.nr_map) - return -1; - for (i = slot; i < e820.nr_map; i++) { - if (e820.map[i].type != E820_RAM) - continue; - break; - } - if (i == e820.nr_map || e820.map[i].addr > (max_pfn << PAGE_SHIFT)) - return -1; - *addr = e820.map[i].addr; - *size = min_t(u64, e820.map[i].size + e820.map[i].addr, - max_pfn << PAGE_SHIFT) - *addr; - return i + 1; -} -#endif diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c index a0e11c0cc872..4353cf5e6fac 100644 --- a/arch/x86/kernel/early-quirks.c +++ b/arch/x86/kernel/early-quirks.c @@ -16,10 +16,7 @@ #include <asm/dma.h> #include <asm/io_apic.h> #include <asm/apic.h> - -#ifdef CONFIG_GART_IOMMU -#include <asm/gart.h> -#endif +#include <asm/iommu.h> static void __init fix_hypertransport_config(int num, int slot, int func) { diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 6bc07f0f1202..cdfd94cc6b14 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -332,7 +332,7 @@ sysenter_past_esp: GET_THREAD_INFO(%ebp) /* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */ - testw $(_TIF_SYSCALL_EMU|_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT),TI_flags(%ebp) + testw $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp) jnz syscall_trace_entry cmpl $(nr_syscalls), %eax jae syscall_badsys @@ -370,7 +370,7 @@ ENTRY(system_call) GET_THREAD_INFO(%ebp) # system call tracing in operation / emulation /* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */ - testw $(_TIF_SYSCALL_EMU|_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT),TI_flags(%ebp) + testw $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp) jnz syscall_trace_entry cmpl $(nr_syscalls), %eax jae syscall_badsys @@ -383,10 +383,6 @@ syscall_exit: # setting need_resched or sigpending # between sampling and the iret TRACE_IRQS_OFF - testl $X86_EFLAGS_TF,PT_EFLAGS(%esp) # If tracing set singlestep flag on exit - jz no_singlestep - orl $_TIF_SINGLESTEP,TI_flags(%ebp) -no_singlestep: movl TI_flags(%ebp), %ecx testw $_TIF_ALLWORK_MASK, %cx # current->work jne syscall_exit_work @@ -514,12 +510,8 @@ END(work_pending) syscall_trace_entry: movl $-ENOSYS,PT_EAX(%esp) movl %esp, %eax - xorl %edx,%edx - call do_syscall_trace - cmpl $0, %eax - jne resume_userspace # ret != 0 -> running under PTRACE_SYSEMU, - # so must skip actual syscall - movl PT_ORIG_EAX(%esp), %eax + call syscall_trace_enter + /* What it returned is what we'll actually use. */ cmpl $(nr_syscalls), %eax jnae syscall_call jmp syscall_exit @@ -528,14 +520,13 @@ END(syscall_trace_entry) # perform syscall exit tracing ALIGN syscall_exit_work: - testb $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP), %cl + testb $_TIF_WORK_SYSCALL_EXIT, %cl jz work_pending TRACE_IRQS_ON - ENABLE_INTERRUPTS(CLBR_ANY) # could let do_syscall_trace() call + ENABLE_INTERRUPTS(CLBR_ANY) # could let syscall_trace_leave() call # schedule() instead movl %esp, %eax - movl $1, %edx - call do_syscall_trace + call syscall_trace_leave jmp resume_userspace END(syscall_exit_work) CFI_ENDPROC @@ -1024,6 +1015,7 @@ ENDPROC(kernel_thread_helper) ENTRY(xen_sysenter_target) RING0_INT_FRAME addl $5*4, %esp /* remove xen-provided frame */ + CFI_ADJUST_CFA_OFFSET -5*4 jmp sysenter_past_esp CFI_ENDPROC diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index ae63e584c340..8410e26f4183 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -349,8 +349,7 @@ ENTRY(system_call_after_swapgs) movq %rcx,RIP-ARGOFFSET(%rsp) CFI_REL_OFFSET rip,RIP-ARGOFFSET GET_THREAD_INFO(%rcx) - testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP), \ - TI_flags(%rcx) + testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%rcx) jnz tracesys cmpq $__NR_syscall_max,%rax ja badsys @@ -430,7 +429,12 @@ tracesys: FIXUP_TOP_OF_STACK %rdi movq %rsp,%rdi call syscall_trace_enter - LOAD_ARGS ARGOFFSET /* reload args from stack in case ptrace changed it */ + /* + * Reload arg registers from stack in case ptrace changed them. + * We don't reload %rax because syscall_trace_enter() returned + * the value it wants us to use in the table lookup. + */ + LOAD_ARGS ARGOFFSET, 1 RESTORE_REST cmpq $__NR_syscall_max,%rax ja int_ret_from_sys_call /* RAX(%rsp) set to -ENOSYS above */ @@ -483,7 +487,7 @@ int_very_careful: ENABLE_INTERRUPTS(CLBR_NONE) SAVE_REST /* Check for syscall exit trace */ - testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edx + testl $_TIF_WORK_SYSCALL_EXIT,%edx jz int_signal pushq %rdi CFI_ADJUST_CFA_OFFSET 8 @@ -491,7 +495,7 @@ int_very_careful: call syscall_trace_leave popq %rdi CFI_ADJUST_CFA_OFFSET -8 - andl $~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edi + andl $~(_TIF_WORK_SYSCALL_EXIT|_TIF_SYSCALL_EMU),%edi jmp int_restore_rest int_signal: @@ -1189,6 +1193,7 @@ END(device_not_available) /* runs on exception stack */ KPROBE_ENTRY(debug) INTR_FRAME + PARAVIRT_ADJUST_EXCEPTION_FRAME pushq $0 CFI_ADJUST_CFA_OFFSET 8 paranoidentry do_debug, DEBUG_STACK @@ -1198,6 +1203,7 @@ KPROBE_END(debug) /* runs on exception stack */ KPROBE_ENTRY(nmi) INTR_FRAME + PARAVIRT_ADJUST_EXCEPTION_FRAME pushq $-1 CFI_ADJUST_CFA_OFFSET 8 paranoidentry do_nmi, 0, 0 @@ -1211,6 +1217,7 @@ KPROBE_END(nmi) KPROBE_ENTRY(int3) INTR_FRAME + PARAVIRT_ADJUST_EXCEPTION_FRAME pushq $0 CFI_ADJUST_CFA_OFFSET 8 paranoidentry do_int3, DEBUG_STACK @@ -1237,6 +1244,7 @@ END(coprocessor_segment_overrun) /* runs on exception stack */ ENTRY(double_fault) XCPT_FRAME + PARAVIRT_ADJUST_EXCEPTION_FRAME paranoidentry do_double_fault jmp paranoid_exit1 CFI_ENDPROC @@ -1253,6 +1261,7 @@ END(segment_not_present) /* runs on exception stack */ ENTRY(stack_segment) XCPT_FRAME + PARAVIRT_ADJUST_EXCEPTION_FRAME paranoidentry do_stack_segment jmp paranoid_exit1 CFI_ENDPROC @@ -1278,6 +1287,7 @@ END(spurious_interrupt_bug) /* runs on exception stack */ ENTRY(machine_check) INTR_FRAME + PARAVIRT_ADJUST_EXCEPTION_FRAME pushq $0 CFI_ADJUST_CFA_OFFSET 8 paranoidentry do_machine_check @@ -1312,3 +1322,103 @@ KPROBE_ENTRY(ignore_sysret) sysret CFI_ENDPROC ENDPROC(ignore_sysret) + +#ifdef CONFIG_XEN +ENTRY(xen_hypervisor_callback) + zeroentry xen_do_hypervisor_callback +END(xen_hypervisor_callback) + +/* +# A note on the "critical region" in our callback handler. +# We want to avoid stacking callback handlers due to events occurring +# during handling of the last event. To do this, we keep events disabled +# until we've done all processing. HOWEVER, we must enable events before +# popping the stack frame (can't be done atomically) and so it would still +# be possible to get enough handler activations to overflow the stack. +# Although unlikely, bugs of that kind are hard to track down, so we'd +# like to avoid the possibility. +# So, on entry to the handler we detect whether we interrupted an +# existing activation in its critical region -- if so, we pop the current +# activation and restart the handler using the previous one. +*/ +ENTRY(xen_do_hypervisor_callback) # do_hypervisor_callback(struct *pt_regs) + CFI_STARTPROC +/* Since we don't modify %rdi, evtchn_do_upall(struct *pt_regs) will + see the correct pointer to the pt_regs */ + movq %rdi, %rsp # we don't return, adjust the stack frame + CFI_ENDPROC + CFI_DEFAULT_STACK +11: incl %gs:pda_irqcount + movq %rsp,%rbp + CFI_DEF_CFA_REGISTER rbp + cmovzq %gs:pda_irqstackptr,%rsp + pushq %rbp # backlink for old unwinder + call xen_evtchn_do_upcall + popq %rsp + CFI_DEF_CFA_REGISTER rsp + decl %gs:pda_irqcount + jmp error_exit + CFI_ENDPROC +END(do_hypervisor_callback) + +/* +# Hypervisor uses this for application faults while it executes. +# We get here for two reasons: +# 1. Fault while reloading DS, ES, FS or GS +# 2. Fault while executing IRET +# Category 1 we do not need to fix up as Xen has already reloaded all segment +# registers that could be reloaded and zeroed the others. +# Category 2 we fix up by killing the current process. We cannot use the +# normal Linux return path in this case because if we use the IRET hypercall +# to pop the stack frame we end up in an infinite loop of failsafe callbacks. +# We distinguish between categories by comparing each saved segment register +# with its current contents: any discrepancy means we in category 1. +*/ +ENTRY(xen_failsafe_callback) + framesz = (RIP-0x30) /* workaround buggy gas */ + _frame framesz + CFI_REL_OFFSET rcx, 0 + CFI_REL_OFFSET r11, 8 + movw %ds,%cx + cmpw %cx,0x10(%rsp) + CFI_REMEMBER_STATE + jne 1f + movw %es,%cx + cmpw %cx,0x18(%rsp) + jne 1f + movw %fs,%cx + cmpw %cx,0x20(%rsp) + jne 1f + movw %gs,%cx + cmpw %cx,0x28(%rsp) + jne 1f + /* All segments match their saved values => Category 2 (Bad IRET). */ + movq (%rsp),%rcx + CFI_RESTORE rcx + movq 8(%rsp),%r11 + CFI_RESTORE r11 + addq $0x30,%rsp + CFI_ADJUST_CFA_OFFSET -0x30 + pushq $0 + CFI_ADJUST_CFA_OFFSET 8 + pushq %r11 + CFI_ADJUST_CFA_OFFSET 8 + pushq %rcx + CFI_ADJUST_CFA_OFFSET 8 + jmp general_protection + CFI_RESTORE_STATE +1: /* Segment mismatch => Category 1 (Bad segment). Retry the IRET. */ + movq (%rsp),%rcx + CFI_RESTORE rcx + movq 8(%rsp),%r11 + CFI_RESTORE r11 + addq $0x30,%rsp + CFI_ADJUST_CFA_OFFSET -0x30 + pushq $0 + CFI_ADJUST_CFA_OFFSET 8 + SAVE_ALL + jmp error_exit + CFI_ENDPROC +END(xen_failsafe_callback) + +#endif /* CONFIG_XEN */ diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c index 711f11c30b06..3c3929340692 100644 --- a/arch/x86/kernel/genx2apic_uv_x.c +++ b/arch/x86/kernel/genx2apic_uv_x.c @@ -24,6 +24,7 @@ #include <asm/pgtable.h> #include <asm/uv/uv_mmrs.h> #include <asm/uv/uv_hub.h> +#include <asm/uv/bios.h> DEFINE_PER_CPU(struct uv_hub_info_s, __uv_hub_info); EXPORT_PER_CPU_SYMBOL_GPL(__uv_hub_info); @@ -40,6 +41,9 @@ EXPORT_SYMBOL_GPL(uv_cpu_to_blade); short uv_possible_blades; EXPORT_SYMBOL_GPL(uv_possible_blades); +unsigned long sn_rtc_cycles_per_second; +EXPORT_SYMBOL(sn_rtc_cycles_per_second); + /* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */ static cpumask_t uv_target_cpus(void) @@ -272,6 +276,23 @@ static __init void map_mmioh_high(int max_pnode) map_high("MMIOH", mmioh.s.base, shift, map_uc); } +static __init void uv_rtc_init(void) +{ + long status, ticks_per_sec, drift; + + status = + x86_bios_freq_base(BIOS_FREQ_BASE_REALTIME_CLOCK, &ticks_per_sec, + &drift); + if (status != 0 || ticks_per_sec < 100000) { + printk(KERN_WARNING + "unable to determine platform RTC clock frequency, " + "guessing.\n"); + /* BIOS gives wrong value for clock freq. so guess */ + sn_rtc_cycles_per_second = 1000000000000UL / 30000UL; + } else + sn_rtc_cycles_per_second = ticks_per_sec; +} + static __init void uv_system_init(void) { union uvh_si_addr_map_config_u m_n_config; @@ -326,6 +347,8 @@ static __init void uv_system_init(void) gnode_upper = (((unsigned long)node_id.s.node_id) & ~((1 << n_val) - 1)) << m_val; + uv_rtc_init(); + for_each_present_cpu(cpu) { nid = cpu_to_node(cpu); pnode = uv_apicid_to_pnode(per_cpu(x86_cpu_to_apicid, cpu)); diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index c97819829146..1b318e903bf6 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -39,6 +39,13 @@ static struct x8664_pda *__cpu_pda[NR_CPUS] __initdata; static struct x8664_pda *__cpu_pda[NR_CPUS] __read_mostly; #endif +void __init x86_64_init_pda(void) +{ + _cpu_pda = __cpu_pda; + cpu_pda(0) = &_boot_cpu_pda; + pda_init(0); +} + static void __init zap_identity_mappings(void) { pgd_t *pgd = pgd_offset_k(0UL); @@ -102,9 +109,7 @@ void __init x86_64_start_kernel(char * real_mode_data) early_printk("Kernel alive\n"); - _cpu_pda = __cpu_pda; - cpu_pda(0) = &_boot_cpu_pda; - pda_init(0); + x86_64_init_pda(); early_printk("Kernel really alive\n"); diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index b07ac7b217cb..db3280afe886 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -407,6 +407,7 @@ ENTRY(phys_base) /* This must match the first entry in level2_kernel_pgt */ .quad 0x0000000000000000 +#include "../../x86/xen/xen-head.S" .section .bss, "aw", @nobits .align L1_CACHE_BYTES diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c index 558abf4c796a..de9aa0e3a9c5 100644 --- a/arch/x86/kernel/io_apic_32.c +++ b/arch/x86/kernel/io_apic_32.c @@ -756,7 +756,7 @@ void send_IPI_self(int vector) /* * Send the IPI. The write to APIC_ICR fires this off. */ - apic_write_around(APIC_ICR, cfg); + apic_write(APIC_ICR, cfg); } #endif /* !CONFIG_SMP */ @@ -2030,7 +2030,7 @@ static void mask_lapic_irq(unsigned int irq) unsigned long v; v = apic_read(APIC_LVT0); - apic_write_around(APIC_LVT0, v | APIC_LVT_MASKED); + apic_write(APIC_LVT0, v | APIC_LVT_MASKED); } static void unmask_lapic_irq(unsigned int irq) @@ -2038,7 +2038,7 @@ static void unmask_lapic_irq(unsigned int irq) unsigned long v; v = apic_read(APIC_LVT0); - apic_write_around(APIC_LVT0, v & ~APIC_LVT_MASKED); + apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED); } static struct irq_chip lapic_chip __read_mostly = { @@ -2168,7 +2168,7 @@ static inline void __init check_timer(void) * The AEOI mode will finish them in the 8259A * automatically. */ - apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); + apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); init_8259A(1); timer_ack = (nmi_watchdog == NMI_IO_APIC && !APIC_INTEGRATED(ver)); @@ -2177,8 +2177,9 @@ static inline void __init check_timer(void) pin2 = ioapic_i8259.pin; apic2 = ioapic_i8259.apic; - printk(KERN_INFO "..TIMER: vector=0x%02X apic1=%d pin1=%d apic2=%d pin2=%d\n", - vector, apic1, pin1, apic2, pin2); + apic_printk(APIC_QUIET, KERN_INFO "..TIMER: vector=0x%02X " + "apic1=%d pin1=%d apic2=%d pin2=%d\n", + vector, apic1, pin1, apic2, pin2); /* * Some BIOS writers are clueless and report the ExtINTA @@ -2216,12 +2217,13 @@ static inline void __init check_timer(void) } clear_IO_APIC_pin(apic1, pin1); if (!no_pin1) - printk(KERN_ERR "..MP-BIOS bug: " - "8254 timer not connected to IO-APIC\n"); + apic_printk(APIC_QUIET, KERN_ERR "..MP-BIOS bug: " + "8254 timer not connected to IO-APIC\n"); - printk(KERN_INFO "...trying to set up timer (IRQ0) " - "through the 8259A ... "); - printk("\n..... (found pin %d) ...", pin2); + apic_printk(APIC_QUIET, KERN_INFO "...trying to set up timer " + "(IRQ0) through the 8259A ...\n"); + apic_printk(APIC_QUIET, KERN_INFO + "..... (found apic %d pin %d) ...\n", apic2, pin2); /* * legacy devices should be connected to IO APIC #0 */ @@ -2230,7 +2232,7 @@ static inline void __init check_timer(void) unmask_IO_APIC_irq(0); enable_8259A_irq(0); if (timer_irq_works()) { - printk("works.\n"); + apic_printk(APIC_QUIET, KERN_INFO "....... works.\n"); timer_through_8259 = 1; if (nmi_watchdog == NMI_IO_APIC) { disable_8259A_irq(0); @@ -2244,44 +2246,47 @@ static inline void __init check_timer(void) */ disable_8259A_irq(0); clear_IO_APIC_pin(apic2, pin2); - printk(" failed.\n"); + apic_printk(APIC_QUIET, KERN_INFO "....... failed.\n"); } if (nmi_watchdog == NMI_IO_APIC) { - printk(KERN_WARNING "timer doesn't work through the IO-APIC - disabling NMI Watchdog!\n"); + apic_printk(APIC_QUIET, KERN_WARNING "timer doesn't work " + "through the IO-APIC - disabling NMI Watchdog!\n"); nmi_watchdog = NMI_NONE; } timer_ack = 0; - printk(KERN_INFO "...trying to set up timer as Virtual Wire IRQ..."); + apic_printk(APIC_QUIET, KERN_INFO + "...trying to set up timer as Virtual Wire IRQ...\n"); lapic_register_intr(0, vector); - apic_write_around(APIC_LVT0, APIC_DM_FIXED | vector); /* Fixed mode */ + apic_write(APIC_LVT0, APIC_DM_FIXED | vector); /* Fixed mode */ enable_8259A_irq(0); if (timer_irq_works()) { - printk(" works.\n"); + apic_printk(APIC_QUIET, KERN_INFO "..... works.\n"); goto out; } disable_8259A_irq(0); - apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | vector); - printk(" failed.\n"); + apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | vector); + apic_printk(APIC_QUIET, KERN_INFO "..... failed.\n"); - printk(KERN_INFO "...trying to set up timer as ExtINT IRQ..."); + apic_printk(APIC_QUIET, KERN_INFO + "...trying to set up timer as ExtINT IRQ...\n"); init_8259A(0); make_8259A_irq(0); - apic_write_around(APIC_LVT0, APIC_DM_EXTINT); + apic_write(APIC_LVT0, APIC_DM_EXTINT); unlock_ExtINT_logic(); if (timer_irq_works()) { - printk(" works.\n"); + apic_printk(APIC_QUIET, KERN_INFO "..... works.\n"); goto out; } - printk(" failed :(.\n"); + apic_printk(APIC_QUIET, KERN_INFO "..... failed :(.\n"); panic("IO-APIC + timer doesn't work! Boot with apic=debug and send a " - "report. Then try booting with the 'noapic' option"); + "report. Then try booting with the 'noapic' option.\n"); out: local_irq_restore(flags); } diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c index 6510cde36b35..64a46affd858 100644 --- a/arch/x86/kernel/io_apic_64.c +++ b/arch/x86/kernel/io_apic_64.c @@ -45,6 +45,7 @@ #include <asm/proto.h> #include <asm/acpi.h> #include <asm/dma.h> +#include <asm/i8259.h> #include <asm/nmi.h> #include <asm/msidef.h> #include <asm/hypertransport.h> @@ -1696,8 +1697,9 @@ static inline void __init check_timer(void) pin2 = ioapic_i8259.pin; apic2 = ioapic_i8259.apic; - apic_printk(APIC_VERBOSE,KERN_INFO "..TIMER: vector=0x%02X apic1=%d pin1=%d apic2=%d pin2=%d\n", - cfg->vector, apic1, pin1, apic2, pin2); + apic_printk(APIC_QUIET, KERN_INFO "..TIMER: vector=0x%02X " + "apic1=%d pin1=%d apic2=%d pin2=%d\n", + cfg->vector, apic1, pin1, apic2, pin2); /* * Some BIOS writers are clueless and report the ExtINTA @@ -1735,14 +1737,13 @@ static inline void __init check_timer(void) } clear_IO_APIC_pin(apic1, pin1); if (!no_pin1) - apic_printk(APIC_QUIET,KERN_ERR "..MP-BIOS bug: " + apic_printk(APIC_QUIET, KERN_ERR "..MP-BIOS bug: " "8254 timer not connected to IO-APIC\n"); - apic_printk(APIC_VERBOSE,KERN_INFO - "...trying to set up timer (IRQ0) " - "through the 8259A ... "); - apic_printk(APIC_VERBOSE,"\n..... (found apic %d pin %d) ...", - apic2, pin2); + apic_printk(APIC_QUIET, KERN_INFO "...trying to set up timer " + "(IRQ0) through the 8259A ...\n"); + apic_printk(APIC_QUIET, KERN_INFO + "..... (found apic %d pin %d) ...\n", apic2, pin2); /* * legacy devices should be connected to IO APIC #0 */ @@ -1751,7 +1752,7 @@ static inline void __init check_timer(void) unmask_IO_APIC_irq(0); enable_8259A_irq(0); if (timer_irq_works()) { - apic_printk(APIC_VERBOSE," works.\n"); + apic_printk(APIC_QUIET, KERN_INFO "....... works.\n"); timer_through_8259 = 1; if (nmi_watchdog == NMI_IO_APIC) { disable_8259A_irq(0); @@ -1765,29 +1766,32 @@ static inline void __init check_timer(void) */ disable_8259A_irq(0); clear_IO_APIC_pin(apic2, pin2); - apic_printk(APIC_VERBOSE," failed.\n"); + apic_printk(APIC_QUIET, KERN_INFO "....... failed.\n"); } if (nmi_watchdog == NMI_IO_APIC) { - printk(KERN_WARNING "timer doesn't work through the IO-APIC - disabling NMI Watchdog!\n"); + apic_printk(APIC_QUIET, KERN_WARNING "timer doesn't work " + "through the IO-APIC - disabling NMI Watchdog!\n"); nmi_watchdog = NMI_NONE; } - apic_printk(APIC_VERBOSE, KERN_INFO "...trying to set up timer as Virtual Wire IRQ..."); + apic_printk(APIC_QUIET, KERN_INFO + "...trying to set up timer as Virtual Wire IRQ...\n"); lapic_register_intr(0); apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector); /* Fixed mode */ enable_8259A_irq(0); if (timer_irq_works()) { - apic_printk(APIC_VERBOSE," works.\n"); + apic_printk(APIC_QUIET, KERN_INFO "..... works.\n"); goto out; } disable_8259A_irq(0); apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | cfg->vector); - apic_printk(APIC_VERBOSE," failed.\n"); + apic_printk(APIC_QUIET, KERN_INFO "..... failed.\n"); - apic_printk(APIC_VERBOSE, KERN_INFO "...trying to set up timer as ExtINT IRQ..."); + apic_printk(APIC_QUIET, KERN_INFO + "...trying to set up timer as ExtINT IRQ...\n"); init_8259A(0); make_8259A_irq(0); @@ -1796,11 +1800,12 @@ static inline void __init check_timer(void) unlock_ExtINT_logic(); if (timer_irq_works()) { - apic_printk(APIC_VERBOSE," works.\n"); + apic_printk(APIC_QUIET, KERN_INFO "..... works.\n"); goto out; } - apic_printk(APIC_VERBOSE," failed :(.\n"); - panic("IO-APIC + timer doesn't work! Try using the 'noapic' kernel parameter\n"); + apic_printk(APIC_QUIET, KERN_INFO "..... failed :(.\n"); + panic("IO-APIC + timer doesn't work! Boot with apic=debug and send a " + "report. Then try booting with the 'noapic' option.\n"); out: local_irq_restore(flags); } diff --git a/arch/x86/kernel/io_delay.c b/arch/x86/kernel/io_delay.c index 5921e5f0a640..1c3a66a67f83 100644 --- a/arch/x86/kernel/io_delay.c +++ b/arch/x86/kernel/io_delay.c @@ -103,6 +103,9 @@ void __init io_delay_init(void) static int __init io_delay_param(char *s) { + if (!s) + return -EINVAL; + if (!strcmp(s, "0x80")) io_delay_type = CONFIG_IO_DELAY_TYPE_0X80; else if (!strcmp(s, "0xed")) diff --git a/arch/x86/kernel/ipi.c b/arch/x86/kernel/ipi.c index 9d98cda39ad9..3f7537b669d3 100644 --- a/arch/x86/kernel/ipi.c +++ b/arch/x86/kernel/ipi.c @@ -70,7 +70,7 @@ void __send_IPI_shortcut(unsigned int shortcut, int vector) /* * Send the IPI. The write to APIC_ICR fires this off. */ - apic_write_around(APIC_ICR, cfg); + apic_write(APIC_ICR, cfg); } void send_IPI_self(int vector) @@ -98,7 +98,7 @@ static inline void __send_IPI_dest_field(unsigned long mask, int vector) * prepare target chip field */ cfg = __prepare_ICR2(mask); - apic_write_around(APIC_ICR2, cfg); + apic_write(APIC_ICR2, cfg); /* * program the ICR @@ -108,7 +108,7 @@ static inline void __send_IPI_dest_field(unsigned long mask, int vector) /* * Send the IPI. The write to APIC_ICR fires this off. */ - apic_write_around(APIC_ICR, cfg); + apic_write(APIC_ICR, cfg); } /* diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index 47a6f6f12478..1cf8c1fcc088 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -83,11 +83,8 @@ union irq_ctx { static union irq_ctx *hardirq_ctx[NR_CPUS] __read_mostly; static union irq_ctx *softirq_ctx[NR_CPUS] __read_mostly; -static char softirq_stack[NR_CPUS * THREAD_SIZE] - __attribute__((__section__(".bss.page_aligned"))); - -static char hardirq_stack[NR_CPUS * THREAD_SIZE] - __attribute__((__section__(".bss.page_aligned"))); +static char softirq_stack[NR_CPUS * THREAD_SIZE] __page_aligned_bss; +static char hardirq_stack[NR_CPUS * THREAD_SIZE] __page_aligned_bss; static void call_on_stack(void *func, void *stack) { diff --git a/arch/x86/kernel/kdebugfs.c b/arch/x86/kernel/kdebugfs.c index c03205991718..f2d43bc75514 100644 --- a/arch/x86/kernel/kdebugfs.c +++ b/arch/x86/kernel/kdebugfs.c @@ -12,9 +12,13 @@ #include <linux/init.h> #include <linux/io.h> #include <linux/mm.h> +#include <linux/module.h> #include <asm/setup.h> +struct dentry *arch_debugfs_dir; +EXPORT_SYMBOL(arch_debugfs_dir); + #ifdef CONFIG_DEBUG_BOOT_PARAMS struct setup_data_node { u64 paddr; @@ -209,6 +213,10 @@ static int __init arch_kdebugfs_init(void) { int error = 0; + arch_debugfs_dir = debugfs_create_dir("x86", NULL); + if (!arch_debugfs_dir) + return -ENOMEM; + #ifdef CONFIG_DEBUG_BOOT_PARAMS error = boot_params_kdebugfs_init(); #endif diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c index b8c6743a13da..43c019f85f0d 100644 --- a/arch/x86/kernel/kprobes.c +++ b/arch/x86/kernel/kprobes.c @@ -860,7 +860,6 @@ static int __kprobes post_kprobe_handler(struct pt_regs *regs) resume_execution(cur, regs, kcb); regs->flags |= kcb->kprobe_saved_flags; - trace_hardirqs_fixup_flags(regs->flags); if ((kcb->kprobe_status != KPROBE_REENTER) && cur->post_handler) { kcb->kprobe_status = KPROBE_HIT_SSDONE; diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index 87edf1ceb1df..d02def06ca91 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c @@ -113,7 +113,7 @@ static void kvm_setup_secondary_clock(void) #endif #ifdef CONFIG_SMP -void __init kvm_smp_prepare_boot_cpu(void) +static void __init kvm_smp_prepare_boot_cpu(void) { WARN_ON(kvm_register_clock("primary cpu clock")); native_smp_prepare_boot_cpu(); diff --git a/arch/x86/kernel/module_64.c b/arch/x86/kernel/module_64.c index a888e67f5874..0e867676b5a5 100644 --- a/arch/x86/kernel/module_64.c +++ b/arch/x86/kernel/module_64.c @@ -150,7 +150,8 @@ int module_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, struct module *me) { - const Elf_Shdr *s, *text = NULL, *alt = NULL, *locks = NULL; + const Elf_Shdr *s, *text = NULL, *alt = NULL, *locks = NULL, + *para = NULL; char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) { @@ -160,6 +161,8 @@ int module_finalize(const Elf_Ehdr *hdr, alt = s; if (!strcmp(".smp_locks", secstrings + s->sh_name)) locks= s; + if (!strcmp(".parainstructions", secstrings + s->sh_name)) + para = s; } if (alt) { @@ -175,6 +178,11 @@ int module_finalize(const Elf_Ehdr *hdr, tseg, tseg + text->sh_size); } + if (para) { + void *pseg = (void *)para->sh_addr; + apply_paravirt(pseg, pseg + para->sh_size); + } + return module_bug_finalize(hdr, sechdrs, me); } diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index 3b25e49380c6..6ae005ccaed8 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -27,6 +27,7 @@ #include <asm/bios_ebda.h> #include <asm/e820.h> #include <asm/trampoline.h> +#include <asm/setup.h> #include <mach_apic.h> #ifdef CONFIG_X86_32 @@ -48,76 +49,6 @@ static int __init mpf_checksum(unsigned char *mp, int len) return sum & 0xFF; } -#ifdef CONFIG_X86_NUMAQ -int found_numaq; -/* - * Have to match translation table entries to main table entries by counter - * hence the mpc_record variable .... can't see a less disgusting way of - * doing this .... - */ -struct mpc_config_translation { - unsigned char mpc_type; - unsigned char trans_len; - unsigned char trans_type; - unsigned char trans_quad; - unsigned char trans_global; - unsigned char trans_local; - unsigned short trans_reserved; -}; - - -static int mpc_record; -static struct mpc_config_translation *translation_table[MAX_MPC_ENTRY] - __cpuinitdata; - -static inline int generate_logical_apicid(int quad, int phys_apicid) -{ - return (quad << 4) + (phys_apicid ? phys_apicid << 1 : 1); -} - - -static inline int mpc_apic_id(struct mpc_config_processor *m, - struct mpc_config_translation *translation_record) -{ - int quad = translation_record->trans_quad; - int logical_apicid = generate_logical_apicid(quad, m->mpc_apicid); - - printk(KERN_DEBUG "Processor #%d %u:%u APIC version %d (quad %d, apic %d)\n", - m->mpc_apicid, - (m->mpc_cpufeature & CPU_FAMILY_MASK) >> 8, - (m->mpc_cpufeature & CPU_MODEL_MASK) >> 4, - m->mpc_apicver, quad, logical_apicid); - return logical_apicid; -} - -int mp_bus_id_to_node[MAX_MP_BUSSES]; - -int mp_bus_id_to_local[MAX_MP_BUSSES]; - -static void mpc_oem_bus_info(struct mpc_config_bus *m, char *name, - struct mpc_config_translation *translation) -{ - int quad = translation->trans_quad; - int local = translation->trans_local; - - mp_bus_id_to_node[m->mpc_busid] = quad; - mp_bus_id_to_local[m->mpc_busid] = local; - printk(KERN_INFO "Bus #%d is %s (node %d)\n", - m->mpc_busid, name, quad); -} - -int quad_local_to_mp_bus_id [NR_CPUS/4][4]; -static void mpc_oem_pci_bus(struct mpc_config_bus *m, - struct mpc_config_translation *translation) -{ - int quad = translation->trans_quad; - int local = translation->trans_local; - - quad_local_to_mp_bus_id[quad][local] = m->mpc_busid; -} - -#endif - static void __cpuinit MP_processor_info(struct mpc_config_processor *m) { int apicid; @@ -127,14 +58,12 @@ static void __cpuinit MP_processor_info(struct mpc_config_processor *m) disabled_cpus++; return; } -#ifdef CONFIG_X86_NUMAQ - if (found_numaq) - apicid = mpc_apic_id(m, translation_table[mpc_record]); + + if (x86_quirks->mpc_apic_id) + apicid = x86_quirks->mpc_apic_id(m); else apicid = m->mpc_apicid; -#else - apicid = m->mpc_apicid; -#endif + if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) { bootup_cpu = " (Bootup-CPU)"; boot_cpu_physical_apicid = m->mpc_apicid; @@ -151,12 +80,10 @@ static void __init MP_bus_info(struct mpc_config_bus *m) memcpy(str, m->mpc_bustype, 6); str[6] = 0; -#ifdef CONFIG_X86_NUMAQ - if (found_numaq) - mpc_oem_bus_info(m, str, translation_table[mpc_record]); -#else - printk(KERN_INFO "Bus #%d is %s\n", m->mpc_busid, str); -#endif + if (x86_quirks->mpc_oem_bus_info) + x86_quirks->mpc_oem_bus_info(m, str); + else + printk(KERN_INFO "Bus #%d is %s\n", m->mpc_busid, str); #if MAX_MP_BUSSES < 256 if (m->mpc_busid >= MAX_MP_BUSSES) { @@ -173,10 +100,9 @@ static void __init MP_bus_info(struct mpc_config_bus *m) mp_bus_id_to_type[m->mpc_busid] = MP_BUS_ISA; #endif } else if (strncmp(str, BUSTYPE_PCI, sizeof(BUSTYPE_PCI) - 1) == 0) { -#ifdef CONFIG_X86_NUMAQ - if (found_numaq) - mpc_oem_pci_bus(m, translation_table[mpc_record]); -#endif + if (x86_quirks->mpc_oem_pci_bus) + x86_quirks->mpc_oem_pci_bus(m); + clear_bit(m->mpc_busid, mp_bus_not_pci); #if defined(CONFIG_EISA) || defined (CONFIG_MCA) mp_bus_id_to_type[m->mpc_busid] = MP_BUS_PCI; @@ -316,83 +242,6 @@ static void __init MP_lintsrc_info(struct mpc_config_lintsrc *m) m->mpc_srcbusirq, m->mpc_destapic, m->mpc_destapiclint); } -#ifdef CONFIG_X86_NUMAQ -static void __init MP_translation_info(struct mpc_config_translation *m) -{ - printk(KERN_INFO - "Translation: record %d, type %d, quad %d, global %d, local %d\n", - mpc_record, m->trans_type, m->trans_quad, m->trans_global, - m->trans_local); - - if (mpc_record >= MAX_MPC_ENTRY) - printk(KERN_ERR "MAX_MPC_ENTRY exceeded!\n"); - else - translation_table[mpc_record] = m; /* stash this for later */ - if (m->trans_quad < MAX_NUMNODES && !node_online(m->trans_quad)) - node_set_online(m->trans_quad); -} - -/* - * Read/parse the MPC oem tables - */ - -static void __init smp_read_mpc_oem(struct mp_config_oemtable *oemtable, - unsigned short oemsize) -{ - int count = sizeof(*oemtable); /* the header size */ - unsigned char *oemptr = ((unsigned char *)oemtable) + count; - - mpc_record = 0; - printk(KERN_INFO "Found an OEM MPC table at %8p - parsing it ... \n", - oemtable); - if (memcmp(oemtable->oem_signature, MPC_OEM_SIGNATURE, 4)) { - printk(KERN_WARNING - "SMP mpc oemtable: bad signature [%c%c%c%c]!\n", - oemtable->oem_signature[0], oemtable->oem_signature[1], - oemtable->oem_signature[2], oemtable->oem_signature[3]); - return; - } - if (mpf_checksum((unsigned char *)oemtable, oemtable->oem_length)) { - printk(KERN_WARNING "SMP oem mptable: checksum error!\n"); - return; - } - while (count < oemtable->oem_length) { - switch (*oemptr) { - case MP_TRANSLATION: - { - struct mpc_config_translation *m = - (struct mpc_config_translation *)oemptr; - MP_translation_info(m); - oemptr += sizeof(*m); - count += sizeof(*m); - ++mpc_record; - break; - } - default: - { - printk(KERN_WARNING - "Unrecognised OEM table entry type! - %d\n", - (int)*oemptr); - return; - } - } - } -} - -void numaq_mps_oem_check(struct mp_config_table *mpc, char *oem, - char *productid) -{ - if (strncmp(oem, "IBM NUMA", 8)) - printk("Warning! Not a NUMA-Q system!\n"); - else - found_numaq = 1; - - if (mpc->mpc_oemptr) - smp_read_mpc_oem((struct mp_config_oemtable *)mpc->mpc_oemptr, - mpc->mpc_oemsize); -} -#endif /* CONFIG_X86_NUMAQ */ - /* * Read/parse the MPC */ @@ -457,7 +306,6 @@ static int __init smp_read_mpc(struct mp_config_table *mpc, unsigned early) } else mps_oem_check(mpc, oem, str); #endif - /* save the local APIC address, it might be non-default */ if (!acpi_lapic) mp_lapic_addr = mpc->mpc_lapic; @@ -465,12 +313,17 @@ static int __init smp_read_mpc(struct mp_config_table *mpc, unsigned early) if (early) return 1; + if (mpc->mpc_oemptr && x86_quirks->smp_read_mpc_oem) { + struct mp_config_oemtable *oem_table = (struct mp_config_oemtable *)(unsigned long)mpc->mpc_oemptr; + x86_quirks->smp_read_mpc_oem(oem_table, mpc->mpc_oemsize); + } + /* * Now process the configuration blocks. */ -#ifdef CONFIG_X86_NUMAQ - mpc_record = 0; -#endif + if (x86_quirks->mpc_record) + *x86_quirks->mpc_record = 0; + while (count < mpc->mpc_length) { switch (*mpt) { case MP_PROCESSOR: @@ -536,9 +389,8 @@ static int __init smp_read_mpc(struct mp_config_table *mpc, unsigned early) count = mpc->mpc_length; break; } -#ifdef CONFIG_X86_NUMAQ - ++mpc_record; -#endif + if (x86_quirks->mpc_record) + (*x86_quirks->mpc_record)++; } #ifdef CONFIG_X86_GENERICARCH @@ -726,20 +578,14 @@ static inline void __init construct_default_ISA_mptable(int mpc_default_type) static struct intel_mp_floating *mpf_found; /* - * Machine specific quirk for finding the SMP config before other setup - * activities destroy the table: - */ -int (*mach_get_smp_config_quirk)(unsigned int early); - -/* * Scan the memory blocks for an SMP configuration block. */ static void __init __get_smp_config(unsigned int early) { struct intel_mp_floating *mpf = mpf_found; - if (mach_get_smp_config_quirk) { - if (mach_get_smp_config_quirk(early)) + if (x86_quirks->mach_get_smp_config) { + if (x86_quirks->mach_get_smp_config(early)) return; } if (acpi_lapic && early) @@ -899,14 +745,12 @@ static int __init smp_scan_config(unsigned long base, unsigned long length, return 0; } -int (*mach_find_smp_config_quirk)(unsigned int reserve); - static void __init __find_smp_config(unsigned int reserve) { unsigned int address; - if (mach_find_smp_config_quirk) { - if (mach_find_smp_config_quirk(reserve)) + if (x86_quirks->mach_find_smp_config) { + if (x86_quirks->mach_find_smp_config(reserve)) return; } /* diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index ec024b3baad0..ac6d51222e7d 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -263,7 +263,7 @@ late_initcall(init_lapic_nmi_sysfs); static void __acpi_nmi_enable(void *__unused) { - apic_write_around(APIC_LVT0, APIC_DM_NMI); + apic_write(APIC_LVT0, APIC_DM_NMI); } /* @@ -277,7 +277,7 @@ void acpi_nmi_enable(void) static void __acpi_nmi_disable(void *__unused) { - apic_write_around(APIC_LVT0, APIC_DM_NMI | APIC_LVT_MASKED); + apic_write(APIC_LVT0, APIC_DM_NMI | APIC_LVT_MASKED); } /* @@ -448,6 +448,13 @@ nmi_watchdog_tick(struct pt_regs *regs, unsigned reason) #ifdef CONFIG_SYSCTL +static int __init setup_unknown_nmi_panic(char *str) +{ + unknown_nmi_panic = 1; + return 1; +} +__setup("unknown_nmi_panic", setup_unknown_nmi_panic); + static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu) { unsigned char reason = get_nmi_reason(); diff --git a/arch/x86/kernel/numaq_32.c b/arch/x86/kernel/numaq_32.c index a23e8233b9ac..b8c45610b20a 100644 --- a/arch/x86/kernel/numaq_32.c +++ b/arch/x86/kernel/numaq_32.c @@ -33,6 +33,7 @@ #include <asm/processor.h> #include <asm/mpspec.h> #include <asm/e820.h> +#include <asm/setup.h> #define MB_TO_PAGES(addr) ((addr) << (20 - PAGE_SHIFT)) @@ -71,6 +72,188 @@ static void __init smp_dump_qct(void) } } + +void __init numaq_tsc_disable(void) +{ + if (!found_numaq) + return; + + if (num_online_nodes() > 1) { + printk(KERN_DEBUG "NUMAQ: disabling TSC\n"); + setup_clear_cpu_cap(X86_FEATURE_TSC); + } +} + +static int __init numaq_pre_time_init(void) +{ + numaq_tsc_disable(); + return 0; +} + +int found_numaq; +/* + * Have to match translation table entries to main table entries by counter + * hence the mpc_record variable .... can't see a less disgusting way of + * doing this .... + */ +struct mpc_config_translation { + unsigned char mpc_type; + unsigned char trans_len; + unsigned char trans_type; + unsigned char trans_quad; + unsigned char trans_global; + unsigned char trans_local; + unsigned short trans_reserved; +}; + +/* x86_quirks member */ +static int mpc_record; +static struct mpc_config_translation *translation_table[MAX_MPC_ENTRY] + __cpuinitdata; + +static inline int generate_logical_apicid(int quad, int phys_apicid) +{ + return (quad << 4) + (phys_apicid ? phys_apicid << 1 : 1); +} + +/* x86_quirks member */ +static int mpc_apic_id(struct mpc_config_processor *m) +{ + int quad = translation_table[mpc_record]->trans_quad; + int logical_apicid = generate_logical_apicid(quad, m->mpc_apicid); + + printk(KERN_DEBUG "Processor #%d %u:%u APIC version %d (quad %d, apic %d)\n", + m->mpc_apicid, + (m->mpc_cpufeature & CPU_FAMILY_MASK) >> 8, + (m->mpc_cpufeature & CPU_MODEL_MASK) >> 4, + m->mpc_apicver, quad, logical_apicid); + return logical_apicid; +} + +int mp_bus_id_to_node[MAX_MP_BUSSES]; + +int mp_bus_id_to_local[MAX_MP_BUSSES]; + +/* x86_quirks member */ +static void mpc_oem_bus_info(struct mpc_config_bus *m, char *name) +{ + int quad = translation_table[mpc_record]->trans_quad; + int local = translation_table[mpc_record]->trans_local; + + mp_bus_id_to_node[m->mpc_busid] = quad; + mp_bus_id_to_local[m->mpc_busid] = local; + printk(KERN_INFO "Bus #%d is %s (node %d)\n", + m->mpc_busid, name, quad); +} + +int quad_local_to_mp_bus_id [NR_CPUS/4][4]; + +/* x86_quirks member */ +static void mpc_oem_pci_bus(struct mpc_config_bus *m) +{ + int quad = translation_table[mpc_record]->trans_quad; + int local = translation_table[mpc_record]->trans_local; + + quad_local_to_mp_bus_id[quad][local] = m->mpc_busid; +} + +static void __init MP_translation_info(struct mpc_config_translation *m) +{ + printk(KERN_INFO + "Translation: record %d, type %d, quad %d, global %d, local %d\n", + mpc_record, m->trans_type, m->trans_quad, m->trans_global, + m->trans_local); + + if (mpc_record >= MAX_MPC_ENTRY) + printk(KERN_ERR "MAX_MPC_ENTRY exceeded!\n"); + else + translation_table[mpc_record] = m; /* stash this for later */ + if (m->trans_quad < MAX_NUMNODES && !node_online(m->trans_quad)) + node_set_online(m->trans_quad); +} + +static int __init mpf_checksum(unsigned char *mp, int len) +{ + int sum = 0; + + while (len--) + sum += *mp++; + + return sum & 0xFF; +} + +/* + * Read/parse the MPC oem tables + */ + +static void __init smp_read_mpc_oem(struct mp_config_oemtable *oemtable, + unsigned short oemsize) +{ + int count = sizeof(*oemtable); /* the header size */ + unsigned char *oemptr = ((unsigned char *)oemtable) + count; + + mpc_record = 0; + printk(KERN_INFO "Found an OEM MPC table at %8p - parsing it ... \n", + oemtable); + if (memcmp(oemtable->oem_signature, MPC_OEM_SIGNATURE, 4)) { + printk(KERN_WARNING + "SMP mpc oemtable: bad signature [%c%c%c%c]!\n", + oemtable->oem_signature[0], oemtable->oem_signature[1], + oemtable->oem_signature[2], oemtable->oem_signature[3]); + return; + } + if (mpf_checksum((unsigned char *)oemtable, oemtable->oem_length)) { + printk(KERN_WARNING "SMP oem mptable: checksum error!\n"); + return; + } + while (count < oemtable->oem_length) { + switch (*oemptr) { + case MP_TRANSLATION: + { + struct mpc_config_translation *m = + (struct mpc_config_translation *)oemptr; + MP_translation_info(m); + oemptr += sizeof(*m); + count += sizeof(*m); + ++mpc_record; + break; + } + default: + { + printk(KERN_WARNING + "Unrecognised OEM table entry type! - %d\n", + (int)*oemptr); + return; + } + } + } +} + +static struct x86_quirks numaq_x86_quirks __initdata = { + .arch_pre_time_init = numaq_pre_time_init, + .arch_time_init = NULL, + .arch_pre_intr_init = NULL, + .arch_memory_setup = NULL, + .arch_intr_init = NULL, + .arch_trap_init = NULL, + .mach_get_smp_config = NULL, + .mach_find_smp_config = NULL, + .mpc_record = &mpc_record, + .mpc_apic_id = mpc_apic_id, + .mpc_oem_bus_info = mpc_oem_bus_info, + .mpc_oem_pci_bus = mpc_oem_pci_bus, + .smp_read_mpc_oem = smp_read_mpc_oem, +}; + +void numaq_mps_oem_check(struct mp_config_table *mpc, char *oem, + char *productid) +{ + if (strncmp(oem, "IBM NUMA", 8)) + printk("Warning! Not a NUMA-Q system!\n"); + else + found_numaq = 1; +} + static __init void early_check_numaq(void) { /* @@ -82,6 +265,9 @@ static __init void early_check_numaq(void) */ if (smp_found_config) early_get_smp_config(); + + if (found_numaq) + x86_quirks = &numaq_x86_quirks; } int __init get_memcfg_numaq(void) @@ -92,14 +278,3 @@ int __init get_memcfg_numaq(void) smp_dump_qct(); return 1; } - -void __init numaq_tsc_disable(void) -{ - if (!found_numaq) - return; - - if (num_online_nodes() > 1) { - printk(KERN_DEBUG "NUMAQ: disabling TSC\n"); - setup_clear_cpu_cap(X86_FEATURE_TSC); - } -} diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index e0f571d58c19..b4564d089b43 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -29,6 +29,7 @@ #include <asm/desc.h> #include <asm/setup.h> #include <asm/arch_hooks.h> +#include <asm/pgtable.h> #include <asm/time.h> #include <asm/pgalloc.h> #include <asm/irq.h> @@ -361,7 +362,6 @@ struct pv_cpu_ops pv_cpu_ops = { struct pv_apic_ops pv_apic_ops = { #ifdef CONFIG_X86_LOCAL_APIC .apic_write = native_apic_write, - .apic_write_atomic = native_apic_write_atomic, .apic_read = native_apic_read, .setup_boot_clock = setup_boot_APIC_clock, .setup_secondary_clock = setup_secondary_APIC_clock, @@ -373,6 +373,9 @@ struct pv_mmu_ops pv_mmu_ops = { #ifndef CONFIG_X86_64 .pagetable_setup_start = native_pagetable_setup_start, .pagetable_setup_done = native_pagetable_setup_done, +#else + .pagetable_setup_start = paravirt_nop, + .pagetable_setup_done = paravirt_nop, #endif .read_cr2 = native_read_cr2, diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c index 6959b5c45df4..151f2d171f7c 100644 --- a/arch/x86/kernel/pci-calgary_64.c +++ b/arch/x86/kernel/pci-calgary_64.c @@ -36,7 +36,7 @@ #include <linux/delay.h> #include <linux/scatterlist.h> #include <linux/iommu-helper.h> -#include <asm/gart.h> +#include <asm/iommu.h> #include <asm/calgary.h> #include <asm/tce.h> #include <asm/pci-direct.h> diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 8467ec2320f1..a4213c00dffc 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -5,12 +5,11 @@ #include <asm/proto.h> #include <asm/dma.h> -#include <asm/gart.h> +#include <asm/iommu.h> #include <asm/calgary.h> #include <asm/amd_iommu.h> -int forbid_dac __read_mostly; -EXPORT_SYMBOL(forbid_dac); +static int forbid_dac __read_mostly; const struct dma_mapping_ops *dma_ops; EXPORT_SYMBOL(dma_ops); @@ -114,21 +113,15 @@ void __init pci_iommu_alloc(void) * The order of these functions is important for * fall-back/fail-over reasons */ -#ifdef CONFIG_GART_IOMMU gart_iommu_hole_init(); -#endif -#ifdef CONFIG_CALGARY_IOMMU detect_calgary(); -#endif detect_intel_iommu(); amd_iommu_detect(); -#ifdef CONFIG_SWIOTLB pci_swiotlb_init(); -#endif } #endif @@ -184,9 +177,7 @@ static __init int iommu_setup(char *p) swiotlb = 1; #endif -#ifdef CONFIG_GART_IOMMU gart_parse_options(p); -#endif #ifdef CONFIG_CALGARY_IOMMU if (!strncmp(p, "calgary", 7)) @@ -500,17 +491,13 @@ EXPORT_SYMBOL(dma_free_coherent); static int __init pci_iommu_init(void) { -#ifdef CONFIG_CALGARY_IOMMU calgary_iommu_init(); -#endif intel_iommu_init(); amd_iommu_init(); -#ifdef CONFIG_GART_IOMMU gart_iommu_init(); -#endif no_iommu_init(); return 0; diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index c3fe78406d18..be60961f8695 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -32,6 +32,7 @@ #include <asm/mtrr.h> #include <asm/pgtable.h> #include <asm/proto.h> +#include <asm/iommu.h> #include <asm/gart.h> #include <asm/cacheflush.h> #include <asm/swiotlb.h> diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c index aec43d56f49c..792b9179eff3 100644 --- a/arch/x86/kernel/pci-nommu.c +++ b/arch/x86/kernel/pci-nommu.c @@ -7,7 +7,7 @@ #include <linux/dma-mapping.h> #include <linux/scatterlist.h> -#include <asm/gart.h> +#include <asm/iommu.h> #include <asm/processor.h> #include <asm/dma.h> diff --git a/arch/x86/kernel/pci-swiotlb_64.c b/arch/x86/kernel/pci-swiotlb_64.c index 82299cd1d04d..20df839b9c20 100644 --- a/arch/x86/kernel/pci-swiotlb_64.c +++ b/arch/x86/kernel/pci-swiotlb_64.c @@ -5,7 +5,7 @@ #include <linux/module.h> #include <linux/dma-mapping.h> -#include <asm/gart.h> +#include <asm/iommu.h> #include <asm/swiotlb.h> #include <asm/dma.h> diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 4d629c62f4f8..7fc4d5b0a6a0 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -15,6 +15,7 @@ unsigned long idle_nomwait; EXPORT_SYMBOL(idle_nomwait); struct kmem_cache *task_xstate_cachep; +static int force_mwait __cpuinitdata; int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) { @@ -199,6 +200,7 @@ static void poll_idle(void) * * idle=mwait overrides this decision and forces the usage of mwait. */ +static int __cpuinitdata force_mwait; #define MWAIT_INFO 0x05 #define MWAIT_ECX_EXTENDED_INFO 0x01 @@ -326,6 +328,9 @@ void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c) static int __init idle_setup(char *str) { + if (!str) + return -EINVAL; + if (!strcmp(str, "poll")) { printk("using polling idle threads.\n"); pm_idle = poll_idle; diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index a8e53626ac9a..e8a8e1b99817 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -537,8 +537,8 @@ static inline void __switch_to_xtra(struct task_struct *prev_p, struct task_struct * __switch_to(struct task_struct *prev_p, struct task_struct *next_p) { - struct thread_struct *prev = &prev_p->thread, - *next = &next_p->thread; + struct thread_struct *prev = &prev_p->thread; + struct thread_struct *next = &next_p->thread; int cpu = smp_processor_id(); struct tss_struct *tss = &per_cpu(init_tss, cpu); unsigned fsindex, gsindex; @@ -586,35 +586,34 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) /* * Switch FS and GS. + * + * Segment register != 0 always requires a reload. Also + * reload when it has changed. When prev process used 64bit + * base always reload to avoid an information leak. */ - { - /* segment register != 0 always requires a reload. - also reload when it has changed. - when prev process used 64bit base always reload - to avoid an information leak. */ - if (unlikely(fsindex | next->fsindex | prev->fs)) { - loadsegment(fs, next->fsindex); - /* check if the user used a selector != 0 - * if yes clear 64bit base, since overloaded base - * is always mapped to the Null selector - */ - if (fsindex) + if (unlikely(fsindex | next->fsindex | prev->fs)) { + loadsegment(fs, next->fsindex); + /* + * Check if the user used a selector != 0; if yes + * clear 64bit base, since overloaded base is always + * mapped to the Null selector + */ + if (fsindex) prev->fs = 0; - } - /* when next process has a 64bit base use it */ - if (next->fs) - wrmsrl(MSR_FS_BASE, next->fs); - prev->fsindex = fsindex; - - if (unlikely(gsindex | next->gsindex | prev->gs)) { - load_gs_index(next->gsindex); - if (gsindex) + } + /* when next process has a 64bit base use it */ + if (next->fs) + wrmsrl(MSR_FS_BASE, next->fs); + prev->fsindex = fsindex; + + if (unlikely(gsindex | next->gsindex | prev->gs)) { + load_gs_index(next->gsindex); + if (gsindex) prev->gs = 0; - } - if (next->gs) - wrmsrl(MSR_KERNEL_GS_BASE, next->gs); - prev->gsindex = gsindex; } + if (next->gs) + wrmsrl(MSR_KERNEL_GS_BASE, next->gs); + prev->gsindex = gsindex; /* Must be after DS reload */ unlazy_fpu(prev_p); @@ -627,7 +626,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) write_pda(pcurrent, next_p); write_pda(kernelstack, - (unsigned long)task_stack_page(next_p) + THREAD_SIZE - PDA_STACKOFFSET); + (unsigned long)task_stack_page(next_p) + + THREAD_SIZE - PDA_STACKOFFSET); #ifdef CONFIG_CC_STACKPROTECTOR write_pda(stack_canary, next_p->stack_canary); /* diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 77040b6070e1..e37dccce85db 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -1357,8 +1357,6 @@ const struct user_regset_view *task_user_regset_view(struct task_struct *task) #endif } -#ifdef CONFIG_X86_32 - void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code) { struct siginfo info; @@ -1377,89 +1375,10 @@ void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code) force_sig_info(SIGTRAP, &info, tsk); } -/* notification of system call entry/exit - * - triggered by current->work.syscall_trace - */ -int do_syscall_trace(struct pt_regs *regs, int entryexit) -{ - int is_sysemu = test_thread_flag(TIF_SYSCALL_EMU); - /* - * With TIF_SYSCALL_EMU set we want to ignore TIF_SINGLESTEP for syscall - * interception - */ - int is_singlestep = !is_sysemu && test_thread_flag(TIF_SINGLESTEP); - int ret = 0; - - /* do the secure computing check first */ - if (!entryexit) - secure_computing(regs->orig_ax); - - if (unlikely(current->audit_context)) { - if (entryexit) - audit_syscall_exit(AUDITSC_RESULT(regs->ax), - regs->ax); - /* Debug traps, when using PTRACE_SINGLESTEP, must be sent only - * on the syscall exit path. Normally, when TIF_SYSCALL_AUDIT is - * not used, entry.S will call us only on syscall exit, not - * entry; so when TIF_SYSCALL_AUDIT is used we must avoid - * calling send_sigtrap() on syscall entry. - * - * Note that when PTRACE_SYSEMU_SINGLESTEP is used, - * is_singlestep is false, despite his name, so we will still do - * the correct thing. - */ - else if (is_singlestep) - goto out; - } - - if (!(current->ptrace & PT_PTRACED)) - goto out; - - /* If a process stops on the 1st tracepoint with SYSCALL_TRACE - * and then is resumed with SYSEMU_SINGLESTEP, it will come in - * here. We have to check this and return */ - if (is_sysemu && entryexit) - return 0; - - /* Fake a debug trap */ - if (is_singlestep) - send_sigtrap(current, regs, 0); - - if (!test_thread_flag(TIF_SYSCALL_TRACE) && !is_sysemu) - goto out; - - /* the 0x80 provides a way for the tracing parent to distinguish - between a syscall stop and SIGTRAP delivery */ - /* Note that the debugger could change the result of test_thread_flag!*/ - ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD) ? 0x80:0)); - - /* - * this isn't the same as continuing with a signal, but it will do - * for normal use. strace only continues with a signal if the - * stopping signal is not SIGTRAP. -brl - */ - if (current->exit_code) { - send_sig(current->exit_code, current, 1); - current->exit_code = 0; - } - ret = is_sysemu; -out: - if (unlikely(current->audit_context) && !entryexit) - audit_syscall_entry(AUDIT_ARCH_I386, regs->orig_ax, - regs->bx, regs->cx, regs->dx, regs->si); - if (ret == 0) - return 0; - - regs->orig_ax = -1; /* force skip of syscall restarting */ - if (unlikely(current->audit_context)) - audit_syscall_exit(AUDITSC_RESULT(regs->ax), regs->ax); - return 1; -} - -#else /* CONFIG_X86_64 */ - static void syscall_trace(struct pt_regs *regs) { + if (!(current->ptrace & PT_PTRACED)) + return; #if 0 printk("trace %s ip %lx sp %lx ax %d origrax %d caller %lx tiflags %x ptrace %x\n", @@ -1481,39 +1400,81 @@ static void syscall_trace(struct pt_regs *regs) } } -asmlinkage void syscall_trace_enter(struct pt_regs *regs) +#ifdef CONFIG_X86_32 +# define IS_IA32 1 +#elif defined CONFIG_IA32_EMULATION +# define IS_IA32 test_thread_flag(TIF_IA32) +#else +# define IS_IA32 0 +#endif + +/* + * We must return the syscall number to actually look up in the table. + * This can be -1L to skip running any syscall at all. + */ +asmregparm long syscall_trace_enter(struct pt_regs *regs) { + long ret = 0; + + /* + * If we stepped into a sysenter/syscall insn, it trapped in + * kernel mode; do_debug() cleared TF and set TIF_SINGLESTEP. + * If user-mode had set TF itself, then it's still clear from + * do_debug() and we need to set it again to restore the user + * state. If we entered on the slow path, TF was already set. + */ + if (test_thread_flag(TIF_SINGLESTEP)) + regs->flags |= X86_EFLAGS_TF; + /* do the secure computing check first */ secure_computing(regs->orig_ax); - if (test_thread_flag(TIF_SYSCALL_TRACE) - && (current->ptrace & PT_PTRACED)) + if (unlikely(test_thread_flag(TIF_SYSCALL_EMU))) + ret = -1L; + + if (ret || test_thread_flag(TIF_SYSCALL_TRACE)) syscall_trace(regs); if (unlikely(current->audit_context)) { - if (test_thread_flag(TIF_IA32)) { + if (IS_IA32) audit_syscall_entry(AUDIT_ARCH_I386, regs->orig_ax, regs->bx, regs->cx, regs->dx, regs->si); - } else { +#ifdef CONFIG_X86_64 + else audit_syscall_entry(AUDIT_ARCH_X86_64, regs->orig_ax, regs->di, regs->si, regs->dx, regs->r10); - } +#endif } + + return ret ?: regs->orig_ax; } -asmlinkage void syscall_trace_leave(struct pt_regs *regs) +asmregparm void syscall_trace_leave(struct pt_regs *regs) { if (unlikely(current->audit_context)) audit_syscall_exit(AUDITSC_RESULT(regs->ax), regs->ax); - if ((test_thread_flag(TIF_SYSCALL_TRACE) - || test_thread_flag(TIF_SINGLESTEP)) - && (current->ptrace & PT_PTRACED)) + if (test_thread_flag(TIF_SYSCALL_TRACE)) syscall_trace(regs); -} -#endif /* CONFIG_X86_32 */ + /* + * If TIF_SYSCALL_EMU is set, we only get here because of + * TIF_SINGLESTEP (i.e. this is PTRACE_SYSEMU_SINGLESTEP). + * We already reported this syscall instruction in + * syscall_trace_enter(), so don't do any more now. + */ + if (unlikely(test_thread_flag(TIF_SYSCALL_EMU))) + return; + + /* + * If we are single-stepping, synthesize a trap to follow the + * system call instruction. + */ + if (test_thread_flag(TIF_SINGLESTEP) && + (current->ptrace & PT_PTRACED)) + send_sigtrap(current, regs, 0); +} diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index f8a62160e151..9dcf39c02972 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -177,6 +177,14 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge 2400"), }, }, + { /* Handle problems with rebooting on Dell T5400's */ + .callback = set_bios_reboot, + .ident = "Dell Precision T5400", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "Precision WorkStation T5400"), + }, + }, { /* Handle problems with rebooting on HP laptops */ .callback = set_bios_reboot, .ident = "HP Compaq Laptop", diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 531b55b8e81a..ec952aa5394a 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -57,12 +57,8 @@ #include <linux/slab.h> #include <linux/user.h> #include <linux/delay.h> -#include <linux/highmem.h> #include <linux/kallsyms.h> -#include <linux/edd.h> -#include <linux/iscsi_ibft.h> -#include <linux/kexec.h> #include <linux/cpufreq.h> #include <linux/dma-mapping.h> #include <linux/ctype.h> @@ -96,7 +92,7 @@ #include <asm/smp.h> #include <asm/desc.h> #include <asm/dma.h> -#include <asm/gart.h> +#include <asm/iommu.h> #include <asm/mmu_context.h> #include <asm/proto.h> @@ -104,7 +100,6 @@ #include <asm/paravirt.h> #include <asm/percpu.h> -#include <asm/sections.h> #include <asm/topology.h> #include <asm/apicdef.h> #ifdef CONFIG_X86_64 @@ -579,6 +574,10 @@ static int __init setup_elfcorehdr(char *arg) early_param("elfcorehdr", setup_elfcorehdr); #endif +static struct x86_quirks default_x86_quirks __initdata; + +struct x86_quirks *x86_quirks __initdata = &default_x86_quirks; + /* * Determine if we were loaded by an EFI loader. If so, then we have also been * passed the efi memmap, systab, etc., so we should use these data structures @@ -824,7 +823,10 @@ void __init setup_arch(char **cmdline_p) vmi_init(); #endif + paravirt_pagetable_setup_start(swapper_pg_dir); paging_init(); + paravirt_pagetable_setup_done(swapper_pg_dir); + paravirt_post_allocator_init(); #ifdef CONFIG_X86_64 map_vsyscall(); @@ -854,14 +856,6 @@ void __init setup_arch(char **cmdline_p) init_cpu_to_node(); #endif -#ifdef CONFIG_X86_NUMAQ - /* - * need to check online nodes num, call it - * here before time_init/tsc_init - */ - numaq_tsc_disable(); -#endif - init_apic_mappings(); ioapic_init_mappings(); diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c index d92373630963..07faaa5109cb 100644 --- a/arch/x86/kernel/signal_32.c +++ b/arch/x86/kernel/signal_32.c @@ -212,7 +212,7 @@ asmlinkage unsigned long sys_sigreturn(unsigned long __unused) badframe: if (show_unhandled_signals && printk_ratelimit()) { - printk(KERN_INFO "%s%s[%d] bad frame in sigreturn frame:" + printk("%s%s[%d] bad frame in sigreturn frame:" "%p ip:%lx sp:%lx oeax:%lx", task_pid_nr(current) > 1 ? KERN_INFO : KERN_EMERG, current->comm, task_pid_nr(current), frame, regs->ip, @@ -657,12 +657,6 @@ static void do_signal(struct pt_regs *regs) void do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags) { - /* Pending single-step? */ - if (thread_info_flags & _TIF_SINGLESTEP) { - regs->flags |= X86_EFLAGS_TF; - clear_thread_flag(TIF_SINGLESTEP); - } - /* deal with pending signal delivery */ if (thread_info_flags & _TIF_SIGPENDING) do_signal(regs); diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c index e53b267662e7..bf87684474f1 100644 --- a/arch/x86/kernel/signal_64.c +++ b/arch/x86/kernel/signal_64.c @@ -487,12 +487,6 @@ static void do_signal(struct pt_regs *regs) void do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags) { - /* Pending single-step? */ - if (thread_info_flags & _TIF_SINGLESTEP) { - regs->flags |= X86_EFLAGS_TF; - clear_thread_flag(TIF_SINGLESTEP); - } - #ifdef CONFIG_X86_MCE /* notify userspace of pending MCEs */ if (thread_info_flags & _TIF_MCE_NOTIFY) diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 687376ab07e8..27640196eb7c 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -546,8 +546,8 @@ static inline void __inquire_remote_apic(int apicid) printk(KERN_CONT "a previous APIC delivery may have failed\n"); - apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(apicid)); - apic_write_around(APIC_ICR, APIC_DM_REMRD | regs[i]); + apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(apicid)); + apic_write(APIC_ICR, APIC_DM_REMRD | regs[i]); timeout = 0; do { @@ -579,11 +579,11 @@ wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip) int maxlvt; /* Target chip */ - apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(logical_apicid)); + apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(logical_apicid)); /* Boot on the stack */ /* Kick the second */ - apic_write_around(APIC_ICR, APIC_DM_NMI | APIC_DEST_LOGICAL); + apic_write(APIC_ICR, APIC_DM_NMI | APIC_DEST_LOGICAL); Dprintk("Waiting for send to finish...\n"); send_status = safe_apic_wait_icr_idle(); @@ -592,14 +592,9 @@ wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip) * Give the other CPU some time to accept the IPI. */ udelay(200); - /* - * Due to the Pentium erratum 3AP. - */ maxlvt = lapic_get_maxlvt(); - if (maxlvt > 3) { - apic_read_around(APIC_SPIV); + if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ apic_write(APIC_ESR, 0); - } accept_status = (apic_read(APIC_ESR) & 0xEF); Dprintk("NMI sent.\n"); @@ -625,12 +620,14 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip) return send_status; } + maxlvt = lapic_get_maxlvt(); + /* * Be paranoid about clearing APIC errors. */ if (APIC_INTEGRATED(apic_version[phys_apicid])) { - apic_read_around(APIC_SPIV); - apic_write(APIC_ESR, 0); + if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ + apic_write(APIC_ESR, 0); apic_read(APIC_ESR); } @@ -639,13 +636,13 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip) /* * Turn INIT on target chip */ - apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid)); + apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid)); /* * Send IPI */ - apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_INT_ASSERT - | APIC_DM_INIT); + apic_write(APIC_ICR, + APIC_INT_LEVELTRIG | APIC_INT_ASSERT | APIC_DM_INIT); Dprintk("Waiting for send to finish...\n"); send_status = safe_apic_wait_icr_idle(); @@ -655,10 +652,10 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip) Dprintk("Deasserting INIT.\n"); /* Target chip */ - apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid)); + apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid)); /* Send IPI */ - apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT); + apic_write(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT); Dprintk("Waiting for send to finish...\n"); send_status = safe_apic_wait_icr_idle(); @@ -689,12 +686,10 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip) */ Dprintk("#startup loops: %d.\n", num_starts); - maxlvt = lapic_get_maxlvt(); - for (j = 1; j <= num_starts; j++) { Dprintk("Sending STARTUP #%d.\n", j); - apic_read_around(APIC_SPIV); - apic_write(APIC_ESR, 0); + if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ + apic_write(APIC_ESR, 0); apic_read(APIC_ESR); Dprintk("After apic_write.\n"); @@ -703,12 +698,11 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip) */ /* Target chip */ - apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid)); + apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid)); /* Boot on the stack */ /* Kick the second */ - apic_write_around(APIC_ICR, APIC_DM_STARTUP - | (start_eip >> 12)); + apic_write(APIC_ICR, APIC_DM_STARTUP | (start_eip >> 12)); /* * Give the other CPU some time to accept the IPI. @@ -724,13 +718,8 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip) * Give the other CPU some time to accept the IPI. */ udelay(200); - /* - * Due to the Pentium erratum 3AP. - */ - if (maxlvt > 3) { - apic_read_around(APIC_SPIV); + if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ apic_write(APIC_ESR, 0); - } accept_status = (apic_read(APIC_ESR) & 0xEF); if (send_status || accept_status) break; @@ -768,7 +757,7 @@ static void __cpuinit do_fork_idle(struct work_struct *work) * * Must be called after the _cpu_pda pointer table is initialized. */ -static int __cpuinit get_local_pda(int cpu) +int __cpuinit get_local_pda(int cpu) { struct x8664_pda *oldpda, *newpda; unsigned long size = sizeof(struct x8664_pda); @@ -1311,7 +1300,7 @@ static void __ref remove_cpu_from_maps(int cpu) cpu_clear(cpu, cpu_callout_map); cpu_clear(cpu, cpu_callin_map); /* was set by cpu_init() */ - clear_bit(cpu, (unsigned long *)&cpu_initialized); + cpu_clear(cpu, cpu_initialized); numa_remove_cpu(cpu); } @@ -1390,7 +1379,8 @@ static int __init parse_maxcpus(char *arg) { extern unsigned int maxcpus; - maxcpus = simple_strtoul(arg, NULL, 0); + if (arg) + maxcpus = simple_strtoul(arg, NULL, 0); return 0; } early_param("maxcpus", parse_maxcpus); diff --git a/arch/x86/kernel/smpcommon_32.c b/arch/x86/kernel/smpcommon_32.c deleted file mode 100644 index 8b137891791f..000000000000 --- a/arch/x86/kernel/smpcommon_32.c +++ /dev/null @@ -1 +0,0 @@ - diff --git a/arch/x86/kernel/step.c b/arch/x86/kernel/step.c index 92c20fee6781..e8b9863ef8c4 100644 --- a/arch/x86/kernel/step.c +++ b/arch/x86/kernel/step.c @@ -105,6 +105,20 @@ static int is_setting_trap_flag(struct task_struct *child, struct pt_regs *regs) static int enable_single_step(struct task_struct *child) { struct pt_regs *regs = task_pt_regs(child); + unsigned long oflags; + + /* + * If we stepped into a sysenter/syscall insn, it trapped in + * kernel mode; do_debug() cleared TF and set TIF_SINGLESTEP. + * If user-mode had set TF itself, then it's still clear from + * do_debug() and we need to set it again to restore the user + * state so we don't wrongly set TIF_FORCED_TF below. + * If enable_single_step() was used last and that is what + * set TIF_SINGLESTEP, then both TF and TIF_FORCED_TF are + * already set and our bookkeeping is fine. + */ + if (unlikely(test_tsk_thread_flag(child, TIF_SINGLESTEP))) + regs->flags |= X86_EFLAGS_TF; /* * Always set TIF_SINGLESTEP - this guarantees that @@ -113,11 +127,7 @@ static int enable_single_step(struct task_struct *child) */ set_tsk_thread_flag(child, TIF_SINGLESTEP); - /* - * If TF was already set, don't do anything else - */ - if (regs->flags & X86_EFLAGS_TF) - return 0; + oflags = regs->flags; /* Set TF on the kernel stack.. */ regs->flags |= X86_EFLAGS_TF; @@ -126,9 +136,22 @@ static int enable_single_step(struct task_struct *child) * ..but if TF is changed by the instruction we will trace, * don't mark it as being "us" that set it, so that we * won't clear it by hand later. + * + * Note that if we don't actually execute the popf because + * of a signal arriving right now or suchlike, we will lose + * track of the fact that it really was "us" that set it. */ - if (is_setting_trap_flag(child, regs)) + if (is_setting_trap_flag(child, regs)) { + clear_tsk_thread_flag(child, TIF_FORCED_TF); return 0; + } + + /* + * If TF was already set, check whether it was us who set it. + * If not, we should never attempt a block step. + */ + if (oflags & X86_EFLAGS_TF) + return test_tsk_thread_flag(child, TIF_FORCED_TF); set_tsk_thread_flag(child, TIF_FORCED_TF); diff --git a/arch/x86/kernel/time_32.c b/arch/x86/kernel/time_32.c index 059ca6ee59b4..ffe3c664afc0 100644 --- a/arch/x86/kernel/time_32.c +++ b/arch/x86/kernel/time_32.c @@ -129,6 +129,7 @@ void __init hpet_time_init(void) */ void __init time_init(void) { + pre_time_init_hook(); tsc_init(); late_time_init = choose_time_init(); } diff --git a/arch/x86/kernel/visws_quirks.c b/arch/x86/kernel/visws_quirks.c index e94bdb6add1d..41e01b145c48 100644 --- a/arch/x86/kernel/visws_quirks.c +++ b/arch/x86/kernel/visws_quirks.c @@ -73,7 +73,7 @@ int is_visws_box(void) return visws_board_type >= 0; } -static int __init visws_time_init_quirk(void) +static int __init visws_time_init(void) { printk(KERN_INFO "Starting Cobalt Timer system clock\n"); @@ -93,7 +93,7 @@ static int __init visws_time_init_quirk(void) return 0; } -static int __init visws_pre_intr_init_quirk(void) +static int __init visws_pre_intr_init(void) { init_VISWS_APIC_irqs(); @@ -114,7 +114,7 @@ EXPORT_SYMBOL(sgivwfb_mem_size); long long mem_size __initdata = 0; -static char * __init visws_memory_setup_quirk(void) +static char * __init visws_memory_setup(void) { long long gfx_mem_size = 8 * MB; @@ -176,7 +176,7 @@ static void visws_machine_power_off(void) outl(PIIX_SPECIAL_STOP, 0xCFC); } -static int __init visws_get_smp_config_quirk(unsigned int early) +static int __init visws_get_smp_config(unsigned int early) { /* * Prevent MP-table parsing by the generic code: @@ -192,7 +192,7 @@ extern unsigned int __cpuinitdata maxcpus; * No problem for Linux. */ -static void __init MP_processor_info (struct mpc_config_processor *m) +static void __init MP_processor_info(struct mpc_config_processor *m) { int ver, logical_apicid; physid_mask_t apic_cpus; @@ -232,7 +232,7 @@ static void __init MP_processor_info (struct mpc_config_processor *m) apic_version[m->mpc_apicid] = ver; } -int __init visws_find_smp_config_quirk(unsigned int reserve) +static int __init visws_find_smp_config(unsigned int reserve) { struct mpc_config_processor *mp = phys_to_virt(CO_CPU_TAB_PHYS); unsigned short ncpus = readw(phys_to_virt(CO_CPU_NUM_PHYS)); @@ -258,7 +258,17 @@ int __init visws_find_smp_config_quirk(unsigned int reserve) return 1; } -extern int visws_trap_init_quirk(void); +static int visws_trap_init(void); + +static struct x86_quirks visws_x86_quirks __initdata = { + .arch_time_init = visws_time_init, + .arch_pre_intr_init = visws_pre_intr_init, + .arch_memory_setup = visws_memory_setup, + .arch_intr_init = NULL, + .arch_trap_init = visws_trap_init, + .mach_get_smp_config = visws_get_smp_config, + .mach_find_smp_config = visws_find_smp_config, +}; void __init visws_early_detect(void) { @@ -272,16 +282,10 @@ void __init visws_early_detect(void) /* * Install special quirks for timer, interrupt and memory setup: - */ - arch_time_init_quirk = visws_time_init_quirk; - arch_pre_intr_init_quirk = visws_pre_intr_init_quirk; - arch_memory_setup_quirk = visws_memory_setup_quirk; - - /* * Fall back to generic behavior for traps: + * Override generic MP-table parsing: */ - arch_intr_init_quirk = NULL; - arch_trap_init_quirk = visws_trap_init_quirk; + x86_quirks = &visws_x86_quirks; /* * Install reboot quirks: @@ -294,12 +298,6 @@ void __init visws_early_detect(void) */ no_broadcast = 0; - /* - * Override generic MP-table parsing: - */ - mach_get_smp_config_quirk = visws_get_smp_config_quirk; - mach_find_smp_config_quirk = visws_find_smp_config_quirk; - #ifdef CONFIG_X86_IO_APIC /* * Turn off IO-APIC detection and initialization: @@ -426,7 +424,7 @@ static __init void cobalt_init(void) co_apic_read(CO_APIC_ID)); } -int __init visws_trap_init_quirk(void) +static int __init visws_trap_init(void) { lithium_init(); cobalt_init(); diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c index b15346092b7b..0a1b1a9d922d 100644 --- a/arch/x86/kernel/vmi_32.c +++ b/arch/x86/kernel/vmi_32.c @@ -906,7 +906,6 @@ static inline int __init activate_vmi(void) #ifdef CONFIG_X86_LOCAL_APIC para_fill(pv_apic_ops.apic_read, APICRead); para_fill(pv_apic_ops.apic_write, APICWrite); - para_fill(pv_apic_ops.apic_write_atomic, APICWrite); #endif /* diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile index c97d35c218db..d0e940bb6f40 100644 --- a/arch/x86/kvm/Makefile +++ b/arch/x86/kvm/Makefile @@ -2,7 +2,8 @@ # Makefile for Kernel-based Virtual Machine module # -common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o) +common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \ + coalesced_mmio.o) ifeq ($(CONFIG_KVM_TRACE),y) common-objs += $(addprefix ../../../virt/kvm/, kvm_trace.o) endif diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c index 3829aa7b663f..c0f7872a9124 100644 --- a/arch/x86/kvm/i8254.c +++ b/arch/x86/kvm/i8254.c @@ -91,7 +91,7 @@ static void pit_set_gate(struct kvm *kvm, int channel, u32 val) c->gate = val; } -int pit_get_gate(struct kvm *kvm, int channel) +static int pit_get_gate(struct kvm *kvm, int channel) { WARN_ON(!mutex_is_locked(&kvm->arch.vpit->pit_state.lock)); @@ -193,19 +193,16 @@ static void pit_latch_status(struct kvm *kvm, int channel) } } -int __pit_timer_fn(struct kvm_kpit_state *ps) +static int __pit_timer_fn(struct kvm_kpit_state *ps) { struct kvm_vcpu *vcpu0 = ps->pit->kvm->vcpus[0]; struct kvm_kpit_timer *pt = &ps->pit_timer; - atomic_inc(&pt->pending); - smp_mb__after_atomic_inc(); - if (vcpu0) { + if (!atomic_inc_and_test(&pt->pending)) set_bit(KVM_REQ_PENDING_TIMER, &vcpu0->requests); - if (waitqueue_active(&vcpu0->wq)) { - vcpu0->arch.mp_state = KVM_MP_STATE_RUNNABLE; - wake_up_interruptible(&vcpu0->wq); - } + if (vcpu0 && waitqueue_active(&vcpu0->wq)) { + vcpu0->arch.mp_state = KVM_MP_STATE_RUNNABLE; + wake_up_interruptible(&vcpu0->wq); } pt->timer.expires = ktime_add_ns(pt->timer.expires, pt->period); @@ -308,6 +305,7 @@ static void pit_load_count(struct kvm *kvm, int channel, u32 val) create_pit_timer(&ps->pit_timer, val, 0); break; case 2: + case 3: create_pit_timer(&ps->pit_timer, val, 1); break; default: @@ -459,7 +457,8 @@ static void pit_ioport_read(struct kvm_io_device *this, mutex_unlock(&pit_state->lock); } -static int pit_in_range(struct kvm_io_device *this, gpa_t addr) +static int pit_in_range(struct kvm_io_device *this, gpa_t addr, + int len, int is_write) { return ((addr >= KVM_PIT_BASE_ADDRESS) && (addr < KVM_PIT_BASE_ADDRESS + KVM_PIT_MEM_LENGTH)); @@ -500,7 +499,8 @@ static void speaker_ioport_read(struct kvm_io_device *this, mutex_unlock(&pit_state->lock); } -static int speaker_in_range(struct kvm_io_device *this, gpa_t addr) +static int speaker_in_range(struct kvm_io_device *this, gpa_t addr, + int len, int is_write) { return (addr == KVM_SPEAKER_BASE_ADDRESS); } @@ -575,7 +575,7 @@ void kvm_free_pit(struct kvm *kvm) } } -void __inject_pit_timer_intr(struct kvm *kvm) +static void __inject_pit_timer_intr(struct kvm *kvm) { mutex_lock(&kvm->lock); kvm_ioapic_set_irq(kvm->arch.vioapic, 0, 1); diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c index ab29cf2def47..c31164e8aa46 100644 --- a/arch/x86/kvm/i8259.c +++ b/arch/x86/kvm/i8259.c @@ -130,8 +130,10 @@ void kvm_pic_set_irq(void *opaque, int irq, int level) { struct kvm_pic *s = opaque; - pic_set_irq1(&s->pics[irq >> 3], irq & 7, level); - pic_update_irq(s); + if (irq >= 0 && irq < PIC_NUM_PINS) { + pic_set_irq1(&s->pics[irq >> 3], irq & 7, level); + pic_update_irq(s); + } } /* @@ -346,7 +348,8 @@ static u32 elcr_ioport_read(void *opaque, u32 addr1) return s->elcr; } -static int picdev_in_range(struct kvm_io_device *this, gpa_t addr) +static int picdev_in_range(struct kvm_io_device *this, gpa_t addr, + int len, int is_write) { switch (addr) { case 0x20: diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h index 2a15be2275c0..7ca47cbb48bb 100644 --- a/arch/x86/kvm/irq.h +++ b/arch/x86/kvm/irq.h @@ -30,6 +30,8 @@ #include "ioapic.h" #include "lapic.h" +#define PIC_NUM_PINS 16 + struct kvm; struct kvm_vcpu; diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index ebc03f5ae162..73f43de69f67 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -356,8 +356,9 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, case APIC_DM_SMI: printk(KERN_DEBUG "Ignoring guest SMI\n"); break; + case APIC_DM_NMI: - printk(KERN_DEBUG "Ignoring guest NMI\n"); + kvm_inject_nmi(vcpu); break; case APIC_DM_INIT: @@ -572,6 +573,8 @@ static u32 __apic_read(struct kvm_lapic *apic, unsigned int offset) { u32 val = 0; + KVMTRACE_1D(APIC_ACCESS, apic->vcpu, (u32)offset, handler); + if (offset >= LAPIC_MMIO_LENGTH) return 0; @@ -695,6 +698,8 @@ static void apic_mmio_write(struct kvm_io_device *this, offset &= 0xff0; + KVMTRACE_1D(APIC_ACCESS, apic->vcpu, (u32)offset, handler); + switch (offset) { case APIC_ID: /* Local APIC ID */ apic_set_reg(apic, APIC_ID, val); @@ -780,7 +785,8 @@ static void apic_mmio_write(struct kvm_io_device *this, } -static int apic_mmio_range(struct kvm_io_device *this, gpa_t addr) +static int apic_mmio_range(struct kvm_io_device *this, gpa_t addr, + int len, int size) { struct kvm_lapic *apic = (struct kvm_lapic *)this->private; int ret = 0; @@ -939,8 +945,8 @@ static int __apic_timer_fn(struct kvm_lapic *apic) int result = 0; wait_queue_head_t *q = &apic->vcpu->wq; - atomic_inc(&apic->timer.pending); - set_bit(KVM_REQ_PENDING_TIMER, &apic->vcpu->requests); + if(!atomic_inc_and_test(&apic->timer.pending)) + set_bit(KVM_REQ_PENDING_TIMER, &apic->vcpu->requests); if (waitqueue_active(q)) { apic->vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; wake_up_interruptible(q); diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index 676c396c9cee..81858881287e 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h @@ -31,6 +31,7 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu); u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu); void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8); void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value); +u64 kvm_lapic_get_base(struct kvm_vcpu *vcpu); int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest); int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda); diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 7e7c3969f7a2..b0e4ddca6c18 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -66,7 +66,8 @@ static void kvm_mmu_audit(struct kvm_vcpu *vcpu, const char *msg) {} #endif #if defined(MMU_DEBUG) || defined(AUDIT) -static int dbg = 1; +static int dbg = 0; +module_param(dbg, bool, 0644); #endif #ifndef MMU_DEBUG @@ -776,6 +777,15 @@ static void mmu_page_remove_parent_pte(struct kvm_mmu_page *sp, BUG(); } +static void nonpaging_prefetch_page(struct kvm_vcpu *vcpu, + struct kvm_mmu_page *sp) +{ + int i; + + for (i = 0; i < PT64_ENT_PER_PAGE; ++i) + sp->spt[i] = shadow_trap_nonpresent_pte; +} + static struct kvm_mmu_page *kvm_mmu_lookup_page(struct kvm *kvm, gfn_t gfn) { unsigned index; @@ -841,7 +851,10 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, hlist_add_head(&sp->hash_link, bucket); if (!metaphysical) rmap_write_protect(vcpu->kvm, gfn); - vcpu->arch.mmu.prefetch_page(vcpu, sp); + if (shadow_trap_nonpresent_pte != shadow_notrap_nonpresent_pte) + vcpu->arch.mmu.prefetch_page(vcpu, sp); + else + nonpaging_prefetch_page(vcpu, sp); return sp; } @@ -917,14 +930,17 @@ static void kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp) } kvm_mmu_page_unlink_children(kvm, sp); if (!sp->root_count) { - if (!sp->role.metaphysical) + if (!sp->role.metaphysical && !sp->role.invalid) unaccount_shadowed(kvm, sp->gfn); hlist_del(&sp->hash_link); kvm_mmu_free_page(kvm, sp); } else { + int invalid = sp->role.invalid; list_move(&sp->link, &kvm->arch.active_mmu_pages); sp->role.invalid = 1; kvm_reload_remote_mmus(kvm); + if (!sp->role.metaphysical && !invalid) + unaccount_shadowed(kvm, sp->gfn); } kvm_mmu_reset_last_pte_updated(kvm); } @@ -1103,7 +1119,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, mark_page_dirty(vcpu->kvm, gfn); pgprintk("%s: setting spte %llx\n", __func__, spte); - pgprintk("instantiating %s PTE (%s) at %d (%llx) addr %llx\n", + pgprintk("instantiating %s PTE (%s) at %ld (%llx) addr %p\n", (spte&PT_PAGE_SIZE_MASK)? "2MB" : "4kB", (spte&PT_WRITABLE_MASK)?"RW":"R", gfn, spte, shadow_pte); set_shadow_pte(shadow_pte, spte); @@ -1122,8 +1138,10 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, else kvm_release_pfn_clean(pfn); } - if (!ptwrite || !*ptwrite) + if (speculative) { vcpu->arch.last_pte_updated = shadow_pte; + vcpu->arch.last_pte_gfn = gfn; + } } static void nonpaging_new_cr3(struct kvm_vcpu *vcpu) @@ -1171,9 +1189,10 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write, return -ENOMEM; } - table[index] = __pa(new_table->spt) - | PT_PRESENT_MASK | PT_WRITABLE_MASK - | shadow_user_mask | shadow_x_mask; + set_shadow_pte(&table[index], + __pa(new_table->spt) + | PT_PRESENT_MASK | PT_WRITABLE_MASK + | shadow_user_mask | shadow_x_mask); } table_addr = table[index] & PT64_BASE_ADDR_MASK; } @@ -1211,15 +1230,6 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn) } -static void nonpaging_prefetch_page(struct kvm_vcpu *vcpu, - struct kvm_mmu_page *sp) -{ - int i; - - for (i = 0; i < PT64_ENT_PER_PAGE; ++i) - sp->spt[i] = shadow_trap_nonpresent_pte; -} - static void mmu_free_roots(struct kvm_vcpu *vcpu) { int i; @@ -1671,6 +1681,18 @@ static void mmu_guess_page_from_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, vcpu->arch.update_pte.pfn = pfn; } +static void kvm_mmu_access_page(struct kvm_vcpu *vcpu, gfn_t gfn) +{ + u64 *spte = vcpu->arch.last_pte_updated; + + if (spte + && vcpu->arch.last_pte_gfn == gfn + && shadow_accessed_mask + && !(*spte & shadow_accessed_mask) + && is_shadow_present_pte(*spte)) + set_bit(PT_ACCESSED_SHIFT, (unsigned long *)spte); +} + void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new, int bytes) { @@ -1694,6 +1716,7 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, pgprintk("%s: gpa %llx bytes %d\n", __func__, gpa, bytes); mmu_guess_page_from_pte_write(vcpu, gpa, new, bytes); spin_lock(&vcpu->kvm->mmu_lock); + kvm_mmu_access_page(vcpu, gfn); kvm_mmu_free_some_pages(vcpu); ++vcpu->kvm->stat.mmu_pte_write; kvm_mmu_audit(vcpu, "pre pte write"); @@ -1948,7 +1971,7 @@ void kvm_mmu_zap_all(struct kvm *kvm) kvm_flush_remote_tlbs(kvm); } -void kvm_mmu_remove_one_alloc_mmu_page(struct kvm *kvm) +static void kvm_mmu_remove_one_alloc_mmu_page(struct kvm *kvm) { struct kvm_mmu_page *page; @@ -1968,6 +1991,8 @@ static int mmu_shrink(int nr_to_scan, gfp_t gfp_mask) list_for_each_entry(kvm, &vm_list, vm_list) { int npages; + if (!down_read_trylock(&kvm->slots_lock)) + continue; spin_lock(&kvm->mmu_lock); npages = kvm->arch.n_alloc_mmu_pages - kvm->arch.n_free_mmu_pages; @@ -1980,6 +2005,7 @@ static int mmu_shrink(int nr_to_scan, gfp_t gfp_mask) nr_to_scan--; spin_unlock(&kvm->mmu_lock); + up_read(&kvm->slots_lock); } if (kvm_freed) list_move_tail(&kvm_freed->vm_list, &vm_list); diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h index 1730757bbc7a..258e5d56298e 100644 --- a/arch/x86/kvm/mmu.h +++ b/arch/x86/kvm/mmu.h @@ -15,7 +15,8 @@ #define PT_USER_MASK (1ULL << 2) #define PT_PWT_MASK (1ULL << 3) #define PT_PCD_MASK (1ULL << 4) -#define PT_ACCESSED_MASK (1ULL << 5) +#define PT_ACCESSED_SHIFT 5 +#define PT_ACCESSED_MASK (1ULL << PT_ACCESSED_SHIFT) #define PT_DIRTY_MASK (1ULL << 6) #define PT_PAGE_SIZE_MASK (1ULL << 7) #define PT_PAT_MASK (1ULL << 7) diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 934c7b619396..4d918220baeb 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -460,8 +460,9 @@ static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr) static void FNAME(prefetch_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) { - int i, offset = 0, r = 0; - pt_element_t pt; + int i, j, offset, r; + pt_element_t pt[256 / sizeof(pt_element_t)]; + gpa_t pte_gpa; if (sp->role.metaphysical || (PTTYPE == 32 && sp->role.level > PT_PAGE_TABLE_LEVEL)) { @@ -469,19 +470,20 @@ static void FNAME(prefetch_page)(struct kvm_vcpu *vcpu, return; } - if (PTTYPE == 32) + pte_gpa = gfn_to_gpa(sp->gfn); + if (PTTYPE == 32) { offset = sp->role.quadrant << PT64_LEVEL_BITS; + pte_gpa += offset * sizeof(pt_element_t); + } - for (i = 0; i < PT64_ENT_PER_PAGE; ++i) { - gpa_t pte_gpa = gfn_to_gpa(sp->gfn); - pte_gpa += (i+offset) * sizeof(pt_element_t); - - r = kvm_read_guest_atomic(vcpu->kvm, pte_gpa, &pt, - sizeof(pt_element_t)); - if (r || is_present_pte(pt)) - sp->spt[i] = shadow_trap_nonpresent_pte; - else - sp->spt[i] = shadow_notrap_nonpresent_pte; + for (i = 0; i < PT64_ENT_PER_PAGE; i += ARRAY_SIZE(pt)) { + r = kvm_read_guest_atomic(vcpu->kvm, pte_gpa, pt, sizeof pt); + pte_gpa += ARRAY_SIZE(pt) * sizeof(pt_element_t); + for (j = 0; j < ARRAY_SIZE(pt); ++j) + if (r || is_present_pte(pt[j])) + sp->spt[i+j] = shadow_trap_nonpresent_pte; + else + sp->spt[i+j] = shadow_notrap_nonpresent_pte; } } diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 6b0d5fa5bab3..b756e876dce3 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -27,6 +27,8 @@ #include <asm/desc.h> +#define __ex(x) __kvm_handle_fault_on_reboot(x) + MODULE_AUTHOR("Qumranet"); MODULE_LICENSE("GPL"); @@ -129,17 +131,17 @@ static inline void push_irq(struct kvm_vcpu *vcpu, u8 irq) static inline void clgi(void) { - asm volatile (SVM_CLGI); + asm volatile (__ex(SVM_CLGI)); } static inline void stgi(void) { - asm volatile (SVM_STGI); + asm volatile (__ex(SVM_STGI)); } static inline void invlpga(unsigned long addr, u32 asid) { - asm volatile (SVM_INVLPGA :: "a"(addr), "c"(asid)); + asm volatile (__ex(SVM_INVLPGA) :: "a"(addr), "c"(asid)); } static inline unsigned long kvm_read_cr2(void) @@ -270,19 +272,11 @@ static int has_svm(void) static void svm_hardware_disable(void *garbage) { - struct svm_cpu_data *svm_data - = per_cpu(svm_data, raw_smp_processor_id()); - - if (svm_data) { - uint64_t efer; + uint64_t efer; - wrmsrl(MSR_VM_HSAVE_PA, 0); - rdmsrl(MSR_EFER, efer); - wrmsrl(MSR_EFER, efer & ~MSR_EFER_SVME_MASK); - per_cpu(svm_data, raw_smp_processor_id()) = NULL; - __free_page(svm_data->save_area); - kfree(svm_data); - } + wrmsrl(MSR_VM_HSAVE_PA, 0); + rdmsrl(MSR_EFER, efer); + wrmsrl(MSR_EFER, efer & ~MSR_EFER_SVME_MASK); } static void svm_hardware_enable(void *garbage) @@ -321,6 +315,19 @@ static void svm_hardware_enable(void *garbage) page_to_pfn(svm_data->save_area) << PAGE_SHIFT); } +static void svm_cpu_uninit(int cpu) +{ + struct svm_cpu_data *svm_data + = per_cpu(svm_data, raw_smp_processor_id()); + + if (!svm_data) + return; + + per_cpu(svm_data, raw_smp_processor_id()) = NULL; + __free_page(svm_data->save_area); + kfree(svm_data); +} + static int svm_cpu_init(int cpu) { struct svm_cpu_data *svm_data; @@ -458,6 +465,11 @@ err: static __exit void svm_hardware_unsetup(void) { + int cpu; + + for_each_online_cpu(cpu) + svm_cpu_uninit(cpu); + __free_pages(pfn_to_page(iopm_base >> PAGE_SHIFT), IOPM_ALLOC_ORDER); iopm_base = 0; } @@ -707,10 +719,6 @@ static void svm_vcpu_put(struct kvm_vcpu *vcpu) rdtscll(vcpu->arch.host_tsc); } -static void svm_vcpu_decache(struct kvm_vcpu *vcpu) -{ -} - static void svm_cache_regs(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); @@ -949,7 +957,9 @@ static void new_asid(struct vcpu_svm *svm, struct svm_cpu_data *svm_data) static unsigned long svm_get_dr(struct kvm_vcpu *vcpu, int dr) { - return to_svm(vcpu)->db_regs[dr]; + unsigned long val = to_svm(vcpu)->db_regs[dr]; + KVMTRACE_2D(DR_READ, vcpu, (u32)dr, (u32)val, handler); + return val; } static void svm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long value, @@ -1004,6 +1014,16 @@ static int pf_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) fault_address = svm->vmcb->control.exit_info_2; error_code = svm->vmcb->control.exit_info_1; + + if (!npt_enabled) + KVMTRACE_3D(PAGE_FAULT, &svm->vcpu, error_code, + (u32)fault_address, (u32)(fault_address >> 32), + handler); + else + KVMTRACE_3D(TDP_FAULT, &svm->vcpu, error_code, + (u32)fault_address, (u32)(fault_address >> 32), + handler); + return kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code); } @@ -1081,6 +1101,19 @@ static int io_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) return kvm_emulate_pio(&svm->vcpu, kvm_run, in, size, port); } +static int nmi_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) +{ + KVMTRACE_0D(NMI, &svm->vcpu, handler); + return 1; +} + +static int intr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) +{ + ++svm->vcpu.stat.irq_exits; + KVMTRACE_0D(INTR, &svm->vcpu, handler); + return 1; +} + static int nop_on_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) { return 1; @@ -1219,6 +1252,9 @@ static int rdmsr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) if (svm_get_msr(&svm->vcpu, ecx, &data)) kvm_inject_gp(&svm->vcpu, 0); else { + KVMTRACE_3D(MSR_READ, &svm->vcpu, ecx, (u32)data, + (u32)(data >> 32), handler); + svm->vmcb->save.rax = data & 0xffffffff; svm->vcpu.arch.regs[VCPU_REGS_RDX] = data >> 32; svm->next_rip = svm->vmcb->save.rip + 2; @@ -1284,16 +1320,19 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data) case MSR_K7_EVNTSEL1: case MSR_K7_EVNTSEL2: case MSR_K7_EVNTSEL3: + case MSR_K7_PERFCTR0: + case MSR_K7_PERFCTR1: + case MSR_K7_PERFCTR2: + case MSR_K7_PERFCTR3: /* - * only support writing 0 to the performance counters for now - * to make Windows happy. Should be replaced by a real - * performance counter emulation later. + * Just discard all writes to the performance counters; this + * should keep both older linux and windows 64-bit guests + * happy */ - if (data != 0) - goto unhandled; + pr_unimpl(vcpu, "unimplemented perfctr wrmsr: 0x%x data 0x%llx\n", ecx, data); + break; default: - unhandled: return kvm_set_msr_common(vcpu, ecx, data); } return 0; @@ -1304,6 +1343,10 @@ static int wrmsr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) u32 ecx = svm->vcpu.arch.regs[VCPU_REGS_RCX]; u64 data = (svm->vmcb->save.rax & -1u) | ((u64)(svm->vcpu.arch.regs[VCPU_REGS_RDX] & -1u) << 32); + + KVMTRACE_3D(MSR_WRITE, &svm->vcpu, ecx, (u32)data, (u32)(data >> 32), + handler); + svm->next_rip = svm->vmcb->save.rip + 2; if (svm_set_msr(&svm->vcpu, ecx, data)) kvm_inject_gp(&svm->vcpu, 0); @@ -1323,6 +1366,8 @@ static int msr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) static int interrupt_window_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) { + KVMTRACE_0D(PEND_INTR, &svm->vcpu, handler); + svm->vmcb->control.intercept &= ~(1ULL << INTERCEPT_VINTR); svm->vmcb->control.int_ctl &= ~V_IRQ_MASK; /* @@ -1364,8 +1409,8 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm, [SVM_EXIT_EXCP_BASE + PF_VECTOR] = pf_interception, [SVM_EXIT_EXCP_BASE + NM_VECTOR] = nm_interception, [SVM_EXIT_EXCP_BASE + MC_VECTOR] = mc_interception, - [SVM_EXIT_INTR] = nop_on_interception, - [SVM_EXIT_NMI] = nop_on_interception, + [SVM_EXIT_INTR] = intr_interception, + [SVM_EXIT_NMI] = nmi_interception, [SVM_EXIT_SMI] = nop_on_interception, [SVM_EXIT_INIT] = nop_on_interception, [SVM_EXIT_VINTR] = interrupt_window_interception, @@ -1397,6 +1442,9 @@ static int handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) struct vcpu_svm *svm = to_svm(vcpu); u32 exit_code = svm->vmcb->control.exit_code; + KVMTRACE_3D(VMEXIT, vcpu, exit_code, (u32)svm->vmcb->save.rip, + (u32)((u64)svm->vmcb->save.rip >> 32), entryexit); + if (npt_enabled) { int mmu_reload = 0; if ((vcpu->arch.cr0 ^ svm->vmcb->save.cr0) & X86_CR0_PG) { @@ -1470,6 +1518,8 @@ static inline void svm_inject_irq(struct vcpu_svm *svm, int irq) { struct vmcb_control_area *control; + KVMTRACE_1D(INJ_VIRQ, &svm->vcpu, (u32)irq, handler); + control = &svm->vmcb->control; control->int_vector = irq; control->int_ctl &= ~V_INTR_PRIO_MASK; @@ -1660,9 +1710,9 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) sync_lapic_to_cr8(vcpu); save_host_msrs(vcpu); - fs_selector = read_fs(); - gs_selector = read_gs(); - ldt_selector = read_ldt(); + fs_selector = kvm_read_fs(); + gs_selector = kvm_read_gs(); + ldt_selector = kvm_read_ldt(); svm->host_cr2 = kvm_read_cr2(); svm->host_dr6 = read_dr6(); svm->host_dr7 = read_dr7(); @@ -1716,17 +1766,17 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) /* Enter guest mode */ "push %%rax \n\t" "mov %c[vmcb](%[svm]), %%rax \n\t" - SVM_VMLOAD "\n\t" - SVM_VMRUN "\n\t" - SVM_VMSAVE "\n\t" + __ex(SVM_VMLOAD) "\n\t" + __ex(SVM_VMRUN) "\n\t" + __ex(SVM_VMSAVE) "\n\t" "pop %%rax \n\t" #else /* Enter guest mode */ "push %%eax \n\t" "mov %c[vmcb](%[svm]), %%eax \n\t" - SVM_VMLOAD "\n\t" - SVM_VMRUN "\n\t" - SVM_VMSAVE "\n\t" + __ex(SVM_VMLOAD) "\n\t" + __ex(SVM_VMRUN) "\n\t" + __ex(SVM_VMSAVE) "\n\t" "pop %%eax \n\t" #endif @@ -1795,9 +1845,9 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) write_dr7(svm->host_dr7); kvm_write_cr2(svm->host_cr2); - load_fs(fs_selector); - load_gs(gs_selector); - load_ldt(ldt_selector); + kvm_load_fs(fs_selector); + kvm_load_gs(gs_selector); + kvm_load_ldt(ldt_selector); load_host_msrs(vcpu); reload_tss(vcpu); @@ -1889,7 +1939,6 @@ static struct kvm_x86_ops svm_x86_ops = { .prepare_guest_switch = svm_prepare_guest_switch, .vcpu_load = svm_vcpu_load, .vcpu_put = svm_vcpu_put, - .vcpu_decache = svm_vcpu_decache, .set_guest_debug = svm_guest_debug, .get_msr = svm_get_msr, diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 10ce6ee4c491..0cac63701719 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -30,6 +30,8 @@ #include <asm/io.h> #include <asm/desc.h> +#define __ex(x) __kvm_handle_fault_on_reboot(x) + MODULE_AUTHOR("Qumranet"); MODULE_LICENSE("GPL"); @@ -53,6 +55,7 @@ struct vmcs { struct vcpu_vmx { struct kvm_vcpu vcpu; + struct list_head local_vcpus_link; int launched; u8 fail; u32 idt_vectoring_info; @@ -88,9 +91,11 @@ static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu) } static int init_rmode(struct kvm *kvm); +static u64 construct_eptp(unsigned long root_hpa); static DEFINE_PER_CPU(struct vmcs *, vmxarea); static DEFINE_PER_CPU(struct vmcs *, current_vmcs); +static DEFINE_PER_CPU(struct list_head, vcpus_on_cpu); static struct page *vmx_io_bitmap_a; static struct page *vmx_io_bitmap_b; @@ -260,6 +265,11 @@ static inline int cpu_has_vmx_vpid(void) SECONDARY_EXEC_ENABLE_VPID); } +static inline int cpu_has_virtual_nmis(void) +{ + return vmcs_config.pin_based_exec_ctrl & PIN_BASED_VIRTUAL_NMIS; +} + static int __find_msr_index(struct vcpu_vmx *vmx, u32 msr) { int i; @@ -278,7 +288,7 @@ static inline void __invvpid(int ext, u16 vpid, gva_t gva) u64 gva; } operand = { vpid, 0, gva }; - asm volatile (ASM_VMX_INVVPID + asm volatile (__ex(ASM_VMX_INVVPID) /* CF==1 or ZF==1 --> rc = -1 */ "; ja 1f ; ud2 ; 1:" : : "a"(&operand), "c"(ext) : "cc", "memory"); @@ -290,7 +300,7 @@ static inline void __invept(int ext, u64 eptp, gpa_t gpa) u64 eptp, gpa; } operand = {eptp, gpa}; - asm volatile (ASM_VMX_INVEPT + asm volatile (__ex(ASM_VMX_INVEPT) /* CF==1 or ZF==1 --> rc = -1 */ "; ja 1f ; ud2 ; 1:\n" : : "a" (&operand), "c" (ext) : "cc", "memory"); @@ -311,7 +321,7 @@ static void vmcs_clear(struct vmcs *vmcs) u64 phys_addr = __pa(vmcs); u8 error; - asm volatile (ASM_VMX_VMCLEAR_RAX "; setna %0" + asm volatile (__ex(ASM_VMX_VMCLEAR_RAX) "; setna %0" : "=g"(error) : "a"(&phys_addr), "m"(phys_addr) : "cc", "memory"); if (error) @@ -329,6 +339,9 @@ static void __vcpu_clear(void *arg) if (per_cpu(current_vmcs, cpu) == vmx->vmcs) per_cpu(current_vmcs, cpu) = NULL; rdtscll(vmx->vcpu.arch.host_tsc); + list_del(&vmx->local_vcpus_link); + vmx->vcpu.cpu = -1; + vmx->launched = 0; } static void vcpu_clear(struct vcpu_vmx *vmx) @@ -336,7 +349,6 @@ static void vcpu_clear(struct vcpu_vmx *vmx) if (vmx->vcpu.cpu == -1) return; smp_call_function_single(vmx->vcpu.cpu, __vcpu_clear, vmx, 1); - vmx->launched = 0; } static inline void vpid_sync_vcpu_all(struct vcpu_vmx *vmx) @@ -378,7 +390,7 @@ static unsigned long vmcs_readl(unsigned long field) { unsigned long value; - asm volatile (ASM_VMX_VMREAD_RDX_RAX + asm volatile (__ex(ASM_VMX_VMREAD_RDX_RAX) : "=a"(value) : "d"(field) : "cc"); return value; } @@ -413,7 +425,7 @@ static void vmcs_writel(unsigned long field, unsigned long value) { u8 error; - asm volatile (ASM_VMX_VMWRITE_RAX_RDX "; setna %0" + asm volatile (__ex(ASM_VMX_VMWRITE_RAX_RDX) "; setna %0" : "=q"(error) : "a"(value), "d"(field) : "cc"); if (unlikely(error)) vmwrite_error(field, value); @@ -431,10 +443,8 @@ static void vmcs_write32(unsigned long field, u32 value) static void vmcs_write64(unsigned long field, u64 value) { -#ifdef CONFIG_X86_64 - vmcs_writel(field, value); -#else vmcs_writel(field, value); +#ifndef CONFIG_X86_64 asm volatile (""); vmcs_writel(field+1, value >> 32); #endif @@ -474,7 +484,7 @@ static void reload_tss(void) struct descriptor_table gdt; struct desc_struct *descs; - get_gdt(&gdt); + kvm_get_gdt(&gdt); descs = (void *)gdt.base; descs[GDT_ENTRY_TSS].type = 9; /* available TSS */ load_TR_desc(); @@ -530,9 +540,9 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu) * Set host fs and gs selectors. Unfortunately, 22.2.3 does not * allow segment selectors with cpl > 0 or ti == 1. */ - vmx->host_state.ldt_sel = read_ldt(); + vmx->host_state.ldt_sel = kvm_read_ldt(); vmx->host_state.gs_ldt_reload_needed = vmx->host_state.ldt_sel; - vmx->host_state.fs_sel = read_fs(); + vmx->host_state.fs_sel = kvm_read_fs(); if (!(vmx->host_state.fs_sel & 7)) { vmcs_write16(HOST_FS_SELECTOR, vmx->host_state.fs_sel); vmx->host_state.fs_reload_needed = 0; @@ -540,7 +550,7 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu) vmcs_write16(HOST_FS_SELECTOR, 0); vmx->host_state.fs_reload_needed = 1; } - vmx->host_state.gs_sel = read_gs(); + vmx->host_state.gs_sel = kvm_read_gs(); if (!(vmx->host_state.gs_sel & 7)) vmcs_write16(HOST_GS_SELECTOR, vmx->host_state.gs_sel); else { @@ -576,15 +586,15 @@ static void __vmx_load_host_state(struct vcpu_vmx *vmx) ++vmx->vcpu.stat.host_state_reload; vmx->host_state.loaded = 0; if (vmx->host_state.fs_reload_needed) - load_fs(vmx->host_state.fs_sel); + kvm_load_fs(vmx->host_state.fs_sel); if (vmx->host_state.gs_ldt_reload_needed) { - load_ldt(vmx->host_state.ldt_sel); + kvm_load_ldt(vmx->host_state.ldt_sel); /* * If we have to reload gs, we must take care to * preserve our gs base. */ local_irq_save(flags); - load_gs(vmx->host_state.gs_sel); + kvm_load_gs(vmx->host_state.gs_sel); #ifdef CONFIG_X86_64 wrmsrl(MSR_GS_BASE, vmcs_readl(HOST_GS_BASE)); #endif @@ -617,13 +627,17 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) vcpu_clear(vmx); kvm_migrate_timers(vcpu); vpid_sync_vcpu_all(vmx); + local_irq_disable(); + list_add(&vmx->local_vcpus_link, + &per_cpu(vcpus_on_cpu, cpu)); + local_irq_enable(); } if (per_cpu(current_vmcs, cpu) != vmx->vmcs) { u8 error; per_cpu(current_vmcs, cpu) = vmx->vmcs; - asm volatile (ASM_VMX_VMPTRLD_RAX "; setna %0" + asm volatile (__ex(ASM_VMX_VMPTRLD_RAX) "; setna %0" : "=g"(error) : "a"(&phys_addr), "m"(phys_addr) : "cc"); if (error) @@ -640,8 +654,8 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) * Linux uses per-cpu TSS and GDT, so set these when switching * processors. */ - vmcs_writel(HOST_TR_BASE, read_tr_base()); /* 22.2.4 */ - get_gdt(&dt); + vmcs_writel(HOST_TR_BASE, kvm_read_tr_base()); /* 22.2.4 */ + kvm_get_gdt(&dt); vmcs_writel(HOST_GDTR_BASE, dt.base); /* 22.2.4 */ rdmsrl(MSR_IA32_SYSENTER_ESP, sysenter_esp); @@ -684,11 +698,6 @@ static void vmx_fpu_deactivate(struct kvm_vcpu *vcpu) update_exception_bitmap(vcpu); } -static void vmx_vcpu_decache(struct kvm_vcpu *vcpu) -{ - vcpu_clear(to_vmx(vcpu)); -} - static unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu) { return vmcs_readl(GUEST_RFLAGS); @@ -913,6 +922,18 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) case MSR_IA32_TIME_STAMP_COUNTER: guest_write_tsc(data); break; + case MSR_P6_PERFCTR0: + case MSR_P6_PERFCTR1: + case MSR_P6_EVNTSEL0: + case MSR_P6_EVNTSEL1: + /* + * Just discard all writes to the performance counters; this + * should keep both older linux and windows 64-bit guests + * happy + */ + pr_unimpl(vcpu, "unimplemented perfctr wrmsr: 0x%x data 0x%llx\n", msr_index, data); + + break; default: vmx_load_host_state(vmx); msr = find_msr_entry(vmx, msr_index); @@ -1022,6 +1043,7 @@ static void hardware_enable(void *garbage) u64 phys_addr = __pa(per_cpu(vmxarea, cpu)); u64 old; + INIT_LIST_HEAD(&per_cpu(vcpus_on_cpu, cpu)); rdmsrl(MSR_IA32_FEATURE_CONTROL, old); if ((old & (MSR_IA32_FEATURE_CONTROL_LOCKED | MSR_IA32_FEATURE_CONTROL_VMXON_ENABLED)) @@ -1032,13 +1054,25 @@ static void hardware_enable(void *garbage) MSR_IA32_FEATURE_CONTROL_LOCKED | MSR_IA32_FEATURE_CONTROL_VMXON_ENABLED); write_cr4(read_cr4() | X86_CR4_VMXE); /* FIXME: not cpu hotplug safe */ - asm volatile (ASM_VMX_VMXON_RAX : : "a"(&phys_addr), "m"(phys_addr) + asm volatile (ASM_VMX_VMXON_RAX + : : "a"(&phys_addr), "m"(phys_addr) : "memory", "cc"); } +static void vmclear_local_vcpus(void) +{ + int cpu = raw_smp_processor_id(); + struct vcpu_vmx *vmx, *n; + + list_for_each_entry_safe(vmx, n, &per_cpu(vcpus_on_cpu, cpu), + local_vcpus_link) + __vcpu_clear(vmx); +} + static void hardware_disable(void *garbage) { - asm volatile (ASM_VMX_VMXOFF : : : "cc"); + vmclear_local_vcpus(); + asm volatile (__ex(ASM_VMX_VMXOFF) : : : "cc"); write_cr4(read_cr4() & ~X86_CR4_VMXE); } @@ -1072,7 +1106,7 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) u32 _vmentry_control = 0; min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING; - opt = 0; + opt = PIN_BASED_VIRTUAL_NMIS; if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PINBASED_CTLS, &_pin_based_exec_control) < 0) return -EIO; @@ -1389,6 +1423,8 @@ static void exit_lmode(struct kvm_vcpu *vcpu) static void vmx_flush_tlb(struct kvm_vcpu *vcpu) { vpid_sync_vcpu_all(to_vmx(vcpu)); + if (vm_need_ept()) + ept_sync_context(construct_eptp(vcpu->arch.mmu.root_hpa)); } static void vmx_decache_cr4_guest_bits(struct kvm_vcpu *vcpu) @@ -1420,7 +1456,7 @@ static void ept_update_paging_mode_cr0(unsigned long *hw_cr0, if (!(cr0 & X86_CR0_PG)) { /* From paging/starting to nonpaging */ vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, - vmcs_config.cpu_based_exec_ctrl | + vmcs_read32(CPU_BASED_VM_EXEC_CONTROL) | (CPU_BASED_CR3_LOAD_EXITING | CPU_BASED_CR3_STORE_EXITING)); vcpu->arch.cr0 = cr0; @@ -1430,7 +1466,7 @@ static void ept_update_paging_mode_cr0(unsigned long *hw_cr0, } else if (!is_paging(vcpu)) { /* From nonpaging to paging */ vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, - vmcs_config.cpu_based_exec_ctrl & + vmcs_read32(CPU_BASED_VM_EXEC_CONTROL) & ~(CPU_BASED_CR3_LOAD_EXITING | CPU_BASED_CR3_STORE_EXITING)); vcpu->arch.cr0 = cr0; @@ -1821,7 +1857,7 @@ static void allocate_vpid(struct vcpu_vmx *vmx) spin_unlock(&vmx_vpid_lock); } -void vmx_disable_intercept_for_msr(struct page *msr_bitmap, u32 msr) +static void vmx_disable_intercept_for_msr(struct page *msr_bitmap, u32 msr) { void *va; @@ -1907,8 +1943,8 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) vmcs_write16(HOST_CS_SELECTOR, __KERNEL_CS); /* 22.2.4 */ vmcs_write16(HOST_DS_SELECTOR, __KERNEL_DS); /* 22.2.4 */ vmcs_write16(HOST_ES_SELECTOR, __KERNEL_DS); /* 22.2.4 */ - vmcs_write16(HOST_FS_SELECTOR, read_fs()); /* 22.2.4 */ - vmcs_write16(HOST_GS_SELECTOR, read_gs()); /* 22.2.4 */ + vmcs_write16(HOST_FS_SELECTOR, kvm_read_fs()); /* 22.2.4 */ + vmcs_write16(HOST_GS_SELECTOR, kvm_read_gs()); /* 22.2.4 */ vmcs_write16(HOST_SS_SELECTOR, __KERNEL_DS); /* 22.2.4 */ #ifdef CONFIG_X86_64 rdmsrl(MSR_FS_BASE, a); @@ -1922,7 +1958,7 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) vmcs_write16(HOST_TR_SELECTOR, GDT_ENTRY_TSS*8); /* 22.2.4 */ - get_idt(&dt); + kvm_get_idt(&dt); vmcs_writel(HOST_IDTR_BASE, dt.base); /* 22.2.4 */ asm("mov $.Lkvm_vmx_return, %0" : "=r"(kvm_vmx_return)); @@ -2114,6 +2150,13 @@ static void vmx_inject_irq(struct kvm_vcpu *vcpu, int irq) irq | INTR_TYPE_EXT_INTR | INTR_INFO_VALID_MASK); } +static void vmx_inject_nmi(struct kvm_vcpu *vcpu) +{ + vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, + INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR); + vcpu->arch.nmi_pending = 0; +} + static void kvm_do_inject_irq(struct kvm_vcpu *vcpu) { int word_index = __ffs(vcpu->arch.irq_summary); @@ -2554,8 +2597,6 @@ static int handle_apic_access(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) exit_qualification = vmcs_read64(EXIT_QUALIFICATION); offset = exit_qualification & 0xffful; - KVMTRACE_1D(APIC_ACCESS, vcpu, (u32)offset, handler); - er = emulate_instruction(vcpu, kvm_run, 0, 0, 0); if (er != EMULATE_DONE) { @@ -2639,6 +2680,19 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) return 1; } +static int handle_nmi_window(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +{ + u32 cpu_based_vm_exec_control; + + /* clear pending NMI */ + cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); + cpu_based_vm_exec_control &= ~CPU_BASED_VIRTUAL_NMI_PENDING; + vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control); + ++vcpu->stat.nmi_window_exits; + + return 1; +} + /* * The exit handlers return 1 if the exit was handled fully and guest execution * may resume. Otherwise they set the kvm_run parameter to indicate what needs @@ -2649,6 +2703,7 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu, [EXIT_REASON_EXCEPTION_NMI] = handle_exception, [EXIT_REASON_EXTERNAL_INTERRUPT] = handle_external_interrupt, [EXIT_REASON_TRIPLE_FAULT] = handle_triple_fault, + [EXIT_REASON_NMI_WINDOW] = handle_nmi_window, [EXIT_REASON_IO_INSTRUCTION] = handle_io, [EXIT_REASON_CR_ACCESS] = handle_cr, [EXIT_REASON_DR_ACCESS] = handle_dr, @@ -2736,17 +2791,52 @@ static void enable_irq_window(struct kvm_vcpu *vcpu) vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control); } +static void enable_nmi_window(struct kvm_vcpu *vcpu) +{ + u32 cpu_based_vm_exec_control; + + if (!cpu_has_virtual_nmis()) + return; + + cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); + cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_NMI_PENDING; + vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control); +} + +static int vmx_nmi_enabled(struct kvm_vcpu *vcpu) +{ + u32 guest_intr = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO); + return !(guest_intr & (GUEST_INTR_STATE_NMI | + GUEST_INTR_STATE_MOV_SS | + GUEST_INTR_STATE_STI)); +} + +static int vmx_irq_enabled(struct kvm_vcpu *vcpu) +{ + u32 guest_intr = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO); + return (!(guest_intr & (GUEST_INTR_STATE_MOV_SS | + GUEST_INTR_STATE_STI)) && + (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF)); +} + +static void enable_intr_window(struct kvm_vcpu *vcpu) +{ + if (vcpu->arch.nmi_pending) + enable_nmi_window(vcpu); + else if (kvm_cpu_has_interrupt(vcpu)) + enable_irq_window(vcpu); +} + static void vmx_intr_assist(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); - u32 idtv_info_field, intr_info_field; - int has_ext_irq, interrupt_window_open; + u32 idtv_info_field, intr_info_field, exit_intr_info_field; int vector; update_tpr_threshold(vcpu); - has_ext_irq = kvm_cpu_has_interrupt(vcpu); intr_info_field = vmcs_read32(VM_ENTRY_INTR_INFO_FIELD); + exit_intr_info_field = vmcs_read32(VM_EXIT_INTR_INFO); idtv_info_field = vmx->idt_vectoring_info; if (intr_info_field & INTR_INFO_VALID_MASK) { if (idtv_info_field & INTR_INFO_VALID_MASK) { @@ -2754,8 +2844,7 @@ static void vmx_intr_assist(struct kvm_vcpu *vcpu) if (printk_ratelimit()) printk(KERN_ERR "Fault when IDT_Vectoring\n"); } - if (has_ext_irq) - enable_irq_window(vcpu); + enable_intr_window(vcpu); return; } if (unlikely(idtv_info_field & INTR_INFO_VALID_MASK)) { @@ -2765,30 +2854,56 @@ static void vmx_intr_assist(struct kvm_vcpu *vcpu) u8 vect = idtv_info_field & VECTORING_INFO_VECTOR_MASK; vmx_inject_irq(vcpu, vect); - if (unlikely(has_ext_irq)) - enable_irq_window(vcpu); + enable_intr_window(vcpu); return; } KVMTRACE_1D(REDELIVER_EVT, vcpu, idtv_info_field, handler); - vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, idtv_info_field); + /* + * SDM 3: 25.7.1.2 + * Clear bit "block by NMI" before VM entry if a NMI delivery + * faulted. + */ + if ((idtv_info_field & VECTORING_INFO_TYPE_MASK) + == INTR_TYPE_NMI_INTR && cpu_has_virtual_nmis()) + vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, + vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & + ~GUEST_INTR_STATE_NMI); + + vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, idtv_info_field + & ~INTR_INFO_RESVD_BITS_MASK); vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, vmcs_read32(VM_EXIT_INSTRUCTION_LEN)); if (unlikely(idtv_info_field & INTR_INFO_DELIVER_CODE_MASK)) vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, vmcs_read32(IDT_VECTORING_ERROR_CODE)); - if (unlikely(has_ext_irq)) - enable_irq_window(vcpu); + enable_intr_window(vcpu); return; } - if (!has_ext_irq) + if (cpu_has_virtual_nmis()) { + /* + * SDM 3: 25.7.1.2 + * Re-set bit "block by NMI" before VM entry if vmexit caused by + * a guest IRET fault. + */ + if ((exit_intr_info_field & INTR_INFO_UNBLOCK_NMI) && + (exit_intr_info_field & INTR_INFO_VECTOR_MASK) != 8) + vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, + vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) | + GUEST_INTR_STATE_NMI); + else if (vcpu->arch.nmi_pending) { + if (vmx_nmi_enabled(vcpu)) + vmx_inject_nmi(vcpu); + enable_intr_window(vcpu); + return; + } + + } + if (!kvm_cpu_has_interrupt(vcpu)) return; - interrupt_window_open = - ((vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) && - (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & 3) == 0); - if (interrupt_window_open) { + if (vmx_irq_enabled(vcpu)) { vector = kvm_cpu_get_interrupt(vcpu); vmx_inject_irq(vcpu, vector); kvm_timer_intr_post(vcpu, vector); @@ -2838,7 +2953,7 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) "push %%edx; push %%ebp;" "push %%ecx \n\t" #endif - ASM_VMX_VMWRITE_RSP_RDX "\n\t" + __ex(ASM_VMX_VMWRITE_RSP_RDX) "\n\t" /* Check if vmlaunch of vmresume is needed */ "cmpl $0, %c[launched](%0) \n\t" /* Load guest registers. Don't clobber flags. */ @@ -2873,9 +2988,9 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) #endif /* Enter guest mode */ "jne .Llaunched \n\t" - ASM_VMX_VMLAUNCH "\n\t" + __ex(ASM_VMX_VMLAUNCH) "\n\t" "jmp .Lkvm_vmx_return \n\t" - ".Llaunched: " ASM_VMX_VMRESUME "\n\t" + ".Llaunched: " __ex(ASM_VMX_VMRESUME) "\n\t" ".Lkvm_vmx_return: " /* Save guest registers, load host registers, keep flags */ #ifdef CONFIG_X86_64 @@ -2949,7 +3064,8 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) fixup_rmode_irq(vmx); vcpu->arch.interrupt_window_open = - (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & 3) == 0; + (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & + (GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS)) == 0; asm("mov %0, %%ds; mov %0, %%es" : : "r"(__USER_DS)); vmx->launched = 1; @@ -2957,7 +3073,8 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) intr_info = vmcs_read32(VM_EXIT_INTR_INFO); /* We need to handle NMIs before interrupts are enabled */ - if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == 0x200) { /* nmi */ + if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == 0x200 && + (intr_info & INTR_INFO_VALID_MASK)) { KVMTRACE_0D(NMI, vcpu, handler); asm("int $2"); } @@ -2968,7 +3085,7 @@ static void vmx_free_vmcs(struct kvm_vcpu *vcpu) struct vcpu_vmx *vmx = to_vmx(vcpu); if (vmx->vmcs) { - on_each_cpu(__vcpu_clear, vmx, 1); + vcpu_clear(vmx); free_vmcs(vmx->vmcs); vmx->vmcs = NULL; } @@ -3095,7 +3212,6 @@ static struct kvm_x86_ops vmx_x86_ops = { .prepare_guest_switch = vmx_save_host_state, .vcpu_load = vmx_vcpu_load, .vcpu_put = vmx_vcpu_put, - .vcpu_decache = vmx_vcpu_decache, .set_guest_debug = set_guest_debug, .guest_debug_pre = kvm_guest_debug_pre, diff --git a/arch/x86/kvm/vmx.h b/arch/x86/kvm/vmx.h index 79d94c610dfe..425a13436b3f 100644 --- a/arch/x86/kvm/vmx.h +++ b/arch/x86/kvm/vmx.h @@ -40,6 +40,7 @@ #define CPU_BASED_CR8_LOAD_EXITING 0x00080000 #define CPU_BASED_CR8_STORE_EXITING 0x00100000 #define CPU_BASED_TPR_SHADOW 0x00200000 +#define CPU_BASED_VIRTUAL_NMI_PENDING 0x00400000 #define CPU_BASED_MOV_DR_EXITING 0x00800000 #define CPU_BASED_UNCOND_IO_EXITING 0x01000000 #define CPU_BASED_USE_IO_BITMAPS 0x02000000 @@ -216,7 +217,7 @@ enum vmcs_field { #define EXIT_REASON_TRIPLE_FAULT 2 #define EXIT_REASON_PENDING_INTERRUPT 7 - +#define EXIT_REASON_NMI_WINDOW 8 #define EXIT_REASON_TASK_SWITCH 9 #define EXIT_REASON_CPUID 10 #define EXIT_REASON_HLT 12 @@ -251,7 +252,9 @@ enum vmcs_field { #define INTR_INFO_VECTOR_MASK 0xff /* 7:0 */ #define INTR_INFO_INTR_TYPE_MASK 0x700 /* 10:8 */ #define INTR_INFO_DELIVER_CODE_MASK 0x800 /* 11 */ +#define INTR_INFO_UNBLOCK_NMI 0x1000 /* 12 */ #define INTR_INFO_VALID_MASK 0x80000000 /* 31 */ +#define INTR_INFO_RESVD_BITS_MASK 0x7ffff000 #define VECTORING_INFO_VECTOR_MASK INTR_INFO_VECTOR_MASK #define VECTORING_INFO_TYPE_MASK INTR_INFO_INTR_TYPE_MASK @@ -259,9 +262,16 @@ enum vmcs_field { #define VECTORING_INFO_VALID_MASK INTR_INFO_VALID_MASK #define INTR_TYPE_EXT_INTR (0 << 8) /* external interrupt */ +#define INTR_TYPE_NMI_INTR (2 << 8) /* NMI */ #define INTR_TYPE_EXCEPTION (3 << 8) /* processor exception */ #define INTR_TYPE_SOFT_INTR (4 << 8) /* software interrupt */ +/* GUEST_INTERRUPTIBILITY_INFO flags. */ +#define GUEST_INTR_STATE_STI 0x00000001 +#define GUEST_INTR_STATE_MOV_SS 0x00000002 +#define GUEST_INTR_STATE_SMI 0x00000004 +#define GUEST_INTR_STATE_NMI 0x00000008 + /* * Exit Qualifications for MOV for Control Register Access */ diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 0faa2546b1cd..9f1cdb011cff 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -72,6 +72,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { "mmio_exits", VCPU_STAT(mmio_exits) }, { "signal_exits", VCPU_STAT(signal_exits) }, { "irq_window", VCPU_STAT(irq_window_exits) }, + { "nmi_window", VCPU_STAT(nmi_window_exits) }, { "halt_exits", VCPU_STAT(halt_exits) }, { "halt_wakeup", VCPU_STAT(halt_wakeup) }, { "hypercalls", VCPU_STAT(hypercalls) }, @@ -173,6 +174,12 @@ void kvm_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long addr, kvm_queue_exception_e(vcpu, PF_VECTOR, error_code); } +void kvm_inject_nmi(struct kvm_vcpu *vcpu) +{ + vcpu->arch.nmi_pending = 1; +} +EXPORT_SYMBOL_GPL(kvm_inject_nmi); + void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code) { WARN_ON(vcpu->arch.exception.pending); @@ -604,6 +611,38 @@ static void kvm_write_guest_time(struct kvm_vcpu *v) mark_page_dirty(v->kvm, vcpu->time >> PAGE_SHIFT); } +static bool msr_mtrr_valid(unsigned msr) +{ + switch (msr) { + case 0x200 ... 0x200 + 2 * KVM_NR_VAR_MTRR - 1: + case MSR_MTRRfix64K_00000: + case MSR_MTRRfix16K_80000: + case MSR_MTRRfix16K_A0000: + case MSR_MTRRfix4K_C0000: + case MSR_MTRRfix4K_C8000: + case MSR_MTRRfix4K_D0000: + case MSR_MTRRfix4K_D8000: + case MSR_MTRRfix4K_E0000: + case MSR_MTRRfix4K_E8000: + case MSR_MTRRfix4K_F0000: + case MSR_MTRRfix4K_F8000: + case MSR_MTRRdefType: + case MSR_IA32_CR_PAT: + return true; + case 0x2f8: + return true; + } + return false; +} + +static int set_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 data) +{ + if (!msr_mtrr_valid(msr)) + return 1; + + vcpu->arch.mtrr[msr - 0x200] = data; + return 0; +} int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) { @@ -625,8 +664,9 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) break; case MSR_IA32_UCODE_REV: case MSR_IA32_UCODE_WRITE: - case 0x200 ... 0x2ff: /* MTRRs */ break; + case 0x200 ... 0x2ff: + return set_msr_mtrr(vcpu, msr, data); case MSR_IA32_APICBASE: kvm_set_apic_base(vcpu, data); break; @@ -684,6 +724,15 @@ int kvm_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) return kvm_x86_ops->get_msr(vcpu, msr_index, pdata); } +static int get_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) +{ + if (!msr_mtrr_valid(msr)) + return 1; + + *pdata = vcpu->arch.mtrr[msr - 0x200]; + return 0; +} + int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) { u64 data; @@ -705,11 +754,13 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) case MSR_IA32_MC0_MISC+16: case MSR_IA32_UCODE_REV: case MSR_IA32_EBL_CR_POWERON: - /* MTRR registers */ - case 0xfe: - case 0x200 ... 0x2ff: data = 0; break; + case MSR_MTRRcap: + data = 0x500 | KVM_NR_VAR_MTRR; + break; + case 0x200 ... 0x2ff: + return get_msr_mtrr(vcpu, msr, pdata); case 0xcd: /* fsb frequency */ data = 3; break; @@ -817,41 +868,6 @@ out: return r; } -/* - * Make sure that a cpu that is being hot-unplugged does not have any vcpus - * cached on it. - */ -void decache_vcpus_on_cpu(int cpu) -{ - struct kvm *vm; - struct kvm_vcpu *vcpu; - int i; - - spin_lock(&kvm_lock); - list_for_each_entry(vm, &vm_list, vm_list) - for (i = 0; i < KVM_MAX_VCPUS; ++i) { - vcpu = vm->vcpus[i]; - if (!vcpu) - continue; - /* - * If the vcpu is locked, then it is running on some - * other cpu and therefore it is not cached on the - * cpu in question. - * - * If it's not locked, check the last cpu it executed - * on. - */ - if (mutex_trylock(&vcpu->mutex)) { - if (vcpu->cpu == cpu) { - kvm_x86_ops->vcpu_decache(vcpu); - vcpu->cpu = -1; - } - mutex_unlock(&vcpu->mutex); - } - } - spin_unlock(&kvm_lock); -} - int kvm_dev_ioctl_check_extension(long ext) { int r; @@ -869,6 +885,9 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_MP_STATE: r = 1; break; + case KVM_CAP_COALESCED_MMIO: + r = KVM_COALESCED_MMIO_PAGE_OFFSET; + break; case KVM_CAP_VAPIC: r = !kvm_x86_ops->cpu_has_accelerated_tpr(); break; @@ -1781,13 +1800,14 @@ static void kvm_init_msr_list(void) * Only apic need an MMIO device hook, so shortcut now.. */ static struct kvm_io_device *vcpu_find_pervcpu_dev(struct kvm_vcpu *vcpu, - gpa_t addr) + gpa_t addr, int len, + int is_write) { struct kvm_io_device *dev; if (vcpu->arch.apic) { dev = &vcpu->arch.apic->dev; - if (dev->in_range(dev, addr)) + if (dev->in_range(dev, addr, len, is_write)) return dev; } return NULL; @@ -1795,13 +1815,15 @@ static struct kvm_io_device *vcpu_find_pervcpu_dev(struct kvm_vcpu *vcpu, static struct kvm_io_device *vcpu_find_mmio_dev(struct kvm_vcpu *vcpu, - gpa_t addr) + gpa_t addr, int len, + int is_write) { struct kvm_io_device *dev; - dev = vcpu_find_pervcpu_dev(vcpu, addr); + dev = vcpu_find_pervcpu_dev(vcpu, addr, len, is_write); if (dev == NULL) - dev = kvm_io_bus_find_dev(&vcpu->kvm->mmio_bus, addr); + dev = kvm_io_bus_find_dev(&vcpu->kvm->mmio_bus, addr, len, + is_write); return dev; } @@ -1869,7 +1891,7 @@ mmio: * Is this MMIO handled locally? */ mutex_lock(&vcpu->kvm->lock); - mmio_dev = vcpu_find_mmio_dev(vcpu, gpa); + mmio_dev = vcpu_find_mmio_dev(vcpu, gpa, bytes, 0); if (mmio_dev) { kvm_iodevice_read(mmio_dev, gpa, bytes, val); mutex_unlock(&vcpu->kvm->lock); @@ -1924,7 +1946,7 @@ mmio: * Is this MMIO handled locally? */ mutex_lock(&vcpu->kvm->lock); - mmio_dev = vcpu_find_mmio_dev(vcpu, gpa); + mmio_dev = vcpu_find_mmio_dev(vcpu, gpa, bytes, 1); if (mmio_dev) { kvm_iodevice_write(mmio_dev, gpa, bytes, val); mutex_unlock(&vcpu->kvm->lock); @@ -2020,6 +2042,7 @@ int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address) int emulate_clts(struct kvm_vcpu *vcpu) { + KVMTRACE_0D(CLTS, vcpu, handler); kvm_x86_ops->set_cr0(vcpu, vcpu->arch.cr0 & ~X86_CR0_TS); return X86EMUL_CONTINUE; } @@ -2053,21 +2076,19 @@ int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long value) void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context) { - static int reported; u8 opcodes[4]; unsigned long rip = vcpu->arch.rip; unsigned long rip_linear; - rip_linear = rip + get_segment_base(vcpu, VCPU_SREG_CS); - - if (reported) + if (!printk_ratelimit()) return; + rip_linear = rip + get_segment_base(vcpu, VCPU_SREG_CS); + emulator_read_std(rip_linear, (void *)opcodes, 4, vcpu); printk(KERN_ERR "emulation failed (%s) rip %lx %02x %02x %02x %02x\n", context, rip, opcodes[0], opcodes[1], opcodes[2], opcodes[3]); - reported = 1; } EXPORT_SYMBOL_GPL(kvm_report_emulation_failure); @@ -2105,27 +2126,6 @@ int emulate_instruction(struct kvm_vcpu *vcpu, ? X86EMUL_MODE_PROT64 : cs_db ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16; - if (vcpu->arch.emulate_ctxt.mode == X86EMUL_MODE_PROT64) { - vcpu->arch.emulate_ctxt.cs_base = 0; - vcpu->arch.emulate_ctxt.ds_base = 0; - vcpu->arch.emulate_ctxt.es_base = 0; - vcpu->arch.emulate_ctxt.ss_base = 0; - } else { - vcpu->arch.emulate_ctxt.cs_base = - get_segment_base(vcpu, VCPU_SREG_CS); - vcpu->arch.emulate_ctxt.ds_base = - get_segment_base(vcpu, VCPU_SREG_DS); - vcpu->arch.emulate_ctxt.es_base = - get_segment_base(vcpu, VCPU_SREG_ES); - vcpu->arch.emulate_ctxt.ss_base = - get_segment_base(vcpu, VCPU_SREG_SS); - } - - vcpu->arch.emulate_ctxt.gs_base = - get_segment_base(vcpu, VCPU_SREG_GS); - vcpu->arch.emulate_ctxt.fs_base = - get_segment_base(vcpu, VCPU_SREG_FS); - r = x86_decode_insn(&vcpu->arch.emulate_ctxt, &emulate_ops); /* Reject the instructions other than VMCALL/VMMCALL when @@ -2300,9 +2300,10 @@ static void pio_string_write(struct kvm_io_device *pio_dev, } static struct kvm_io_device *vcpu_find_pio_dev(struct kvm_vcpu *vcpu, - gpa_t addr) + gpa_t addr, int len, + int is_write) { - return kvm_io_bus_find_dev(&vcpu->kvm->pio_bus, addr); + return kvm_io_bus_find_dev(&vcpu->kvm->pio_bus, addr, len, is_write); } int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, @@ -2331,11 +2332,10 @@ int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, kvm_x86_ops->cache_regs(vcpu); memcpy(vcpu->arch.pio_data, &vcpu->arch.regs[VCPU_REGS_RAX], 4); - kvm_x86_ops->decache_regs(vcpu); kvm_x86_ops->skip_emulated_instruction(vcpu); - pio_dev = vcpu_find_pio_dev(vcpu, port); + pio_dev = vcpu_find_pio_dev(vcpu, port, size, !in); if (pio_dev) { kernel_pio(pio_dev, vcpu, vcpu->arch.pio_data); complete_pio(vcpu); @@ -2417,7 +2417,9 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, } } - pio_dev = vcpu_find_pio_dev(vcpu, port); + pio_dev = vcpu_find_pio_dev(vcpu, port, + vcpu->arch.pio.cur_count, + !vcpu->arch.pio.in); if (!vcpu->arch.pio.in) { /* string PIO write */ ret = pio_copy_data(vcpu); @@ -2600,27 +2602,41 @@ void realmode_lmsw(struct kvm_vcpu *vcpu, unsigned long msw, unsigned long realmode_get_cr(struct kvm_vcpu *vcpu, int cr) { + unsigned long value; + kvm_x86_ops->decache_cr4_guest_bits(vcpu); switch (cr) { case 0: - return vcpu->arch.cr0; + value = vcpu->arch.cr0; + break; case 2: - return vcpu->arch.cr2; + value = vcpu->arch.cr2; + break; case 3: - return vcpu->arch.cr3; + value = vcpu->arch.cr3; + break; case 4: - return vcpu->arch.cr4; + value = vcpu->arch.cr4; + break; case 8: - return kvm_get_cr8(vcpu); + value = kvm_get_cr8(vcpu); + break; default: vcpu_printf(vcpu, "%s: unexpected cr %u\n", __func__, cr); return 0; } + KVMTRACE_3D(CR_READ, vcpu, (u32)cr, (u32)value, + (u32)((u64)value >> 32), handler); + + return value; } void realmode_set_cr(struct kvm_vcpu *vcpu, int cr, unsigned long val, unsigned long *rflags) { + KVMTRACE_3D(CR_WRITE, vcpu, (u32)cr, (u32)val, + (u32)((u64)val >> 32), handler); + switch (cr) { case 0: kvm_set_cr0(vcpu, mk_cr_64(vcpu->arch.cr0, val)); @@ -2771,8 +2787,10 @@ static void vapic_exit(struct kvm_vcpu *vcpu) if (!apic || !apic->vapic_addr) return; + down_read(&vcpu->kvm->slots_lock); kvm_release_page_dirty(apic->vapic_page); mark_page_dirty(vcpu->kvm, apic->vapic_addr >> PAGE_SHIFT); + up_read(&vcpu->kvm->slots_lock); } static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) @@ -2928,9 +2946,7 @@ out: post_kvm_run_save(vcpu, kvm_run); - down_read(&vcpu->kvm->slots_lock); vapic_exit(vcpu); - up_read(&vcpu->kvm->slots_lock); return r; } @@ -2942,15 +2958,15 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) vcpu_load(vcpu); + if (vcpu->sigset_active) + sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); + if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) { kvm_vcpu_block(vcpu); - vcpu_put(vcpu); - return -EAGAIN; + r = -EAGAIN; + goto out; } - if (vcpu->sigset_active) - sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); - /* re-sync apic's tpr */ if (!irqchip_in_kernel(vcpu->kvm)) kvm_set_cr8(vcpu, kvm_run->cr8); @@ -3070,8 +3086,8 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) return 0; } -static void get_segment(struct kvm_vcpu *vcpu, - struct kvm_segment *var, int seg) +void kvm_get_segment(struct kvm_vcpu *vcpu, + struct kvm_segment *var, int seg) { kvm_x86_ops->get_segment(vcpu, var, seg); } @@ -3080,7 +3096,7 @@ void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l) { struct kvm_segment cs; - get_segment(vcpu, &cs, VCPU_SREG_CS); + kvm_get_segment(vcpu, &cs, VCPU_SREG_CS); *db = cs.db; *l = cs.l; } @@ -3094,15 +3110,15 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, vcpu_load(vcpu); - get_segment(vcpu, &sregs->cs, VCPU_SREG_CS); - get_segment(vcpu, &sregs->ds, VCPU_SREG_DS); - get_segment(vcpu, &sregs->es, VCPU_SREG_ES); - get_segment(vcpu, &sregs->fs, VCPU_SREG_FS); - get_segment(vcpu, &sregs->gs, VCPU_SREG_GS); - get_segment(vcpu, &sregs->ss, VCPU_SREG_SS); + kvm_get_segment(vcpu, &sregs->cs, VCPU_SREG_CS); + kvm_get_segment(vcpu, &sregs->ds, VCPU_SREG_DS); + kvm_get_segment(vcpu, &sregs->es, VCPU_SREG_ES); + kvm_get_segment(vcpu, &sregs->fs, VCPU_SREG_FS); + kvm_get_segment(vcpu, &sregs->gs, VCPU_SREG_GS); + kvm_get_segment(vcpu, &sregs->ss, VCPU_SREG_SS); - get_segment(vcpu, &sregs->tr, VCPU_SREG_TR); - get_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR); + kvm_get_segment(vcpu, &sregs->tr, VCPU_SREG_TR); + kvm_get_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR); kvm_x86_ops->get_idt(vcpu, &dt); sregs->idt.limit = dt.limit; @@ -3154,7 +3170,7 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, return 0; } -static void set_segment(struct kvm_vcpu *vcpu, +static void kvm_set_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg) { kvm_x86_ops->set_segment(vcpu, var, seg); @@ -3191,7 +3207,7 @@ static void get_segment_descritptor_dtable(struct kvm_vcpu *vcpu, if (selector & 1 << 2) { struct kvm_segment kvm_seg; - get_segment(vcpu, &kvm_seg, VCPU_SREG_LDTR); + kvm_get_segment(vcpu, &kvm_seg, VCPU_SREG_LDTR); if (kvm_seg.unusable) dtable->limit = 0; @@ -3297,7 +3313,7 @@ static u16 get_segment_selector(struct kvm_vcpu *vcpu, int seg) { struct kvm_segment kvm_seg; - get_segment(vcpu, &kvm_seg, seg); + kvm_get_segment(vcpu, &kvm_seg, seg); return kvm_seg.selector; } @@ -3313,8 +3329,8 @@ static int load_segment_descriptor_to_kvm_desct(struct kvm_vcpu *vcpu, return 0; } -static int load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, - int type_bits, int seg) +int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, + int type_bits, int seg) { struct kvm_segment kvm_seg; @@ -3327,7 +3343,7 @@ static int load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, if (!kvm_seg.s) kvm_seg.unusable = 1; - set_segment(vcpu, &kvm_seg, seg); + kvm_set_segment(vcpu, &kvm_seg, seg); return 0; } @@ -3373,25 +3389,25 @@ static int load_state_from_tss32(struct kvm_vcpu *vcpu, vcpu->arch.regs[VCPU_REGS_RSI] = tss->esi; vcpu->arch.regs[VCPU_REGS_RDI] = tss->edi; - if (load_segment_descriptor(vcpu, tss->ldt_selector, 0, VCPU_SREG_LDTR)) + if (kvm_load_segment_descriptor(vcpu, tss->ldt_selector, 0, VCPU_SREG_LDTR)) return 1; - if (load_segment_descriptor(vcpu, tss->es, 1, VCPU_SREG_ES)) + if (kvm_load_segment_descriptor(vcpu, tss->es, 1, VCPU_SREG_ES)) return 1; - if (load_segment_descriptor(vcpu, tss->cs, 9, VCPU_SREG_CS)) + if (kvm_load_segment_descriptor(vcpu, tss->cs, 9, VCPU_SREG_CS)) return 1; - if (load_segment_descriptor(vcpu, tss->ss, 1, VCPU_SREG_SS)) + if (kvm_load_segment_descriptor(vcpu, tss->ss, 1, VCPU_SREG_SS)) return 1; - if (load_segment_descriptor(vcpu, tss->ds, 1, VCPU_SREG_DS)) + if (kvm_load_segment_descriptor(vcpu, tss->ds, 1, VCPU_SREG_DS)) return 1; - if (load_segment_descriptor(vcpu, tss->fs, 1, VCPU_SREG_FS)) + if (kvm_load_segment_descriptor(vcpu, tss->fs, 1, VCPU_SREG_FS)) return 1; - if (load_segment_descriptor(vcpu, tss->gs, 1, VCPU_SREG_GS)) + if (kvm_load_segment_descriptor(vcpu, tss->gs, 1, VCPU_SREG_GS)) return 1; return 0; } @@ -3432,24 +3448,24 @@ static int load_state_from_tss16(struct kvm_vcpu *vcpu, vcpu->arch.regs[VCPU_REGS_RSI] = tss->si; vcpu->arch.regs[VCPU_REGS_RDI] = tss->di; - if (load_segment_descriptor(vcpu, tss->ldt, 0, VCPU_SREG_LDTR)) + if (kvm_load_segment_descriptor(vcpu, tss->ldt, 0, VCPU_SREG_LDTR)) return 1; - if (load_segment_descriptor(vcpu, tss->es, 1, VCPU_SREG_ES)) + if (kvm_load_segment_descriptor(vcpu, tss->es, 1, VCPU_SREG_ES)) return 1; - if (load_segment_descriptor(vcpu, tss->cs, 9, VCPU_SREG_CS)) + if (kvm_load_segment_descriptor(vcpu, tss->cs, 9, VCPU_SREG_CS)) return 1; - if (load_segment_descriptor(vcpu, tss->ss, 1, VCPU_SREG_SS)) + if (kvm_load_segment_descriptor(vcpu, tss->ss, 1, VCPU_SREG_SS)) return 1; - if (load_segment_descriptor(vcpu, tss->ds, 1, VCPU_SREG_DS)) + if (kvm_load_segment_descriptor(vcpu, tss->ds, 1, VCPU_SREG_DS)) return 1; return 0; } -int kvm_task_switch_16(struct kvm_vcpu *vcpu, u16 tss_selector, +static int kvm_task_switch_16(struct kvm_vcpu *vcpu, u16 tss_selector, struct desc_struct *cseg_desc, struct desc_struct *nseg_desc) { @@ -3472,7 +3488,7 @@ out: return ret; } -int kvm_task_switch_32(struct kvm_vcpu *vcpu, u16 tss_selector, +static int kvm_task_switch_32(struct kvm_vcpu *vcpu, u16 tss_selector, struct desc_struct *cseg_desc, struct desc_struct *nseg_desc) { @@ -3502,7 +3518,7 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason) struct desc_struct nseg_desc; int ret = 0; - get_segment(vcpu, &tr_seg, VCPU_SREG_TR); + kvm_get_segment(vcpu, &tr_seg, VCPU_SREG_TR); if (load_guest_segment_descriptor(vcpu, tss_selector, &nseg_desc)) goto out; @@ -3561,7 +3577,7 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason) kvm_x86_ops->set_cr0(vcpu, vcpu->arch.cr0 | X86_CR0_TS); seg_desct_to_kvm_desct(&nseg_desc, tss_selector, &tr_seg); tr_seg.type = 11; - set_segment(vcpu, &tr_seg, VCPU_SREG_TR); + kvm_set_segment(vcpu, &tr_seg, VCPU_SREG_TR); out: kvm_x86_ops->decache_regs(vcpu); return ret; @@ -3628,15 +3644,15 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, } } - set_segment(vcpu, &sregs->cs, VCPU_SREG_CS); - set_segment(vcpu, &sregs->ds, VCPU_SREG_DS); - set_segment(vcpu, &sregs->es, VCPU_SREG_ES); - set_segment(vcpu, &sregs->fs, VCPU_SREG_FS); - set_segment(vcpu, &sregs->gs, VCPU_SREG_GS); - set_segment(vcpu, &sregs->ss, VCPU_SREG_SS); + kvm_set_segment(vcpu, &sregs->cs, VCPU_SREG_CS); + kvm_set_segment(vcpu, &sregs->ds, VCPU_SREG_DS); + kvm_set_segment(vcpu, &sregs->es, VCPU_SREG_ES); + kvm_set_segment(vcpu, &sregs->fs, VCPU_SREG_FS); + kvm_set_segment(vcpu, &sregs->gs, VCPU_SREG_GS); + kvm_set_segment(vcpu, &sregs->ss, VCPU_SREG_SS); - set_segment(vcpu, &sregs->tr, VCPU_SREG_TR); - set_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR); + kvm_set_segment(vcpu, &sregs->tr, VCPU_SREG_TR); + kvm_set_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR); vcpu_put(vcpu); @@ -3751,14 +3767,14 @@ void fx_init(struct kvm_vcpu *vcpu) * allocate ram with GFP_KERNEL. */ if (!used_math()) - fx_save(&vcpu->arch.host_fx_image); + kvm_fx_save(&vcpu->arch.host_fx_image); /* Initialize guest FPU by resetting ours and saving into guest's */ preempt_disable(); - fx_save(&vcpu->arch.host_fx_image); - fx_finit(); - fx_save(&vcpu->arch.guest_fx_image); - fx_restore(&vcpu->arch.host_fx_image); + kvm_fx_save(&vcpu->arch.host_fx_image); + kvm_fx_finit(); + kvm_fx_save(&vcpu->arch.guest_fx_image); + kvm_fx_restore(&vcpu->arch.host_fx_image); preempt_enable(); vcpu->arch.cr0 |= X86_CR0_ET; @@ -3775,8 +3791,8 @@ void kvm_load_guest_fpu(struct kvm_vcpu *vcpu) return; vcpu->guest_fpu_loaded = 1; - fx_save(&vcpu->arch.host_fx_image); - fx_restore(&vcpu->arch.guest_fx_image); + kvm_fx_save(&vcpu->arch.host_fx_image); + kvm_fx_restore(&vcpu->arch.guest_fx_image); } EXPORT_SYMBOL_GPL(kvm_load_guest_fpu); @@ -3786,8 +3802,8 @@ void kvm_put_guest_fpu(struct kvm_vcpu *vcpu) return; vcpu->guest_fpu_loaded = 0; - fx_save(&vcpu->arch.guest_fx_image); - fx_restore(&vcpu->arch.host_fx_image); + kvm_fx_save(&vcpu->arch.guest_fx_image); + kvm_fx_restore(&vcpu->arch.host_fx_image); ++vcpu->stat.fpu_reload; } EXPORT_SYMBOL_GPL(kvm_put_guest_fpu); @@ -4016,6 +4032,11 @@ int kvm_arch_set_memory_region(struct kvm *kvm, return 0; } +void kvm_arch_flush_shadow(struct kvm *kvm) +{ + kvm_mmu_zap_all(kvm); +} + int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) { return vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c index 932f216d890c..f2f90468f8b1 100644 --- a/arch/x86/kvm/x86_emulate.c +++ b/arch/x86/kvm/x86_emulate.c @@ -121,7 +121,7 @@ static u16 opcode_table[256] = { 0, 0, 0, DstReg | SrcMem32 | ModRM | Mov /* movsxd (x86/64) */ , 0, 0, 0, 0, /* 0x68 - 0x6F */ - 0, 0, ImplicitOps | Mov | Stack, 0, + SrcImm | Mov | Stack, 0, SrcImmByte | Mov | Stack, 0, SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, /* insb, insw/insd */ SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, /* outsb, outsw/outsd */ /* 0x70 - 0x77 */ @@ -138,9 +138,11 @@ static u16 opcode_table[256] = { /* 0x88 - 0x8F */ ByteOp | DstMem | SrcReg | ModRM | Mov, DstMem | SrcReg | ModRM | Mov, ByteOp | DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov, - 0, ModRM | DstReg, 0, Group | Group1A, - /* 0x90 - 0x9F */ - 0, 0, 0, 0, 0, 0, 0, 0, + DstMem | SrcReg | ModRM | Mov, ModRM | DstReg, + DstReg | SrcMem | ModRM | Mov, Group | Group1A, + /* 0x90 - 0x97 */ + DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, + /* 0x98 - 0x9F */ 0, 0, 0, 0, ImplicitOps | Stack, ImplicitOps | Stack, 0, 0, /* 0xA0 - 0xA7 */ ByteOp | DstReg | SrcMem | Mov | MemAbs, DstReg | SrcMem | Mov | MemAbs, @@ -152,7 +154,8 @@ static u16 opcode_table[256] = { ByteOp | ImplicitOps | Mov | String, ImplicitOps | Mov | String, ByteOp | ImplicitOps | String, ImplicitOps | String, /* 0xB0 - 0xBF */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + DstReg | SrcImm | Mov, 0, 0, 0, 0, 0, 0, 0, /* 0xC0 - 0xC7 */ ByteOp | DstMem | SrcImm | ModRM, DstMem | SrcImmByte | ModRM, 0, ImplicitOps | Stack, 0, 0, @@ -168,7 +171,8 @@ static u16 opcode_table[256] = { /* 0xE0 - 0xE7 */ 0, 0, 0, 0, 0, 0, 0, 0, /* 0xE8 - 0xEF */ - ImplicitOps | Stack, SrcImm|ImplicitOps, 0, SrcImmByte|ImplicitOps, + ImplicitOps | Stack, SrcImm | ImplicitOps, + ImplicitOps, SrcImmByte | ImplicitOps, 0, 0, 0, 0, /* 0xF0 - 0xF7 */ 0, 0, 0, 0, @@ -215,7 +219,7 @@ static u16 twobyte_table[256] = { /* 0xA0 - 0xA7 */ 0, 0, 0, DstMem | SrcReg | ModRM | BitOp, 0, 0, 0, 0, /* 0xA8 - 0xAF */ - 0, 0, 0, DstMem | SrcReg | ModRM | BitOp, 0, 0, 0, 0, + 0, 0, 0, DstMem | SrcReg | ModRM | BitOp, 0, 0, ModRM, 0, /* 0xB0 - 0xB7 */ ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, 0, DstMem | SrcReg | ModRM | BitOp, @@ -518,6 +522,39 @@ static inline void jmp_rel(struct decode_cache *c, int rel) register_address_increment(c, &c->eip, rel); } +static void set_seg_override(struct decode_cache *c, int seg) +{ + c->has_seg_override = true; + c->seg_override = seg; +} + +static unsigned long seg_base(struct x86_emulate_ctxt *ctxt, int seg) +{ + if (ctxt->mode == X86EMUL_MODE_PROT64 && seg < VCPU_SREG_FS) + return 0; + + return kvm_x86_ops->get_segment_base(ctxt->vcpu, seg); +} + +static unsigned long seg_override_base(struct x86_emulate_ctxt *ctxt, + struct decode_cache *c) +{ + if (!c->has_seg_override) + return 0; + + return seg_base(ctxt, c->seg_override); +} + +static unsigned long es_base(struct x86_emulate_ctxt *ctxt) +{ + return seg_base(ctxt, VCPU_SREG_ES); +} + +static unsigned long ss_base(struct x86_emulate_ctxt *ctxt) +{ + return seg_base(ctxt, VCPU_SREG_SS); +} + static int do_fetch_insn_byte(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops, unsigned long linear, u8 *dest) @@ -660,7 +697,7 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt, { struct decode_cache *c = &ctxt->decode; u8 sib; - int index_reg = 0, base_reg = 0, scale, rip_relative = 0; + int index_reg = 0, base_reg = 0, scale; int rc = 0; if (c->rex_prefix) { @@ -731,47 +768,28 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt, } if (c->modrm_rm == 2 || c->modrm_rm == 3 || (c->modrm_rm == 6 && c->modrm_mod != 0)) - if (!c->override_base) - c->override_base = &ctxt->ss_base; + if (!c->has_seg_override) + set_seg_override(c, VCPU_SREG_SS); c->modrm_ea = (u16)c->modrm_ea; } else { /* 32/64-bit ModR/M decode. */ - switch (c->modrm_rm) { - case 4: - case 12: + if ((c->modrm_rm & 7) == 4) { sib = insn_fetch(u8, 1, c->eip); index_reg |= (sib >> 3) & 7; base_reg |= sib & 7; scale = sib >> 6; - switch (base_reg) { - case 5: - if (c->modrm_mod != 0) - c->modrm_ea += c->regs[base_reg]; - else - c->modrm_ea += - insn_fetch(s32, 4, c->eip); - break; - default: + if ((base_reg & 7) == 5 && c->modrm_mod == 0) + c->modrm_ea += insn_fetch(s32, 4, c->eip); + else c->modrm_ea += c->regs[base_reg]; - } - switch (index_reg) { - case 4: - break; - default: + if (index_reg != 4) c->modrm_ea += c->regs[index_reg] << scale; - } - break; - case 5: - if (c->modrm_mod != 0) - c->modrm_ea += c->regs[c->modrm_rm]; - else if (ctxt->mode == X86EMUL_MODE_PROT64) - rip_relative = 1; - break; - default: + } else if ((c->modrm_rm & 7) == 5 && c->modrm_mod == 0) { + if (ctxt->mode == X86EMUL_MODE_PROT64) + c->rip_relative = 1; + } else c->modrm_ea += c->regs[c->modrm_rm]; - break; - } switch (c->modrm_mod) { case 0: if (c->modrm_rm == 5) @@ -785,22 +803,6 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt, break; } } - if (rip_relative) { - c->modrm_ea += c->eip; - switch (c->d & SrcMask) { - case SrcImmByte: - c->modrm_ea += 1; - break; - case SrcImm: - if (c->d & ByteOp) - c->modrm_ea += 1; - else - if (c->op_bytes == 8) - c->modrm_ea += 4; - else - c->modrm_ea += c->op_bytes; - } - } done: return rc; } @@ -838,6 +840,7 @@ x86_decode_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) memset(c, 0, sizeof(struct decode_cache)); c->eip = ctxt->vcpu->arch.rip; + ctxt->cs_base = seg_base(ctxt, VCPU_SREG_CS); memcpy(c->regs, ctxt->vcpu->arch.regs, sizeof c->regs); switch (mode) { @@ -876,23 +879,15 @@ x86_decode_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) /* switch between 2/4 bytes */ c->ad_bytes = def_ad_bytes ^ 6; break; + case 0x26: /* ES override */ case 0x2e: /* CS override */ - c->override_base = &ctxt->cs_base; - break; + case 0x36: /* SS override */ case 0x3e: /* DS override */ - c->override_base = &ctxt->ds_base; - break; - case 0x26: /* ES override */ - c->override_base = &ctxt->es_base; + set_seg_override(c, (c->b >> 3) & 3); break; case 0x64: /* FS override */ - c->override_base = &ctxt->fs_base; - break; case 0x65: /* GS override */ - c->override_base = &ctxt->gs_base; - break; - case 0x36: /* SS override */ - c->override_base = &ctxt->ss_base; + set_seg_override(c, c->b & 7); break; case 0x40 ... 0x4f: /* REX */ if (mode != X86EMUL_MODE_PROT64) @@ -964,15 +959,11 @@ done_prefixes: if (rc) goto done; - if (!c->override_base) - c->override_base = &ctxt->ds_base; - if (mode == X86EMUL_MODE_PROT64 && - c->override_base != &ctxt->fs_base && - c->override_base != &ctxt->gs_base) - c->override_base = NULL; + if (!c->has_seg_override) + set_seg_override(c, VCPU_SREG_DS); - if (c->override_base) - c->modrm_ea += *c->override_base; + if (!(!c->twobyte && c->b == 0x8d)) + c->modrm_ea += seg_override_base(ctxt, c); if (c->ad_bytes != 8) c->modrm_ea = (u32)c->modrm_ea; @@ -1049,6 +1040,7 @@ done_prefixes: break; case DstMem: if ((c->d & ModRM) && c->modrm_mod == 3) { + c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; c->dst.type = OP_REG; c->dst.val = c->dst.orig_val = c->modrm_val; c->dst.ptr = c->modrm_ptr; @@ -1058,6 +1050,9 @@ done_prefixes: break; } + if (c->rip_relative) + c->modrm_ea += c->eip; + done: return (rc == X86EMUL_UNHANDLEABLE) ? -1 : 0; } @@ -1070,7 +1065,7 @@ static inline void emulate_push(struct x86_emulate_ctxt *ctxt) c->dst.bytes = c->op_bytes; c->dst.val = c->src.val; register_address_increment(c, &c->regs[VCPU_REGS_RSP], -c->op_bytes); - c->dst.ptr = (void *) register_address(c, ctxt->ss_base, + c->dst.ptr = (void *) register_address(c, ss_base(ctxt), c->regs[VCPU_REGS_RSP]); } @@ -1080,7 +1075,7 @@ static inline int emulate_grp1a(struct x86_emulate_ctxt *ctxt, struct decode_cache *c = &ctxt->decode; int rc; - rc = ops->read_std(register_address(c, ctxt->ss_base, + rc = ops->read_std(register_address(c, ss_base(ctxt), c->regs[VCPU_REGS_RSP]), &c->dst.val, c->dst.bytes, ctxt->vcpu); if (rc != 0) @@ -1402,11 +1397,11 @@ special_insn: register_address_increment(c, &c->regs[VCPU_REGS_RSP], -c->op_bytes); c->dst.ptr = (void *) register_address( - c, ctxt->ss_base, c->regs[VCPU_REGS_RSP]); + c, ss_base(ctxt), c->regs[VCPU_REGS_RSP]); break; case 0x58 ... 0x5f: /* pop reg */ pop_instruction: - if ((rc = ops->read_std(register_address(c, ctxt->ss_base, + if ((rc = ops->read_std(register_address(c, ss_base(ctxt), c->regs[VCPU_REGS_RSP]), c->dst.ptr, c->op_bytes, ctxt->vcpu)) != 0) goto done; @@ -1420,9 +1415,8 @@ special_insn: goto cannot_emulate; c->dst.val = (s32) c->src.val; break; + case 0x68: /* push imm */ case 0x6a: /* push imm8 */ - c->src.val = 0L; - c->src.val = insn_fetch(s8, 1, c->eip); emulate_push(ctxt); break; case 0x6c: /* insb */ @@ -1433,7 +1427,7 @@ special_insn: c->rep_prefix ? address_mask(c, c->regs[VCPU_REGS_RCX]) : 1, (ctxt->eflags & EFLG_DF), - register_address(c, ctxt->es_base, + register_address(c, es_base(ctxt), c->regs[VCPU_REGS_RDI]), c->rep_prefix, c->regs[VCPU_REGS_RDX]) == 0) { @@ -1449,9 +1443,8 @@ special_insn: c->rep_prefix ? address_mask(c, c->regs[VCPU_REGS_RCX]) : 1, (ctxt->eflags & EFLG_DF), - register_address(c, c->override_base ? - *c->override_base : - ctxt->ds_base, + register_address(c, + seg_override_base(ctxt, c), c->regs[VCPU_REGS_RSI]), c->rep_prefix, c->regs[VCPU_REGS_RDX]) == 0) { @@ -1490,6 +1483,7 @@ special_insn: emulate_2op_SrcV("test", c->src, c->dst, ctxt->eflags); break; case 0x86 ... 0x87: /* xchg */ + xchg: /* Write back the register source. */ switch (c->dst.bytes) { case 1: @@ -1514,14 +1508,60 @@ special_insn: break; case 0x88 ... 0x8b: /* mov */ goto mov; + case 0x8c: { /* mov r/m, sreg */ + struct kvm_segment segreg; + + if (c->modrm_reg <= 5) + kvm_get_segment(ctxt->vcpu, &segreg, c->modrm_reg); + else { + printk(KERN_INFO "0x8c: Invalid segreg in modrm byte 0x%02x\n", + c->modrm); + goto cannot_emulate; + } + c->dst.val = segreg.selector; + break; + } case 0x8d: /* lea r16/r32, m */ c->dst.val = c->modrm_ea; break; + case 0x8e: { /* mov seg, r/m16 */ + uint16_t sel; + int type_bits; + int err; + + sel = c->src.val; + if (c->modrm_reg <= 5) { + type_bits = (c->modrm_reg == 1) ? 9 : 1; + err = kvm_load_segment_descriptor(ctxt->vcpu, sel, + type_bits, c->modrm_reg); + } else { + printk(KERN_INFO "Invalid segreg in modrm byte 0x%02x\n", + c->modrm); + goto cannot_emulate; + } + + if (err < 0) + goto cannot_emulate; + + c->dst.type = OP_NONE; /* Disable writeback. */ + break; + } case 0x8f: /* pop (sole member of Grp1a) */ rc = emulate_grp1a(ctxt, ops); if (rc != 0) goto done; break; + case 0x90: /* nop / xchg r8,rax */ + if (!(c->rex_prefix & 1)) { /* nop */ + c->dst.type = OP_NONE; + break; + } + case 0x91 ... 0x97: /* xchg reg,rax */ + c->src.type = c->dst.type = OP_REG; + c->src.bytes = c->dst.bytes = c->op_bytes; + c->src.ptr = (unsigned long *) &c->regs[VCPU_REGS_RAX]; + c->src.val = *(c->src.ptr); + goto xchg; case 0x9c: /* pushf */ c->src.val = (unsigned long) ctxt->eflags; emulate_push(ctxt); @@ -1540,11 +1580,10 @@ special_insn: c->dst.type = OP_MEM; c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; c->dst.ptr = (unsigned long *)register_address(c, - ctxt->es_base, + es_base(ctxt), c->regs[VCPU_REGS_RDI]); if ((rc = ops->read_emulated(register_address(c, - c->override_base ? *c->override_base : - ctxt->ds_base, + seg_override_base(ctxt, c), c->regs[VCPU_REGS_RSI]), &c->dst.val, c->dst.bytes, ctxt->vcpu)) != 0) @@ -1560,8 +1599,7 @@ special_insn: c->src.type = OP_NONE; /* Disable writeback. */ c->src.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; c->src.ptr = (unsigned long *)register_address(c, - c->override_base ? *c->override_base : - ctxt->ds_base, + seg_override_base(ctxt, c), c->regs[VCPU_REGS_RSI]); if ((rc = ops->read_emulated((unsigned long)c->src.ptr, &c->src.val, @@ -1572,7 +1610,7 @@ special_insn: c->dst.type = OP_NONE; /* Disable writeback. */ c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; c->dst.ptr = (unsigned long *)register_address(c, - ctxt->es_base, + es_base(ctxt), c->regs[VCPU_REGS_RDI]); if ((rc = ops->read_emulated((unsigned long)c->dst.ptr, &c->dst.val, @@ -1596,7 +1634,7 @@ special_insn: c->dst.type = OP_MEM; c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; c->dst.ptr = (unsigned long *)register_address(c, - ctxt->es_base, + es_base(ctxt), c->regs[VCPU_REGS_RDI]); c->dst.val = c->regs[VCPU_REGS_RAX]; register_address_increment(c, &c->regs[VCPU_REGS_RDI], @@ -1608,8 +1646,7 @@ special_insn: c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; c->dst.ptr = (unsigned long *)&c->regs[VCPU_REGS_RAX]; if ((rc = ops->read_emulated(register_address(c, - c->override_base ? *c->override_base : - ctxt->ds_base, + seg_override_base(ctxt, c), c->regs[VCPU_REGS_RSI]), &c->dst.val, c->dst.bytes, @@ -1622,6 +1659,8 @@ special_insn: case 0xae ... 0xaf: /* scas */ DPRINTF("Urk! I don't handle SCAS.\n"); goto cannot_emulate; + case 0xb8: /* mov r, imm */ + goto mov; case 0xc0 ... 0xc1: emulate_grp2(ctxt); break; @@ -1660,13 +1699,39 @@ special_insn: break; } case 0xe9: /* jmp rel */ - case 0xeb: /* jmp rel short */ + goto jmp; + case 0xea: /* jmp far */ { + uint32_t eip; + uint16_t sel; + + switch (c->op_bytes) { + case 2: + eip = insn_fetch(u16, 2, c->eip); + break; + case 4: + eip = insn_fetch(u32, 4, c->eip); + break; + default: + DPRINTF("jmp far: Invalid op_bytes\n"); + goto cannot_emulate; + } + sel = insn_fetch(u16, 2, c->eip); + if (kvm_load_segment_descriptor(ctxt->vcpu, sel, 9, VCPU_SREG_CS) < 0) { + DPRINTF("jmp far: Failed to load CS descriptor\n"); + goto cannot_emulate; + } + + c->eip = eip; + break; + } + case 0xeb: + jmp: /* jmp rel short */ jmp_rel(c, c->src.val); c->dst.type = OP_NONE; /* Disable writeback. */ break; case 0xf4: /* hlt */ ctxt->vcpu->arch.halt_request = 1; - goto done; + break; case 0xf5: /* cmc */ /* complement carry flag from eflags reg */ ctxt->eflags ^= EFLG_CF; @@ -1882,6 +1947,8 @@ twobyte_insn: c->src.val &= (c->dst.bytes << 3) - 1; emulate_2op_SrcV_nobyte("bts", c->src, c->dst, ctxt->eflags); break; + case 0xae: /* clflush */ + break; case 0xb0 ... 0xb1: /* cmpxchg */ /* * Save real source value, then compare EAX against diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index 50dad44fb542..0313a5eec412 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ -991,7 +991,6 @@ __init void lguest_init(void) #ifdef CONFIG_X86_LOCAL_APIC /* apic read/write intercepts */ pv_apic_ops.apic_write = lguest_apic_write; - pv_apic_ops.apic_write_atomic = lguest_apic_write; pv_apic_ops.apic_read = lguest_apic_read; #endif diff --git a/arch/x86/mach-default/setup.c b/arch/x86/mach-default/setup.c index 48278fa7d3de..3d317836be9e 100644 --- a/arch/x86/mach-default/setup.c +++ b/arch/x86/mach-default/setup.c @@ -10,14 +10,6 @@ #include <asm/e820.h> #include <asm/setup.h> -/* - * Any quirks to be performed to initialize timers/irqs/etc? - */ -int (*arch_time_init_quirk)(void); -int (*arch_pre_intr_init_quirk)(void); -int (*arch_intr_init_quirk)(void); -int (*arch_trap_init_quirk)(void); - #ifdef CONFIG_HOTPLUG_CPU #define DEFAULT_SEND_IPI (1) #else @@ -37,8 +29,8 @@ int no_broadcast=DEFAULT_SEND_IPI; **/ void __init pre_intr_init_hook(void) { - if (arch_pre_intr_init_quirk) { - if (arch_pre_intr_init_quirk()) + if (x86_quirks->arch_pre_intr_init) { + if (x86_quirks->arch_pre_intr_init()) return; } init_ISA_irqs(); @@ -64,8 +56,8 @@ static struct irqaction irq2 = { **/ void __init intr_init_hook(void) { - if (arch_intr_init_quirk) { - if (arch_intr_init_quirk()) + if (x86_quirks->arch_intr_init) { + if (x86_quirks->arch_intr_init()) return; } #ifdef CONFIG_X86_LOCAL_APIC @@ -97,8 +89,8 @@ void __init pre_setup_arch_hook(void) **/ void __init trap_init_hook(void) { - if (arch_trap_init_quirk) { - if (arch_trap_init_quirk()) + if (x86_quirks->arch_trap_init) { + if (x86_quirks->arch_trap_init()) return; } } @@ -111,6 +103,16 @@ static struct irqaction irq0 = { }; /** + * pre_time_init_hook - do any specific initialisations before. + * + **/ +void __init pre_time_init_hook(void) +{ + if (x86_quirks->arch_pre_time_init) + x86_quirks->arch_pre_time_init(); +} + +/** * time_init_hook - do any specific initialisations for the system timer. * * Description: @@ -119,13 +121,13 @@ static struct irqaction irq0 = { **/ void __init time_init_hook(void) { - if (arch_time_init_quirk) { + if (x86_quirks->arch_time_init) { /* * A nonzero return code does not mean failure, it means * that the architecture quirk does not want any * generic (timer) setup to be performed after this: */ - if (arch_time_init_quirk()) + if (x86_quirks->arch_time_init()) return; } diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile index 9873716e9f76..1fbb844c3d7a 100644 --- a/arch/x86/mm/Makefile +++ b/arch/x86/mm/Makefile @@ -21,3 +21,4 @@ obj-$(CONFIG_K8_NUMA) += k8topology_64.o endif obj-$(CONFIG_ACPI_NUMA) += srat_$(BITS).o +obj-$(CONFIG_MEMTEST) += memtest.o diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 9689a5138e64..d37f29376b0c 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -844,6 +844,9 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, reserve_early(table_start << PAGE_SHIFT, table_end << PAGE_SHIFT, "PGTABLE"); + if (!after_init_bootmem) + early_memtest(start, end); + return end >> PAGE_SHIFT; } @@ -868,8 +871,6 @@ void __init paging_init(void) */ sparse_init(); zone_sizes_init(); - - paravirt_post_allocator_init(); } /* diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 306049edd553..ec37121f6709 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -517,118 +517,6 @@ static void __init init_gbpages(void) direct_gbpages = 0; } -#ifdef CONFIG_MEMTEST - -static void __init memtest(unsigned long start_phys, unsigned long size, - unsigned pattern) -{ - unsigned long i; - unsigned long *start; - unsigned long start_bad; - unsigned long last_bad; - unsigned long val; - unsigned long start_phys_aligned; - unsigned long count; - unsigned long incr; - - switch (pattern) { - case 0: - val = 0UL; - break; - case 1: - val = -1UL; - break; - case 2: - val = 0x5555555555555555UL; - break; - case 3: - val = 0xaaaaaaaaaaaaaaaaUL; - break; - default: - return; - } - - incr = sizeof(unsigned long); - start_phys_aligned = ALIGN(start_phys, incr); - count = (size - (start_phys_aligned - start_phys))/incr; - start = __va(start_phys_aligned); - start_bad = 0; - last_bad = 0; - - for (i = 0; i < count; i++) - start[i] = val; - for (i = 0; i < count; i++, start++, start_phys_aligned += incr) { - if (*start != val) { - if (start_phys_aligned == last_bad + incr) { - last_bad += incr; - } else { - if (start_bad) { - printk(KERN_CONT "\n %016lx bad mem addr %016lx - %016lx reserved", - val, start_bad, last_bad + incr); - reserve_early(start_bad, last_bad - start_bad, "BAD RAM"); - } - start_bad = last_bad = start_phys_aligned; - } - } - } - if (start_bad) { - printk(KERN_CONT "\n %016lx bad mem addr %016lx - %016lx reserved", - val, start_bad, last_bad + incr); - reserve_early(start_bad, last_bad - start_bad, "BAD RAM"); - } - -} - -/* default is disabled */ -static int memtest_pattern __initdata; - -static int __init parse_memtest(char *arg) -{ - if (arg) - memtest_pattern = simple_strtoul(arg, NULL, 0); - return 0; -} - -early_param("memtest", parse_memtest); - -static void __init early_memtest(unsigned long start, unsigned long end) -{ - u64 t_start, t_size; - unsigned pattern; - - if (!memtest_pattern) - return; - - printk(KERN_INFO "early_memtest: pattern num %d", memtest_pattern); - for (pattern = 0; pattern < memtest_pattern; pattern++) { - t_start = start; - t_size = 0; - while (t_start < end) { - t_start = find_e820_area_size(t_start, &t_size, 1); - - /* done ? */ - if (t_start >= end) - break; - if (t_start + t_size > end) - t_size = end - t_start; - - printk(KERN_CONT "\n %016llx - %016llx pattern %d", - (unsigned long long)t_start, - (unsigned long long)t_start + t_size, pattern); - - memtest(t_start, t_size, pattern); - - t_start += t_size; - } - } - printk(KERN_CONT "\n"); -} -#else -static void __init early_memtest(unsigned long start, unsigned long end) -{ -} -#endif - static unsigned long __init kernel_physical_mapping_init(unsigned long start, unsigned long end, unsigned long page_size_mask) diff --git a/arch/x86/mm/memtest.c b/arch/x86/mm/memtest.c new file mode 100644 index 000000000000..672e17f8262a --- /dev/null +++ b/arch/x86/mm/memtest.c @@ -0,0 +1,123 @@ +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/string.h> +#include <linux/types.h> +#include <linux/mm.h> +#include <linux/smp.h> +#include <linux/init.h> +#include <linux/pfn.h> + +#include <asm/e820.h> + +static void __init memtest(unsigned long start_phys, unsigned long size, + unsigned pattern) +{ + unsigned long i; + unsigned long *start; + unsigned long start_bad; + unsigned long last_bad; + unsigned long val; + unsigned long start_phys_aligned; + unsigned long count; + unsigned long incr; + + switch (pattern) { + case 0: + val = 0UL; + break; + case 1: + val = -1UL; + break; + case 2: +#ifdef CONFIG_X86_64 + val = 0x5555555555555555UL; +#else + val = 0x55555555UL; +#endif + break; + case 3: +#ifdef CONFIG_X86_64 + val = 0xaaaaaaaaaaaaaaaaUL; +#else + val = 0xaaaaaaaaUL; +#endif + break; + default: + return; + } + + incr = sizeof(unsigned long); + start_phys_aligned = ALIGN(start_phys, incr); + count = (size - (start_phys_aligned - start_phys))/incr; + start = __va(start_phys_aligned); + start_bad = 0; + last_bad = 0; + + for (i = 0; i < count; i++) + start[i] = val; + for (i = 0; i < count; i++, start++, start_phys_aligned += incr) { + if (*start != val) { + if (start_phys_aligned == last_bad + incr) { + last_bad += incr; + } else { + if (start_bad) { + printk(KERN_CONT "\n %010lx bad mem addr %010lx - %010lx reserved", + val, start_bad, last_bad + incr); + reserve_early(start_bad, last_bad - start_bad, "BAD RAM"); + } + start_bad = last_bad = start_phys_aligned; + } + } + } + if (start_bad) { + printk(KERN_CONT "\n %016lx bad mem addr %010lx - %010lx reserved", + val, start_bad, last_bad + incr); + reserve_early(start_bad, last_bad - start_bad, "BAD RAM"); + } + +} + +/* default is disabled */ +static int memtest_pattern __initdata; + +static int __init parse_memtest(char *arg) +{ + if (arg) + memtest_pattern = simple_strtoul(arg, NULL, 0); + return 0; +} + +early_param("memtest", parse_memtest); + +void __init early_memtest(unsigned long start, unsigned long end) +{ + u64 t_start, t_size; + unsigned pattern; + + if (!memtest_pattern) + return; + + printk(KERN_INFO "early_memtest: pattern num %d", memtest_pattern); + for (pattern = 0; pattern < memtest_pattern; pattern++) { + t_start = start; + t_size = 0; + while (t_start < end) { + t_start = find_e820_area_size(t_start, &t_size, 1); + + /* done ? */ + if (t_start >= end) + break; + if (t_start + t_size > end) + t_size = end - t_start; + + printk(KERN_CONT "\n %010llx - %010llx pattern %d", + (unsigned long long)t_start, + (unsigned long long)t_start + t_size, pattern); + + memtest(t_start, t_size, pattern); + + t_start += t_size; + } + } + printk(KERN_CONT "\n"); +} diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index d4585077977a..2fe30916d4b6 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -12,6 +12,8 @@ #include <linux/gfp.h> #include <linux/fs.h> #include <linux/bootmem.h> +#include <linux/debugfs.h> +#include <linux/seq_file.h> #include <asm/msr.h> #include <asm/tlbflush.h> @@ -373,8 +375,8 @@ pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, return vma_prot; } -#ifdef CONFIG_NONPROMISC_DEVMEM -/* This check is done in drivers/char/mem.c in case of NONPROMISC_DEVMEM*/ +#ifdef CONFIG_STRICT_DEVMEM +/* This check is done in drivers/char/mem.c in case of STRICT_DEVMEM*/ static inline int range_is_allowed(unsigned long pfn, unsigned long size) { return 1; @@ -398,7 +400,7 @@ static inline int range_is_allowed(unsigned long pfn, unsigned long size) } return 1; } -#endif /* CONFIG_NONPROMISC_DEVMEM */ +#endif /* CONFIG_STRICT_DEVMEM */ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn, unsigned long size, pgprot_t *vma_prot) @@ -489,3 +491,89 @@ void unmap_devmem(unsigned long pfn, unsigned long size, pgprot_t vma_prot) free_memtype(addr, addr + size); } + +#if defined(CONFIG_DEBUG_FS) + +/* get Nth element of the linked list */ +static struct memtype *memtype_get_idx(loff_t pos) +{ + struct memtype *list_node, *print_entry; + int i = 1; + + print_entry = kmalloc(sizeof(struct memtype), GFP_KERNEL); + if (!print_entry) + return NULL; + + spin_lock(&memtype_lock); + list_for_each_entry(list_node, &memtype_list, nd) { + if (pos == i) { + *print_entry = *list_node; + spin_unlock(&memtype_lock); + return print_entry; + } + ++i; + } + spin_unlock(&memtype_lock); + kfree(print_entry); + return NULL; +} + +static void *memtype_seq_start(struct seq_file *seq, loff_t *pos) +{ + if (*pos == 0) { + ++*pos; + seq_printf(seq, "PAT memtype list:\n"); + } + + return memtype_get_idx(*pos); +} + +static void *memtype_seq_next(struct seq_file *seq, void *v, loff_t *pos) +{ + ++*pos; + return memtype_get_idx(*pos); +} + +static void memtype_seq_stop(struct seq_file *seq, void *v) +{ +} + +static int memtype_seq_show(struct seq_file *seq, void *v) +{ + struct memtype *print_entry = (struct memtype *)v; + + seq_printf(seq, "%s @ 0x%Lx-0x%Lx\n", cattr_name(print_entry->type), + print_entry->start, print_entry->end); + kfree(print_entry); + return 0; +} + +static struct seq_operations memtype_seq_ops = { + .start = memtype_seq_start, + .next = memtype_seq_next, + .stop = memtype_seq_stop, + .show = memtype_seq_show, +}; + +static int memtype_seq_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &memtype_seq_ops); +} + +static const struct file_operations memtype_fops = { + .open = memtype_seq_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + +static int __init pat_memtype_list_init(void) +{ + debugfs_create_file("pat_memtype_list", S_IRUSR, arch_debugfs_dir, + NULL, &memtype_fops); + return 0; +} + +late_initcall(pat_memtype_list_init); + +#endif /* CONFIG_DEBUG_FS */ diff --git a/arch/x86/pci/Makefile b/arch/x86/pci/Makefile index e515e8db842a..d49202e740ea 100644 --- a/arch/x86/pci/Makefile +++ b/arch/x86/pci/Makefile @@ -5,13 +5,13 @@ obj-$(CONFIG_PCI_MMCONFIG) += mmconfig_$(BITS).o direct.o mmconfig-shared.o obj-$(CONFIG_PCI_DIRECT) += direct.o obj-$(CONFIG_PCI_OLPC) += olpc.o -pci-y := fixup.o -pci-$(CONFIG_ACPI) += acpi.o -pci-y += legacy.o irq.o +obj-y += fixup.o +obj-$(CONFIG_ACPI) += acpi.o +obj-y += legacy.o irq.o -pci-$(CONFIG_X86_VISWS) += visws.o +obj-$(CONFIG_X86_VISWS) += visws.o -pci-$(CONFIG_X86_NUMAQ) += numa.o +obj-$(CONFIG_X86_NUMAQ) += numaq_32.o -obj-y += $(pci-y) common.o early.o +obj-y += common.o early.o obj-y += amd_bus.o diff --git a/arch/x86/pci/legacy.c b/arch/x86/pci/legacy.c index 132876cc6fca..ec9ce35e44d6 100644 --- a/arch/x86/pci/legacy.c +++ b/arch/x86/pci/legacy.c @@ -57,14 +57,17 @@ static int __init pci_legacy_init(void) int __init pci_subsys_init(void) { +#ifdef CONFIG_X86_NUMAQ + pci_numaq_init(); +#endif #ifdef CONFIG_ACPI pci_acpi_init(); #endif +#ifdef CONFIG_X86_VISWS + pci_visws_init(); +#endif pci_legacy_init(); pcibios_irq_init(); -#ifdef CONFIG_X86_NUMAQ - pci_numa_init(); -#endif pcibios_init(); return 0; diff --git a/arch/x86/pci/numa.c b/arch/x86/pci/numaq_32.c index 8b5ca1966731..f4b16dc11dad 100644 --- a/arch/x86/pci/numa.c +++ b/arch/x86/pci/numaq_32.c @@ -1,5 +1,5 @@ /* - * numa.c - Low-level PCI access for NUMA-Q machines + * numaq_32.c - Low-level PCI access for NUMA-Q machines */ #include <linux/pci.h> @@ -151,7 +151,7 @@ static void __devinit pci_fixup_i450nx(struct pci_dev *d) } DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82451NX, pci_fixup_i450nx); -int __init pci_numa_init(void) +int __init pci_numaq_init(void) { int quad; diff --git a/arch/x86/pci/pci.h b/arch/x86/pci/pci.h index 3e25deb821ac..15b9cf6be729 100644 --- a/arch/x86/pci/pci.h +++ b/arch/x86/pci/pci.h @@ -108,7 +108,8 @@ extern void __init dmi_check_skip_isa_align(void); /* some common used subsys_initcalls */ extern int __init pci_acpi_init(void); extern int __init pcibios_irq_init(void); -extern int __init pci_numa_init(void); +extern int __init pci_visws_init(void); +extern int __init pci_numaq_init(void); extern int __init pcibios_init(void); /* pci-mmconfig.c */ diff --git a/arch/x86/pci/visws.c b/arch/x86/pci/visws.c index 1a7bed492bb1..42f4cb19faca 100644 --- a/arch/x86/pci/visws.c +++ b/arch/x86/pci/visws.c @@ -86,8 +86,14 @@ void __init pcibios_update_irq(struct pci_dev *dev, int irq) pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq); } -static int __init pci_visws_init(void) +int __init pci_visws_init(void) { + if (!is_visws_box()) + return -1; + + pcibios_enable_irq = &pci_visws_enable_irq; + pcibios_disable_irq = &pci_visws_disable_irq; + /* The VISWS supports configuration access type 1 only */ pci_probe = (pci_probe | PCI_PROBE_CONF1) & ~(PCI_PROBE_BIOS | PCI_PROBE_CONF2); @@ -105,18 +111,3 @@ static int __init pci_visws_init(void) pcibios_resource_survey(); return 0; } - -static __init int pci_subsys_init(void) -{ - if (!is_visws_box()) - return -1; - - pcibios_enable_irq = &pci_visws_enable_irq; - pcibios_disable_irq = &pci_visws_disable_irq; - - pci_visws_init(); - pcibios_init(); - - return 0; -} -subsys_initcall(pci_subsys_init); diff --git a/arch/x86/vdso/Makefile b/arch/x86/vdso/Makefile index b7ad9f89d21f..4d6ef0a336d6 100644 --- a/arch/x86/vdso/Makefile +++ b/arch/x86/vdso/Makefile @@ -62,7 +62,7 @@ $(obj)/%-syms.lds: $(obj)/%.so.dbg FORCE # Build multiple 32-bit vDSO images to choose from at boot time. # obj-$(VDSO32-y) += vdso32-syms.lds -vdso32.so-$(CONFIG_X86_32) += int80 +vdso32.so-$(VDSO32-y) += int80 vdso32.so-$(CONFIG_COMPAT) += syscall vdso32.so-$(VDSO32-y) += sysenter diff --git a/arch/x86/vdso/vdso32-setup.c b/arch/x86/vdso/vdso32-setup.c index 0bce5429a515..513f330c5832 100644 --- a/arch/x86/vdso/vdso32-setup.c +++ b/arch/x86/vdso/vdso32-setup.c @@ -193,17 +193,12 @@ static __init void relocate_vdso(Elf32_Ehdr *ehdr) } } -/* - * These symbols are defined by vdso32.S to mark the bounds - * of the ELF DSO images included therein. - */ -extern const char vdso32_default_start, vdso32_default_end; -extern const char vdso32_sysenter_start, vdso32_sysenter_end; static struct page *vdso32_pages[1]; #ifdef CONFIG_X86_64 #define vdso32_sysenter() (boot_cpu_has(X86_FEATURE_SYSENTER32)) +#define vdso32_syscall() (boot_cpu_has(X86_FEATURE_SYSCALL32)) /* May not be __init: called during resume */ void syscall32_cpu_init(void) @@ -226,6 +221,7 @@ static inline void map_compat_vdso(int map) #else /* CONFIG_X86_32 */ #define vdso32_sysenter() (boot_cpu_has(X86_FEATURE_SEP)) +#define vdso32_syscall() (0) void enable_sep_cpu(void) { @@ -296,12 +292,15 @@ int __init sysenter_setup(void) gate_vma_init(); #endif - if (!vdso32_sysenter()) { - vsyscall = &vdso32_default_start; - vsyscall_len = &vdso32_default_end - &vdso32_default_start; - } else { + if (vdso32_syscall()) { + vsyscall = &vdso32_syscall_start; + vsyscall_len = &vdso32_syscall_end - &vdso32_syscall_start; + } else if (vdso32_sysenter()){ vsyscall = &vdso32_sysenter_start; vsyscall_len = &vdso32_sysenter_end - &vdso32_sysenter_start; + } else { + vsyscall = &vdso32_int80_start; + vsyscall_len = &vdso32_int80_end - &vdso32_int80_start; } memcpy(syscall_page, vsyscall, vsyscall_len); diff --git a/arch/x86/vdso/vdso32.S b/arch/x86/vdso/vdso32.S index 1e36f72cab86..2ce5f82c333b 100644 --- a/arch/x86/vdso/vdso32.S +++ b/arch/x86/vdso/vdso32.S @@ -2,14 +2,17 @@ __INITDATA - .globl vdso32_default_start, vdso32_default_end -vdso32_default_start: -#ifdef CONFIG_X86_32 + .globl vdso32_int80_start, vdso32_int80_end +vdso32_int80_start: .incbin "arch/x86/vdso/vdso32-int80.so" -#else +vdso32_int80_end: + + .globl vdso32_syscall_start, vdso32_syscall_end +vdso32_syscall_start: +#ifdef CONFIG_COMPAT .incbin "arch/x86/vdso/vdso32-syscall.so" #endif -vdso32_default_end: +vdso32_syscall_end: .globl vdso32_sysenter_start, vdso32_sysenter_end vdso32_sysenter_start: diff --git a/arch/x86/vdso/vma.c b/arch/x86/vdso/vma.c index 19a6cfaf5db9..257ba4a10abf 100644 --- a/arch/x86/vdso/vma.c +++ b/arch/x86/vdso/vma.c @@ -21,7 +21,8 @@ unsigned int __read_mostly vdso_enabled = 1; extern char vdso_start[], vdso_end[]; extern unsigned short vdso_sync_cpuid; -struct page **vdso_pages; +static struct page **vdso_pages; +static unsigned vdso_size; static inline void *var_ref(void *p, char *name) { @@ -38,6 +39,7 @@ static int __init init_vdso_vars(void) int i; char *vbase; + vdso_size = npages << PAGE_SHIFT; vdso_pages = kmalloc(sizeof(struct page *) * npages, GFP_KERNEL); if (!vdso_pages) goto oom; @@ -101,20 +103,19 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int exstack) struct mm_struct *mm = current->mm; unsigned long addr; int ret; - unsigned len = round_up(vdso_end - vdso_start, PAGE_SIZE); if (!vdso_enabled) return 0; down_write(&mm->mmap_sem); - addr = vdso_addr(mm->start_stack, len); - addr = get_unmapped_area(NULL, addr, len, 0, 0); + addr = vdso_addr(mm->start_stack, vdso_size); + addr = get_unmapped_area(NULL, addr, vdso_size, 0, 0); if (IS_ERR_VALUE(addr)) { ret = addr; goto up_fail; } - ret = install_special_mapping(mm, addr, len, + ret = install_special_mapping(mm, addr, vdso_size, VM_READ|VM_EXEC| VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC| VM_ALWAYSDUMP, diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig index c2cc99580871..3815e425f470 100644 --- a/arch/x86/xen/Kconfig +++ b/arch/x86/xen/Kconfig @@ -6,8 +6,8 @@ config XEN bool "Xen guest support" select PARAVIRT select PARAVIRT_CLOCK - depends on X86_32 - depends on X86_CMPXCHG && X86_TSC && X86_PAE && !(X86_VISWS || X86_VOYAGER) + depends on X86_64 || (X86_32 && X86_PAE && !(X86_VISWS || X86_VOYAGER)) + depends on X86_CMPXCHG && X86_TSC help This is the Linux Xen port. Enabling this will allow the kernel to boot in a paravirtualized environment under the @@ -15,10 +15,16 @@ config XEN config XEN_MAX_DOMAIN_MEMORY int "Maximum allowed size of a domain in gigabytes" - default 8 + default 8 if X86_32 + default 32 if X86_64 depends on XEN help The pseudo-physical to machine address array is sized according to the maximum possible memory size of a Xen domain. This array uses 1 page per gigabyte, so there's no - need to be too stingy here.
\ No newline at end of file + need to be too stingy here. + +config XEN_SAVE_RESTORE + bool + depends on PM + default y
\ No newline at end of file diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile index 2ba2d1649131..59c1e539aed2 100644 --- a/arch/x86/xen/Makefile +++ b/arch/x86/xen/Makefile @@ -1,4 +1,4 @@ obj-y := enlighten.o setup.o multicalls.o mmu.o \ - time.o xen-asm.o grant-table.o suspend.o + time.o xen-asm_$(BITS).o grant-table.o suspend.o obj-$(CONFIG_SMP) += smp.o diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index bb508456ef52..194bbd6e3241 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -33,6 +33,7 @@ #include <xen/interface/sched.h> #include <xen/features.h> #include <xen/page.h> +#include <xen/hvc-console.h> #include <asm/paravirt.h> #include <asm/page.h> @@ -40,12 +41,12 @@ #include <asm/xen/hypervisor.h> #include <asm/fixmap.h> #include <asm/processor.h> +#include <asm/msr-index.h> #include <asm/setup.h> #include <asm/desc.h> #include <asm/pgtable.h> #include <asm/tlbflush.h> #include <asm/reboot.h> -#include <asm/pgalloc.h> #include "xen-ops.h" #include "mmu.h" @@ -57,6 +58,18 @@ DEFINE_PER_CPU(struct vcpu_info *, xen_vcpu); DEFINE_PER_CPU(struct vcpu_info, xen_vcpu_info); /* + * Identity map, in addition to plain kernel map. This needs to be + * large enough to allocate page table pages to allocate the rest. + * Each page can map 2MB. + */ +static pte_t level1_ident_pgt[PTRS_PER_PTE * 4] __page_aligned_bss; + +#ifdef CONFIG_X86_64 +/* l3 pud for userspace vsyscall mapping */ +static pud_t level3_user_vsyscall[PTRS_PER_PUD] __page_aligned_bss; +#endif /* CONFIG_X86_64 */ + +/* * Note about cr3 (pagetable base) values: * * xen_cr3 contains the current logical cr3 value; it contains the @@ -167,10 +180,14 @@ void xen_vcpu_restore(void) static void __init xen_banner(void) { + unsigned version = HYPERVISOR_xen_version(XENVER_version, NULL); + struct xen_extraversion extra; + HYPERVISOR_xen_version(XENVER_extraversion, &extra); + printk(KERN_INFO "Booting paravirtualized kernel on %s\n", pv_info.name); - printk(KERN_INFO "Hypervisor signature: %s%s\n", - xen_start_info->magic, + printk(KERN_INFO "Xen version: %d.%d%s%s\n", + version >> 16, version & 0xffff, extra.extraversion, xen_feature(XENFEAT_mmu_pt_update_preserve_ad) ? " (preserve-AD)" : ""); } @@ -363,14 +380,6 @@ static void load_TLS_descriptor(struct thread_struct *t, static void xen_load_tls(struct thread_struct *t, unsigned int cpu) { - xen_mc_batch(); - - load_TLS_descriptor(t, cpu, 0); - load_TLS_descriptor(t, cpu, 1); - load_TLS_descriptor(t, cpu, 2); - - xen_mc_issue(PARAVIRT_LAZY_CPU); - /* * XXX sleazy hack: If we're being called in a lazy-cpu zone, * it means we're in a context switch, and %gs has just been @@ -379,10 +388,39 @@ static void xen_load_tls(struct thread_struct *t, unsigned int cpu) * Either way, it has been saved, and the new value will get * loaded properly. This will go away as soon as Xen has been * modified to not save/restore %gs for normal hypercalls. + * + * On x86_64, this hack is not used for %gs, because gs points + * to KERNEL_GS_BASE (and uses it for PDA references), so we + * must not zero %gs on x86_64 + * + * For x86_64, we need to zero %fs, otherwise we may get an + * exception between the new %fs descriptor being loaded and + * %fs being effectively cleared at __switch_to(). */ - if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU) + if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU) { +#ifdef CONFIG_X86_32 loadsegment(gs, 0); +#else + loadsegment(fs, 0); +#endif + } + + xen_mc_batch(); + + load_TLS_descriptor(t, cpu, 0); + load_TLS_descriptor(t, cpu, 1); + load_TLS_descriptor(t, cpu, 2); + + xen_mc_issue(PARAVIRT_LAZY_CPU); +} + +#ifdef CONFIG_X86_64 +static void xen_load_gs_index(unsigned int idx) +{ + if (HYPERVISOR_set_segment_base(SEGBASE_GS_USER_SEL, idx)) + BUG(); } +#endif static void xen_write_ldt_entry(struct desc_struct *dt, int entrynum, const void *ptr) @@ -400,23 +438,18 @@ static void xen_write_ldt_entry(struct desc_struct *dt, int entrynum, preempt_enable(); } -static int cvt_gate_to_trap(int vector, u32 low, u32 high, +static int cvt_gate_to_trap(int vector, const gate_desc *val, struct trap_info *info) { - u8 type, dpl; - - type = (high >> 8) & 0x1f; - dpl = (high >> 13) & 3; - - if (type != 0xf && type != 0xe) + if (val->type != 0xf && val->type != 0xe) return 0; info->vector = vector; - info->address = (high & 0xffff0000) | (low & 0x0000ffff); - info->cs = low >> 16; - info->flags = dpl; + info->address = gate_offset(*val); + info->cs = gate_segment(*val); + info->flags = val->dpl; /* interrupt gates clear IF */ - if (type == 0xe) + if (val->type == 0xe) info->flags |= 4; return 1; @@ -443,11 +476,10 @@ static void xen_write_idt_entry(gate_desc *dt, int entrynum, const gate_desc *g) if (p >= start && (p + 8) <= end) { struct trap_info info[2]; - u32 *desc = (u32 *)g; info[1].address = 0; - if (cvt_gate_to_trap(entrynum, desc[0], desc[1], &info[0])) + if (cvt_gate_to_trap(entrynum, g, &info[0])) if (HYPERVISOR_set_trap_table(info)) BUG(); } @@ -460,13 +492,13 @@ static void xen_convert_trap_info(const struct desc_ptr *desc, { unsigned in, out, count; - count = (desc->size+1) / 8; + count = (desc->size+1) / sizeof(gate_desc); BUG_ON(count > 256); for (in = out = 0; in < count; in++) { - const u32 *entry = (u32 *)(desc->address + in * 8); + gate_desc *entry = (gate_desc*)(desc->address) + in; - if (cvt_gate_to_trap(in, entry[0], entry[1], &traps[out])) + if (cvt_gate_to_trap(in, entry, &traps[out])) out++; } traps[out].address = 0; @@ -695,33 +727,89 @@ static void set_current_cr3(void *v) x86_write_percpu(xen_current_cr3, (unsigned long)v); } -static void xen_write_cr3(unsigned long cr3) +static void __xen_write_cr3(bool kernel, unsigned long cr3) { struct mmuext_op *op; struct multicall_space mcs; - unsigned long mfn = pfn_to_mfn(PFN_DOWN(cr3)); + unsigned long mfn; - BUG_ON(preemptible()); + if (cr3) + mfn = pfn_to_mfn(PFN_DOWN(cr3)); + else + mfn = 0; - mcs = xen_mc_entry(sizeof(*op)); /* disables interrupts */ + WARN_ON(mfn == 0 && kernel); - /* Update while interrupts are disabled, so its atomic with - respect to ipis */ - x86_write_percpu(xen_cr3, cr3); + mcs = __xen_mc_entry(sizeof(*op)); op = mcs.args; - op->cmd = MMUEXT_NEW_BASEPTR; + op->cmd = kernel ? MMUEXT_NEW_BASEPTR : MMUEXT_NEW_USER_BASEPTR; op->arg1.mfn = mfn; MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF); - /* Update xen_update_cr3 once the batch has actually - been submitted. */ - xen_mc_callback(set_current_cr3, (void *)cr3); + if (kernel) { + x86_write_percpu(xen_cr3, cr3); + + /* Update xen_current_cr3 once the batch has actually + been submitted. */ + xen_mc_callback(set_current_cr3, (void *)cr3); + } +} + +static void xen_write_cr3(unsigned long cr3) +{ + BUG_ON(preemptible()); + + xen_mc_batch(); /* disables interrupts */ + + /* Update while interrupts are disabled, so its atomic with + respect to ipis */ + x86_write_percpu(xen_cr3, cr3); + + __xen_write_cr3(true, cr3); + +#ifdef CONFIG_X86_64 + { + pgd_t *user_pgd = xen_get_user_pgd(__va(cr3)); + if (user_pgd) + __xen_write_cr3(false, __pa(user_pgd)); + else + __xen_write_cr3(false, 0); + } +#endif xen_mc_issue(PARAVIRT_LAZY_CPU); /* interrupts restored */ } +static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high) +{ + int ret; + + ret = 0; + + switch(msr) { +#ifdef CONFIG_X86_64 + unsigned which; + u64 base; + + case MSR_FS_BASE: which = SEGBASE_FS; goto set; + case MSR_KERNEL_GS_BASE: which = SEGBASE_GS_USER; goto set; + case MSR_GS_BASE: which = SEGBASE_GS_KERNEL; goto set; + + set: + base = ((u64)high << 32) | low; + if (HYPERVISOR_set_segment_base(which, base) != 0) + ret = -EFAULT; + break; +#endif + default: + ret = native_write_msr_safe(msr, low, high); + } + + return ret; +} + /* Early in boot, while setting up the initial pagetable, assume everything is pinned. */ static __init void xen_alloc_pte_init(struct mm_struct *mm, u32 pfn) @@ -778,6 +866,48 @@ static void xen_alloc_pmd(struct mm_struct *mm, u32 pfn) xen_alloc_ptpage(mm, pfn, PT_PMD); } +static int xen_pgd_alloc(struct mm_struct *mm) +{ + pgd_t *pgd = mm->pgd; + int ret = 0; + + BUG_ON(PagePinned(virt_to_page(pgd))); + +#ifdef CONFIG_X86_64 + { + struct page *page = virt_to_page(pgd); + pgd_t *user_pgd; + + BUG_ON(page->private != 0); + + ret = -ENOMEM; + + user_pgd = (pgd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO); + page->private = (unsigned long)user_pgd; + + if (user_pgd != NULL) { + user_pgd[pgd_index(VSYSCALL_START)] = + __pgd(__pa(level3_user_vsyscall) | _PAGE_TABLE); + ret = 0; + } + + BUG_ON(PagePinned(virt_to_page(xen_get_user_pgd(pgd)))); + } +#endif + + return ret; +} + +static void xen_pgd_free(struct mm_struct *mm, pgd_t *pgd) +{ +#ifdef CONFIG_X86_64 + pgd_t *user_pgd = xen_get_user_pgd(pgd); + + if (user_pgd) + free_page((unsigned long)user_pgd); +#endif +} + /* This should never happen until we're OK to use struct page */ static void xen_release_ptpage(u32 pfn, unsigned level) { @@ -803,6 +933,18 @@ static void xen_release_pmd(u32 pfn) xen_release_ptpage(pfn, PT_PMD); } +#if PAGETABLE_LEVELS == 4 +static void xen_alloc_pud(struct mm_struct *mm, u32 pfn) +{ + xen_alloc_ptpage(mm, pfn, PT_PUD); +} + +static void xen_release_pud(u32 pfn) +{ + xen_release_ptpage(pfn, PT_PUD); +} +#endif + #ifdef CONFIG_HIGHPTE static void *xen_kmap_atomic_pte(struct page *page, enum km_type type) { @@ -841,68 +983,16 @@ static __init void xen_set_pte_init(pte_t *ptep, pte_t pte) static __init void xen_pagetable_setup_start(pgd_t *base) { - pgd_t *xen_pgd = (pgd_t *)xen_start_info->pt_base; - int i; - - /* special set_pte for pagetable initialization */ - pv_mmu_ops.set_pte = xen_set_pte_init; - - init_mm.pgd = base; - /* - * copy top-level of Xen-supplied pagetable into place. This - * is a stand-in while we copy the pmd pages. - */ - memcpy(base, xen_pgd, PTRS_PER_PGD * sizeof(pgd_t)); - - /* - * For PAE, need to allocate new pmds, rather than - * share Xen's, since Xen doesn't like pmd's being - * shared between address spaces. - */ - for (i = 0; i < PTRS_PER_PGD; i++) { - if (pgd_val_ma(xen_pgd[i]) & _PAGE_PRESENT) { - pmd_t *pmd = (pmd_t *)alloc_bootmem_low_pages(PAGE_SIZE); - - memcpy(pmd, (void *)pgd_page_vaddr(xen_pgd[i]), - PAGE_SIZE); - - make_lowmem_page_readonly(pmd); - - set_pgd(&base[i], __pgd(1 + __pa(pmd))); - } else - pgd_clear(&base[i]); - } - - /* make sure zero_page is mapped RO so we can use it in pagetables */ - make_lowmem_page_readonly(empty_zero_page); - make_lowmem_page_readonly(base); - /* - * Switch to new pagetable. This is done before - * pagetable_init has done anything so that the new pages - * added to the table can be prepared properly for Xen. - */ - xen_write_cr3(__pa(base)); - - /* Unpin initial Xen pagetable */ - pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, - PFN_DOWN(__pa(xen_start_info->pt_base))); } void xen_setup_shared_info(void) { if (!xen_feature(XENFEAT_auto_translated_physmap)) { - unsigned long addr = fix_to_virt(FIX_PARAVIRT_BOOTMAP); - - /* - * Create a mapping for the shared info page. - * Should be set_fixmap(), but shared_info is a machine - * address with no corresponding pseudo-phys address. - */ - set_pte_mfn(addr, - PFN_DOWN(xen_start_info->shared_info), - PAGE_KERNEL); - - HYPERVISOR_shared_info = (struct shared_info *)addr; + set_fixmap(FIX_PARAVIRT_BOOTMAP, + xen_start_info->shared_info); + + HYPERVISOR_shared_info = + (struct shared_info *)fix_to_virt(FIX_PARAVIRT_BOOTMAP); } else HYPERVISOR_shared_info = (struct shared_info *)__va(xen_start_info->shared_info); @@ -917,26 +1007,32 @@ void xen_setup_shared_info(void) static __init void xen_pagetable_setup_done(pgd_t *base) { - /* This will work as long as patching hasn't happened yet - (which it hasn't) */ - pv_mmu_ops.alloc_pte = xen_alloc_pte; - pv_mmu_ops.alloc_pmd = xen_alloc_pmd; - pv_mmu_ops.release_pte = xen_release_pte; - pv_mmu_ops.release_pmd = xen_release_pmd; - pv_mmu_ops.set_pte = xen_set_pte; - xen_setup_shared_info(); - - /* Actually pin the pagetable down, but we can't set PG_pinned - yet because the page structures don't exist yet. */ - pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(base))); } static __init void xen_post_allocator_init(void) { + pv_mmu_ops.set_pte = xen_set_pte; pv_mmu_ops.set_pmd = xen_set_pmd; pv_mmu_ops.set_pud = xen_set_pud; +#if PAGETABLE_LEVELS == 4 + pv_mmu_ops.set_pgd = xen_set_pgd; +#endif + + /* This will work as long as patching hasn't happened yet + (which it hasn't) */ + pv_mmu_ops.alloc_pte = xen_alloc_pte; + pv_mmu_ops.alloc_pmd = xen_alloc_pmd; + pv_mmu_ops.release_pte = xen_release_pte; + pv_mmu_ops.release_pmd = xen_release_pmd; +#if PAGETABLE_LEVELS == 4 + pv_mmu_ops.alloc_pud = xen_alloc_pud; + pv_mmu_ops.release_pud = xen_release_pud; +#endif +#ifdef CONFIG_X86_64 + SetPagePinned(virt_to_page(level3_user_vsyscall)); +#endif xen_mark_init_mm_pinned(); } @@ -950,6 +1046,7 @@ void xen_setup_vcpu_info_placement(void) /* xen_vcpu_setup managed to place the vcpu_info within the percpu area for all cpus, so make use of it */ +#ifdef CONFIG_X86_32 if (have_vcpu_info_placement) { printk(KERN_INFO "Xen: using vcpu_info placement\n"); @@ -959,6 +1056,7 @@ void xen_setup_vcpu_info_placement(void) pv_irq_ops.irq_enable = xen_irq_enable_direct; pv_mmu_ops.read_cr2 = xen_read_cr2_direct; } +#endif } static unsigned xen_patch(u8 type, u16 clobbers, void *insnbuf, @@ -979,10 +1077,12 @@ static unsigned xen_patch(u8 type, u16 clobbers, void *insnbuf, goto patch_site switch (type) { +#ifdef CONFIG_X86_32 SITE(pv_irq_ops, irq_enable); SITE(pv_irq_ops, irq_disable); SITE(pv_irq_ops, save_fl); SITE(pv_irq_ops, restore_fl); +#endif /* CONFIG_X86_32 */ #undef SITE patch_site: @@ -1025,8 +1125,15 @@ static void xen_set_fixmap(unsigned idx, unsigned long phys, pgprot_t prot) #ifdef CONFIG_X86_F00F_BUG case FIX_F00F_IDT: #endif +#ifdef CONFIG_X86_32 case FIX_WP_TEST: case FIX_VDSO: +# ifdef CONFIG_HIGHMEM + case FIX_KMAP_BEGIN ... FIX_KMAP_END: +# endif +#else + case VSYSCALL_LAST_PAGE ... VSYSCALL_FIRST_PAGE: +#endif #ifdef CONFIG_X86_LOCAL_APIC case FIX_APIC_BASE: /* maps dummy local APIC */ #endif @@ -1039,6 +1146,15 @@ static void xen_set_fixmap(unsigned idx, unsigned long phys, pgprot_t prot) } __native_set_fixmap(idx, pte); + +#ifdef CONFIG_X86_64 + /* Replicate changes to map the vsyscall page into the user + pagetable vsyscall mapping. */ + if (idx >= VSYSCALL_LAST_PAGE && idx <= VSYSCALL_FIRST_PAGE) { + unsigned long vaddr = __fix_to_virt(idx); + set_pte_vaddr_pud(level3_user_vsyscall, vaddr, pte); + } +#endif } static const struct pv_info xen_info __initdata = { @@ -1084,18 +1200,25 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = { .wbinvd = native_wbinvd, .read_msr = native_read_msr_safe, - .write_msr = native_write_msr_safe, + .write_msr = xen_write_msr_safe, .read_tsc = native_read_tsc, .read_pmc = native_read_pmc, .iret = xen_iret, .irq_enable_sysexit = xen_sysexit, +#ifdef CONFIG_X86_64 + .usergs_sysret32 = xen_sysret32, + .usergs_sysret64 = xen_sysret64, +#endif .load_tr_desc = paravirt_nop, .set_ldt = xen_set_ldt, .load_gdt = xen_load_gdt, .load_idt = xen_load_idt, .load_tls = xen_load_tls, +#ifdef CONFIG_X86_64 + .load_gs_index = xen_load_gs_index, +#endif .store_gdt = native_store_gdt, .store_idt = native_store_idt, @@ -1109,14 +1232,34 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = { .set_iopl_mask = xen_set_iopl_mask, .io_delay = xen_io_delay, + /* Xen takes care of %gs when switching to usermode for us */ + .swapgs = paravirt_nop, + .lazy_mode = { .enter = paravirt_enter_lazy_cpu, .leave = xen_leave_lazy, }, }; +static void __init __xen_init_IRQ(void) +{ +#ifdef CONFIG_X86_64 + int i; + + /* Create identity vector->irq map */ + for(i = 0; i < NR_VECTORS; i++) { + int cpu; + + for_each_possible_cpu(cpu) + per_cpu(vector_irq, cpu)[i] = i; + } +#endif /* CONFIG_X86_64 */ + + xen_init_IRQ(); +} + static const struct pv_irq_ops xen_irq_ops __initdata = { - .init_IRQ = xen_init_IRQ, + .init_IRQ = __xen_init_IRQ, .save_fl = xen_save_fl, .restore_fl = xen_restore_fl, .irq_disable = xen_irq_disable, @@ -1124,14 +1267,13 @@ static const struct pv_irq_ops xen_irq_ops __initdata = { .safe_halt = xen_safe_halt, .halt = xen_halt, #ifdef CONFIG_X86_64 - .adjust_exception_frame = paravirt_nop, + .adjust_exception_frame = xen_adjust_exception_frame, #endif }; static const struct pv_apic_ops xen_apic_ops __initdata = { #ifdef CONFIG_X86_LOCAL_APIC .apic_write = xen_apic_write, - .apic_write_atomic = xen_apic_write, .apic_read = xen_apic_read, .setup_boot_clock = paravirt_nop, .setup_secondary_clock = paravirt_nop, @@ -1157,8 +1299,8 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = { .pte_update = paravirt_nop, .pte_update_defer = paravirt_nop, - .pgd_alloc = __paravirt_pgd_alloc, - .pgd_free = paravirt_nop, + .pgd_alloc = xen_pgd_alloc, + .pgd_free = xen_pgd_free, .alloc_pte = xen_alloc_pte_init, .release_pte = xen_release_pte_init, @@ -1170,7 +1312,11 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = { .kmap_atomic_pte = xen_kmap_atomic_pte, #endif - .set_pte = NULL, /* see xen_pagetable_setup_* */ +#ifdef CONFIG_X86_64 + .set_pte = xen_set_pte, +#else + .set_pte = xen_set_pte_init, +#endif .set_pte_at = xen_set_pte_at, .set_pmd = xen_set_pmd_hyper, @@ -1184,15 +1330,26 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = { .make_pte = xen_make_pte, .make_pgd = xen_make_pgd, +#ifdef CONFIG_X86_PAE .set_pte_atomic = xen_set_pte_atomic, .set_pte_present = xen_set_pte_at, - .set_pud = xen_set_pud_hyper, .pte_clear = xen_pte_clear, .pmd_clear = xen_pmd_clear, +#endif /* CONFIG_X86_PAE */ + .set_pud = xen_set_pud_hyper, .make_pmd = xen_make_pmd, .pmd_val = xen_pmd_val, +#if PAGETABLE_LEVELS == 4 + .pud_val = xen_pud_val, + .make_pud = xen_make_pud, + .set_pgd = xen_set_pgd_hyper, + + .alloc_pud = xen_alloc_pte_init, + .release_pud = xen_release_pte_init, +#endif /* PAGETABLE_LEVELS == 4 */ + .activate_mm = xen_activate_mm, .dup_mmap = xen_dup_mmap, .exit_mmap = xen_exit_mmap, @@ -1205,21 +1362,6 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = { .set_fixmap = xen_set_fixmap, }; -#ifdef CONFIG_SMP -static const struct smp_ops xen_smp_ops __initdata = { - .smp_prepare_boot_cpu = xen_smp_prepare_boot_cpu, - .smp_prepare_cpus = xen_smp_prepare_cpus, - .cpu_up = xen_cpu_up, - .smp_cpus_done = xen_smp_cpus_done, - - .smp_send_stop = xen_smp_send_stop, - .smp_send_reschedule = xen_smp_send_reschedule, - - .send_call_func_ipi = xen_smp_send_call_function_ipi, - .send_call_func_single_ipi = xen_smp_send_call_function_single_ipi, -}; -#endif /* CONFIG_SMP */ - static void xen_reboot(int reason) { struct sched_shutdown r = { .reason = reason }; @@ -1264,6 +1406,7 @@ static const struct machine_ops __initdata xen_machine_ops = { static void __init xen_reserve_top(void) { +#ifdef CONFIG_X86_32 unsigned long top = HYPERVISOR_VIRT_START; struct xen_platform_parameters pp; @@ -1271,8 +1414,248 @@ static void __init xen_reserve_top(void) top = pp.virt_start; reserve_top_address(-top + 2 * PAGE_SIZE); +#endif /* CONFIG_X86_32 */ +} + +/* + * Like __va(), but returns address in the kernel mapping (which is + * all we have until the physical memory mapping has been set up. + */ +static void *__ka(phys_addr_t paddr) +{ +#ifdef CONFIG_X86_64 + return (void *)(paddr + __START_KERNEL_map); +#else + return __va(paddr); +#endif } +/* Convert a machine address to physical address */ +static unsigned long m2p(phys_addr_t maddr) +{ + phys_addr_t paddr; + + maddr &= PTE_MASK; + paddr = mfn_to_pfn(maddr >> PAGE_SHIFT) << PAGE_SHIFT; + + return paddr; +} + +/* Convert a machine address to kernel virtual */ +static void *m2v(phys_addr_t maddr) +{ + return __ka(m2p(maddr)); +} + +#ifdef CONFIG_X86_64 +static void walk(pgd_t *pgd, unsigned long addr) +{ + unsigned l4idx = pgd_index(addr); + unsigned l3idx = pud_index(addr); + unsigned l2idx = pmd_index(addr); + unsigned l1idx = pte_index(addr); + pgd_t l4; + pud_t l3; + pmd_t l2; + pte_t l1; + + xen_raw_printk("walk %p, %lx -> %d %d %d %d\n", + pgd, addr, l4idx, l3idx, l2idx, l1idx); + + l4 = pgd[l4idx]; + xen_raw_printk(" l4: %016lx\n", l4.pgd); + xen_raw_printk(" %016lx\n", pgd_val(l4)); + + l3 = ((pud_t *)(m2v(l4.pgd)))[l3idx]; + xen_raw_printk(" l3: %016lx\n", l3.pud); + xen_raw_printk(" %016lx\n", pud_val(l3)); + + l2 = ((pmd_t *)(m2v(l3.pud)))[l2idx]; + xen_raw_printk(" l2: %016lx\n", l2.pmd); + xen_raw_printk(" %016lx\n", pmd_val(l2)); + + l1 = ((pte_t *)(m2v(l2.pmd)))[l1idx]; + xen_raw_printk(" l1: %016lx\n", l1.pte); + xen_raw_printk(" %016lx\n", pte_val(l1)); +} +#endif + +static void set_page_prot(void *addr, pgprot_t prot) +{ + unsigned long pfn = __pa(addr) >> PAGE_SHIFT; + pte_t pte = pfn_pte(pfn, prot); + + xen_raw_printk("addr=%p pfn=%lx mfn=%lx prot=%016llx pte=%016llx\n", + addr, pfn, get_phys_to_machine(pfn), + pgprot_val(prot), pte.pte); + + if (HYPERVISOR_update_va_mapping((unsigned long)addr, pte, 0)) + BUG(); +} + +static __init void xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn) +{ + unsigned pmdidx, pteidx; + unsigned ident_pte; + unsigned long pfn; + + ident_pte = 0; + pfn = 0; + for(pmdidx = 0; pmdidx < PTRS_PER_PMD && pfn < max_pfn; pmdidx++) { + pte_t *pte_page; + + /* Reuse or allocate a page of ptes */ + if (pmd_present(pmd[pmdidx])) + pte_page = m2v(pmd[pmdidx].pmd); + else { + /* Check for free pte pages */ + if (ident_pte == ARRAY_SIZE(level1_ident_pgt)) + break; + + pte_page = &level1_ident_pgt[ident_pte]; + ident_pte += PTRS_PER_PTE; + + pmd[pmdidx] = __pmd(__pa(pte_page) | _PAGE_TABLE); + } + + /* Install mappings */ + for(pteidx = 0; pteidx < PTRS_PER_PTE; pteidx++, pfn++) { + pte_t pte; + + if (pfn > max_pfn_mapped) + max_pfn_mapped = pfn; + + if (!pte_none(pte_page[pteidx])) + continue; + + pte = pfn_pte(pfn, PAGE_KERNEL_EXEC); + pte_page[pteidx] = pte; + } + } + + for(pteidx = 0; pteidx < ident_pte; pteidx += PTRS_PER_PTE) + set_page_prot(&level1_ident_pgt[pteidx], PAGE_KERNEL_RO); + + set_page_prot(pmd, PAGE_KERNEL_RO); +} + +#ifdef CONFIG_X86_64 +static void convert_pfn_mfn(void *v) +{ + pte_t *pte = v; + int i; + + /* All levels are converted the same way, so just treat them + as ptes. */ + for(i = 0; i < PTRS_PER_PTE; i++) + pte[i] = xen_make_pte(pte[i].pte); +} + +/* + * Set up the inital kernel pagetable. + * + * We can construct this by grafting the Xen provided pagetable into + * head_64.S's preconstructed pagetables. We copy the Xen L2's into + * level2_ident_pgt, level2_kernel_pgt and level2_fixmap_pgt. This + * means that only the kernel has a physical mapping to start with - + * but that's enough to get __va working. We need to fill in the rest + * of the physical mapping once some sort of allocator has been set + * up. + */ +static __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn) +{ + pud_t *l3; + pmd_t *l2; + + /* Zap identity mapping */ + init_level4_pgt[0] = __pgd(0); + + /* Pre-constructed entries are in pfn, so convert to mfn */ + convert_pfn_mfn(init_level4_pgt); + convert_pfn_mfn(level3_ident_pgt); + convert_pfn_mfn(level3_kernel_pgt); + + l3 = m2v(pgd[pgd_index(__START_KERNEL_map)].pgd); + l2 = m2v(l3[pud_index(__START_KERNEL_map)].pud); + + memcpy(level2_ident_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD); + memcpy(level2_kernel_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD); + + l3 = m2v(pgd[pgd_index(__START_KERNEL_map + PMD_SIZE)].pgd); + l2 = m2v(l3[pud_index(__START_KERNEL_map + PMD_SIZE)].pud); + memcpy(level2_fixmap_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD); + + /* Set up identity map */ + xen_map_identity_early(level2_ident_pgt, max_pfn); + + /* Make pagetable pieces RO */ + set_page_prot(init_level4_pgt, PAGE_KERNEL_RO); + set_page_prot(level3_ident_pgt, PAGE_KERNEL_RO); + set_page_prot(level3_kernel_pgt, PAGE_KERNEL_RO); + set_page_prot(level3_user_vsyscall, PAGE_KERNEL_RO); + set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO); + set_page_prot(level2_fixmap_pgt, PAGE_KERNEL_RO); + + /* Pin down new L4 */ + pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE, + PFN_DOWN(__pa_symbol(init_level4_pgt))); + + /* Unpin Xen-provided one */ + pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd))); + + /* Switch over */ + pgd = init_level4_pgt; + + /* + * At this stage there can be no user pgd, and no page + * structure to attach it to, so make sure we just set kernel + * pgd. + */ + xen_mc_batch(); + __xen_write_cr3(true, __pa(pgd)); + xen_mc_issue(PARAVIRT_LAZY_CPU); + + reserve_early(__pa(xen_start_info->pt_base), + __pa(xen_start_info->pt_base + + xen_start_info->nr_pt_frames * PAGE_SIZE), + "XEN PAGETABLES"); + + return pgd; +} +#else /* !CONFIG_X86_64 */ +static pmd_t level2_kernel_pgt[PTRS_PER_PMD] __page_aligned_bss; + +static __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn) +{ + pmd_t *kernel_pmd; + + init_pg_tables_start = __pa(pgd); + init_pg_tables_end = __pa(pgd) + xen_start_info->nr_pt_frames*PAGE_SIZE; + max_pfn_mapped = PFN_DOWN(init_pg_tables_end + 512*1024); + + kernel_pmd = m2v(pgd[KERNEL_PGD_BOUNDARY].pgd); + memcpy(level2_kernel_pgt, kernel_pmd, sizeof(pmd_t) * PTRS_PER_PMD); + + xen_map_identity_early(level2_kernel_pgt, max_pfn); + + memcpy(swapper_pg_dir, pgd, sizeof(pgd_t) * PTRS_PER_PGD); + set_pgd(&swapper_pg_dir[KERNEL_PGD_BOUNDARY], + __pgd(__pa(level2_kernel_pgt) | _PAGE_PRESENT)); + + set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO); + set_page_prot(swapper_pg_dir, PAGE_KERNEL_RO); + set_page_prot(empty_zero_page, PAGE_KERNEL_RO); + + pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd))); + + xen_write_cr3(__pa(swapper_pg_dir)); + + pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(swapper_pg_dir))); + + return swapper_pg_dir; +} +#endif /* CONFIG_X86_64 */ + /* First C function to be called on Xen boot */ asmlinkage void __init xen_start_kernel(void) { @@ -1301,53 +1684,56 @@ asmlinkage void __init xen_start_kernel(void) machine_ops = xen_machine_ops; -#ifdef CONFIG_SMP - smp_ops = xen_smp_ops; +#ifdef CONFIG_X86_64 + /* Disable until direct per-cpu data access. */ + have_vcpu_info_placement = 0; + x86_64_init_pda(); #endif + xen_smp_init(); + /* Get mfn list */ if (!xen_feature(XENFEAT_auto_translated_physmap)) xen_build_dynamic_phys_to_machine(); pgd = (pgd_t *)xen_start_info->pt_base; - init_pg_tables_start = __pa(pgd); - init_pg_tables_end = __pa(pgd) + xen_start_info->nr_pt_frames*PAGE_SIZE; - max_pfn_mapped = (init_pg_tables_end + 512*1024) >> PAGE_SHIFT; - - init_mm.pgd = pgd; /* use the Xen pagetables to start */ - - /* keep using Xen gdt for now; no urgent need to change it */ - - x86_write_percpu(xen_cr3, __pa(pgd)); - x86_write_percpu(xen_current_cr3, __pa(pgd)); + /* Prevent unwanted bits from being set in PTEs. */ + __supported_pte_mask &= ~_PAGE_GLOBAL; + if (!is_initial_xendomain()) + __supported_pte_mask &= ~(_PAGE_PWT | _PAGE_PCD); /* Don't do the full vcpu_info placement stuff until we have a possible map and a non-dummy shared_info. */ per_cpu(xen_vcpu, 0) = &HYPERVISOR_shared_info->vcpu_info[0]; + xen_raw_console_write("mapping kernel into physical memory\n"); + pgd = xen_setup_kernel_pagetable(pgd, xen_start_info->nr_pages); + + init_mm.pgd = pgd; + + /* keep using Xen gdt for now; no urgent need to change it */ + pv_info.kernel_rpl = 1; if (xen_feature(XENFEAT_supervisor_mode_kernel)) pv_info.kernel_rpl = 0; - /* Prevent unwanted bits from being set in PTEs. */ - __supported_pte_mask &= ~_PAGE_GLOBAL; - if (!is_initial_xendomain()) - __supported_pte_mask &= ~(_PAGE_PWT | _PAGE_PCD); - /* set the limit of our address space */ xen_reserve_top(); +#ifdef CONFIG_X86_32 /* set up basic CPUID stuff */ cpu_detect(&new_cpu_data); new_cpu_data.hard_math = 1; new_cpu_data.x86_capability[0] = cpuid_edx(1); +#endif /* Poke various useful things into boot_params */ boot_params.hdr.type_of_loader = (9 << 4) | 0; boot_params.hdr.ramdisk_image = xen_start_info->mod_start ? __pa(xen_start_info->mod_start) : 0; boot_params.hdr.ramdisk_size = xen_start_info->mod_len; + boot_params.hdr.cmd_line_ptr = __pa(xen_start_info->cmd_line); if (!is_initial_xendomain()) { add_preferred_console("xenboot", 0, NULL); @@ -1355,6 +1741,21 @@ asmlinkage void __init xen_start_kernel(void) add_preferred_console("hvc", 0, NULL); } + xen_raw_console_write("about to get started...\n"); + +#if 0 + xen_raw_printk("&boot_params=%p __pa(&boot_params)=%lx __va(__pa(&boot_params))=%lx\n", + &boot_params, __pa_symbol(&boot_params), + __va(__pa_symbol(&boot_params))); + + walk(pgd, &boot_params); + walk(pgd, __va(__pa(&boot_params))); +#endif + /* Start the world */ +#ifdef CONFIG_X86_32 i386_start_kernel(); +#else + x86_64_start_reservations((char *)__pa_symbol(&boot_params)); +#endif } diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index ff0aa74afaa1..a44d56e38bd1 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -44,8 +44,10 @@ #include <asm/pgtable.h> #include <asm/tlbflush.h> +#include <asm/fixmap.h> #include <asm/mmu_context.h> #include <asm/paravirt.h> +#include <asm/linkage.h> #include <asm/xen/hypercall.h> #include <asm/xen/hypervisor.h> @@ -56,26 +58,29 @@ #include "multicalls.h" #include "mmu.h" +/* + * Just beyond the highest usermode address. STACK_TOP_MAX has a + * redzone above it, so round it up to a PGD boundary. + */ +#define USER_LIMIT ((STACK_TOP_MAX + PGDIR_SIZE - 1) & PGDIR_MASK) + + #define P2M_ENTRIES_PER_PAGE (PAGE_SIZE / sizeof(unsigned long)) #define TOP_ENTRIES (MAX_DOMAIN_PAGES / P2M_ENTRIES_PER_PAGE) /* Placeholder for holes in the address space */ -static unsigned long p2m_missing[P2M_ENTRIES_PER_PAGE] - __attribute__((section(".data.page_aligned"))) = +static unsigned long p2m_missing[P2M_ENTRIES_PER_PAGE] __page_aligned_data = { [ 0 ... P2M_ENTRIES_PER_PAGE-1 ] = ~0UL }; /* Array of pointers to pages containing p2m entries */ -static unsigned long *p2m_top[TOP_ENTRIES] - __attribute__((section(".data.page_aligned"))) = +static unsigned long *p2m_top[TOP_ENTRIES] __page_aligned_data = { [ 0 ... TOP_ENTRIES - 1] = &p2m_missing[0] }; /* Arrays of p2m arrays expressed in mfns used for save/restore */ -static unsigned long p2m_top_mfn[TOP_ENTRIES] - __attribute__((section(".bss.page_aligned"))); +static unsigned long p2m_top_mfn[TOP_ENTRIES] __page_aligned_bss; -static unsigned long p2m_top_mfn_list[ - PAGE_ALIGN(TOP_ENTRIES / P2M_ENTRIES_PER_PAGE)] - __attribute__((section(".bss.page_aligned"))); +static unsigned long p2m_top_mfn_list[TOP_ENTRIES / P2M_ENTRIES_PER_PAGE] + __page_aligned_bss; static inline unsigned p2m_top_index(unsigned long pfn) { @@ -181,15 +186,16 @@ void set_phys_to_machine(unsigned long pfn, unsigned long mfn) p2m_top[topidx][idx] = mfn; } -xmaddr_t arbitrary_virt_to_machine(unsigned long address) +xmaddr_t arbitrary_virt_to_machine(void *vaddr) { + unsigned long address = (unsigned long)vaddr; unsigned int level; pte_t *pte = lookup_address(address, &level); unsigned offset = address & ~PAGE_MASK; BUG_ON(pte == NULL); - return XMADDR((pte_mfn(*pte) << PAGE_SHIFT) + offset); + return XMADDR(((phys_addr_t)pte_mfn(*pte) << PAGE_SHIFT) + offset); } void make_lowmem_page_readonly(void *vaddr) @@ -256,7 +262,8 @@ void xen_set_pmd_hyper(pmd_t *ptr, pmd_t val) xen_mc_batch(); - u.ptr = virt_to_machine(ptr).maddr; + /* ptr may be ioremapped for 64-bit pagetable setup */ + u.ptr = arbitrary_virt_to_machine(ptr).maddr; u.val = pmd_val_ma(val); extend_mmu_update(&u); @@ -283,35 +290,7 @@ void xen_set_pmd(pmd_t *ptr, pmd_t val) */ void set_pte_mfn(unsigned long vaddr, unsigned long mfn, pgprot_t flags) { - pgd_t *pgd; - pud_t *pud; - pmd_t *pmd; - pte_t *pte; - - pgd = swapper_pg_dir + pgd_index(vaddr); - if (pgd_none(*pgd)) { - BUG(); - return; - } - pud = pud_offset(pgd, vaddr); - if (pud_none(*pud)) { - BUG(); - return; - } - pmd = pmd_offset(pud, vaddr); - if (pmd_none(*pmd)) { - BUG(); - return; - } - pte = pte_offset_kernel(pmd, vaddr); - /* <mfn,flags> stored as-is, to permit clearing entries */ - xen_set_pte(pte, mfn_pte(mfn, flags)); - - /* - * It's enough to flush this one mapping. - * (PGE mappings get flushed as well) - */ - __flush_tlb_one(vaddr); + set_pte_vaddr(vaddr, mfn_pte(mfn, flags)); } void xen_set_pte_at(struct mm_struct *mm, unsigned long addr, @@ -418,7 +397,8 @@ void xen_set_pud_hyper(pud_t *ptr, pud_t val) xen_mc_batch(); - u.ptr = virt_to_machine(ptr).maddr; + /* ptr may be ioremapped for 64-bit pagetable setup */ + u.ptr = arbitrary_virt_to_machine(ptr).maddr; u.val = pud_val_ma(val); extend_mmu_update(&u); @@ -441,14 +421,19 @@ void xen_set_pud(pud_t *ptr, pud_t val) void xen_set_pte(pte_t *ptep, pte_t pte) { +#ifdef CONFIG_X86_PAE ptep->pte_high = pte.pte_high; smp_wmb(); ptep->pte_low = pte.pte_low; +#else + *ptep = pte; +#endif } +#ifdef CONFIG_X86_PAE void xen_set_pte_atomic(pte_t *ptep, pte_t pte) { - set_64bit((u64 *)ptep, pte_val_ma(pte)); + set_64bit((u64 *)ptep, native_pte_val(pte)); } void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) @@ -462,6 +447,7 @@ void xen_pmd_clear(pmd_t *pmdp) { set_pmd(pmdp, __pmd(0)); } +#endif /* CONFIG_X86_PAE */ pmd_t xen_make_pmd(pmdval_t pmd) { @@ -469,78 +455,189 @@ pmd_t xen_make_pmd(pmdval_t pmd) return native_make_pmd(pmd); } +#if PAGETABLE_LEVELS == 4 +pudval_t xen_pud_val(pud_t pud) +{ + return pte_mfn_to_pfn(pud.pud); +} + +pud_t xen_make_pud(pudval_t pud) +{ + pud = pte_pfn_to_mfn(pud); + + return native_make_pud(pud); +} + +pgd_t *xen_get_user_pgd(pgd_t *pgd) +{ + pgd_t *pgd_page = (pgd_t *)(((unsigned long)pgd) & PAGE_MASK); + unsigned offset = pgd - pgd_page; + pgd_t *user_ptr = NULL; + + if (offset < pgd_index(USER_LIMIT)) { + struct page *page = virt_to_page(pgd_page); + user_ptr = (pgd_t *)page->private; + if (user_ptr) + user_ptr += offset; + } + + return user_ptr; +} + +static void __xen_set_pgd_hyper(pgd_t *ptr, pgd_t val) +{ + struct mmu_update u; + + u.ptr = virt_to_machine(ptr).maddr; + u.val = pgd_val_ma(val); + extend_mmu_update(&u); +} + +/* + * Raw hypercall-based set_pgd, intended for in early boot before + * there's a page structure. This implies: + * 1. The only existing pagetable is the kernel's + * 2. It is always pinned + * 3. It has no user pagetable attached to it + */ +void __init xen_set_pgd_hyper(pgd_t *ptr, pgd_t val) +{ + preempt_disable(); + + xen_mc_batch(); + + __xen_set_pgd_hyper(ptr, val); + + xen_mc_issue(PARAVIRT_LAZY_MMU); + + preempt_enable(); +} + +void xen_set_pgd(pgd_t *ptr, pgd_t val) +{ + pgd_t *user_ptr = xen_get_user_pgd(ptr); + + /* If page is not pinned, we can just update the entry + directly */ + if (!page_pinned(ptr)) { + *ptr = val; + if (user_ptr) { + WARN_ON(page_pinned(user_ptr)); + *user_ptr = val; + } + return; + } + + /* If it's pinned, then we can at least batch the kernel and + user updates together. */ + xen_mc_batch(); + + __xen_set_pgd_hyper(ptr, val); + if (user_ptr) + __xen_set_pgd_hyper(user_ptr, val); + + xen_mc_issue(PARAVIRT_LAZY_MMU); +} +#endif /* PAGETABLE_LEVELS == 4 */ + /* - (Yet another) pagetable walker. This one is intended for pinning a - pagetable. This means that it walks a pagetable and calls the - callback function on each page it finds making up the page table, - at every level. It walks the entire pagetable, but it only bothers - pinning pte pages which are below pte_limit. In the normal case - this will be TASK_SIZE, but at boot we need to pin up to - FIXADDR_TOP. But the important bit is that we don't pin beyond - there, because then we start getting into Xen's ptes. -*/ -static int pgd_walk(pgd_t *pgd_base, int (*func)(struct page *, enum pt_level), + * (Yet another) pagetable walker. This one is intended for pinning a + * pagetable. This means that it walks a pagetable and calls the + * callback function on each page it finds making up the page table, + * at every level. It walks the entire pagetable, but it only bothers + * pinning pte pages which are below limit. In the normal case this + * will be STACK_TOP_MAX, but at boot we need to pin up to + * FIXADDR_TOP. + * + * For 32-bit the important bit is that we don't pin beyond there, + * because then we start getting into Xen's ptes. + * + * For 64-bit, we must skip the Xen hole in the middle of the address + * space, just after the big x86-64 virtual hole. + */ +static int pgd_walk(pgd_t *pgd, int (*func)(struct page *, enum pt_level), unsigned long limit) { - pgd_t *pgd = pgd_base; int flush = 0; - unsigned long addr = 0; - unsigned long pgd_next; + unsigned hole_low, hole_high; + unsigned pgdidx_limit, pudidx_limit, pmdidx_limit; + unsigned pgdidx, pudidx, pmdidx; - BUG_ON(limit > FIXADDR_TOP); + /* The limit is the last byte to be touched */ + limit--; + BUG_ON(limit >= FIXADDR_TOP); if (xen_feature(XENFEAT_auto_translated_physmap)) return 0; - for (; addr != FIXADDR_TOP; pgd++, addr = pgd_next) { + /* + * 64-bit has a great big hole in the middle of the address + * space, which contains the Xen mappings. On 32-bit these + * will end up making a zero-sized hole and so is a no-op. + */ + hole_low = pgd_index(USER_LIMIT); + hole_high = pgd_index(PAGE_OFFSET); + + pgdidx_limit = pgd_index(limit); +#if PTRS_PER_PUD > 1 + pudidx_limit = pud_index(limit); +#else + pudidx_limit = 0; +#endif +#if PTRS_PER_PMD > 1 + pmdidx_limit = pmd_index(limit); +#else + pmdidx_limit = 0; +#endif + + flush |= (*func)(virt_to_page(pgd), PT_PGD); + + for (pgdidx = 0; pgdidx <= pgdidx_limit; pgdidx++) { pud_t *pud; - unsigned long pud_limit, pud_next; - pgd_next = pud_limit = pgd_addr_end(addr, FIXADDR_TOP); + if (pgdidx >= hole_low && pgdidx < hole_high) + continue; - if (!pgd_val(*pgd)) + if (!pgd_val(pgd[pgdidx])) continue; - pud = pud_offset(pgd, 0); + pud = pud_offset(&pgd[pgdidx], 0); if (PTRS_PER_PUD > 1) /* not folded */ flush |= (*func)(virt_to_page(pud), PT_PUD); - for (; addr != pud_limit; pud++, addr = pud_next) { + for (pudidx = 0; pudidx < PTRS_PER_PUD; pudidx++) { pmd_t *pmd; - unsigned long pmd_limit; - pud_next = pud_addr_end(addr, pud_limit); - - if (pud_next < limit) - pmd_limit = pud_next; - else - pmd_limit = limit; + if (pgdidx == pgdidx_limit && + pudidx > pudidx_limit) + goto out; - if (pud_none(*pud)) + if (pud_none(pud[pudidx])) continue; - pmd = pmd_offset(pud, 0); + pmd = pmd_offset(&pud[pudidx], 0); if (PTRS_PER_PMD > 1) /* not folded */ flush |= (*func)(virt_to_page(pmd), PT_PMD); - for (; addr != pmd_limit; pmd++) { - addr += (PAGE_SIZE * PTRS_PER_PTE); - if ((pmd_limit-1) < (addr-1)) { - addr = pmd_limit; - break; - } + for (pmdidx = 0; pmdidx < PTRS_PER_PMD; pmdidx++) { + struct page *pte; + + if (pgdidx == pgdidx_limit && + pudidx == pudidx_limit && + pmdidx > pmdidx_limit) + goto out; - if (pmd_none(*pmd)) + if (pmd_none(pmd[pmdidx])) continue; - flush |= (*func)(pmd_page(*pmd), PT_PTE); + pte = pmd_page(pmd[pmdidx]); + flush |= (*func)(pte, PT_PTE); } } } - - flush |= (*func)(virt_to_page(pgd_base), PT_PGD); +out: return flush; } @@ -622,14 +719,31 @@ void xen_pgd_pin(pgd_t *pgd) { xen_mc_batch(); - if (pgd_walk(pgd, pin_page, TASK_SIZE)) { + if (pgd_walk(pgd, pin_page, USER_LIMIT)) { /* re-enable interrupts for kmap_flush_unused */ xen_mc_issue(0); kmap_flush_unused(); xen_mc_batch(); } +#ifdef CONFIG_X86_64 + { + pgd_t *user_pgd = xen_get_user_pgd(pgd); + + xen_do_pin(MMUEXT_PIN_L4_TABLE, PFN_DOWN(__pa(pgd))); + + if (user_pgd) { + pin_page(virt_to_page(user_pgd), PT_PGD); + xen_do_pin(MMUEXT_PIN_L4_TABLE, PFN_DOWN(__pa(user_pgd))); + } + } +#else /* CONFIG_X86_32 */ +#ifdef CONFIG_X86_PAE + /* Need to make sure unshared kernel PMD is pinnable */ + pin_page(virt_to_page(pgd_page(pgd[pgd_index(TASK_SIZE)])), PT_PMD); +#endif xen_do_pin(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(pgd))); +#endif /* CONFIG_X86_64 */ xen_mc_issue(0); } @@ -656,9 +770,11 @@ void xen_mm_pin_all(void) spin_unlock_irqrestore(&pgd_lock, flags); } -/* The init_mm pagetable is really pinned as soon as its created, but - that's before we have page structures to store the bits. So do all - the book-keeping now. */ +/* + * The init_mm pagetable is really pinned as soon as its created, but + * that's before we have page structures to store the bits. So do all + * the book-keeping now. + */ static __init int mark_pinned(struct page *page, enum pt_level level) { SetPagePinned(page); @@ -708,7 +824,23 @@ static void xen_pgd_unpin(pgd_t *pgd) xen_do_pin(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd))); - pgd_walk(pgd, unpin_page, TASK_SIZE); +#ifdef CONFIG_X86_64 + { + pgd_t *user_pgd = xen_get_user_pgd(pgd); + + if (user_pgd) { + xen_do_pin(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(user_pgd))); + unpin_page(virt_to_page(user_pgd), PT_PGD); + } + } +#endif + +#ifdef CONFIG_X86_PAE + /* Need to make sure unshared kernel PMD is unpinned */ + pin_page(virt_to_page(pgd_page(pgd[pgd_index(TASK_SIZE)])), PT_PMD); +#endif + + pgd_walk(pgd, unpin_page, USER_LIMIT); xen_mc_issue(0); } @@ -727,7 +859,6 @@ void xen_mm_unpin_all(void) list_for_each_entry(page, &pgd_list, lru) { if (PageSavePinned(page)) { BUG_ON(!PagePinned(page)); - printk("unpinning pinned %p\n", page_address(page)); xen_pgd_unpin((pgd_t *)page_address(page)); ClearPageSavePinned(page); } @@ -757,8 +888,15 @@ void xen_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm) static void drop_other_mm_ref(void *info) { struct mm_struct *mm = info; + struct mm_struct *active_mm; + +#ifdef CONFIG_X86_64 + active_mm = read_pda(active_mm); +#else + active_mm = __get_cpu_var(cpu_tlbstate).active_mm; +#endif - if (__get_cpu_var(cpu_tlbstate).active_mm == mm) + if (active_mm == mm) leave_mm(smp_processor_id()); /* If this cpu still has a stale cr3 reference, then make sure diff --git a/arch/x86/xen/mmu.h b/arch/x86/xen/mmu.h index 297bf9f5b8bc..0f59bd03f9e3 100644 --- a/arch/x86/xen/mmu.h +++ b/arch/x86/xen/mmu.h @@ -10,18 +10,6 @@ enum pt_level { PT_PTE }; -/* - * Page-directory addresses above 4GB do not fit into architectural %cr3. - * When accessing %cr3, or equivalent field in vcpu_guest_context, guests - * must use the following accessor macros to pack/unpack valid MFNs. - * - * Note that Xen is using the fact that the pagetable base is always - * page-aligned, and putting the 12 MSB of the address into the 12 LSB - * of cr3. - */ -#define xen_pfn_to_cr3(pfn) (((unsigned)(pfn) << 12) | ((unsigned)(pfn) >> 20)) -#define xen_cr3_to_pfn(cr3) (((unsigned)(cr3) >> 12) | ((unsigned)(cr3) << 20)) - void set_pte_mfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags); @@ -44,13 +32,26 @@ pgd_t xen_make_pgd(pgdval_t); void xen_set_pte(pte_t *ptep, pte_t pteval); void xen_set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pteval); + +#ifdef CONFIG_X86_PAE void xen_set_pte_atomic(pte_t *ptep, pte_t pte); +void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep); +void xen_pmd_clear(pmd_t *pmdp); +#endif /* CONFIG_X86_PAE */ + void xen_set_pmd(pmd_t *pmdp, pmd_t pmdval); void xen_set_pud(pud_t *ptr, pud_t val); void xen_set_pmd_hyper(pmd_t *pmdp, pmd_t pmdval); void xen_set_pud_hyper(pud_t *ptr, pud_t val); -void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep); -void xen_pmd_clear(pmd_t *pmdp); + +#if PAGETABLE_LEVELS == 4 +pudval_t xen_pud_val(pud_t pud); +pud_t xen_make_pud(pudval_t pudval); +void xen_set_pgd(pgd_t *pgdp, pgd_t pgd); +void xen_set_pgd_hyper(pgd_t *pgdp, pgd_t pgd); +#endif + +pgd_t *xen_get_user_pgd(pgd_t *pgd); pte_t xen_ptep_modify_prot_start(struct mm_struct *mm, unsigned long addr, pte_t *ptep); void xen_ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr, diff --git a/arch/x86/xen/multicalls.c b/arch/x86/xen/multicalls.c index 3c63c4da7ed1..9efd1c6c9776 100644 --- a/arch/x86/xen/multicalls.c +++ b/arch/x86/xen/multicalls.c @@ -76,6 +76,7 @@ void xen_mc_flush(void) if (ret) { printk(KERN_ERR "%d multicall(s) failed: cpu %d\n", ret, smp_processor_id()); + dump_stack(); for (i = 0; i < b->mcidx; i++) { printk(" call %2d/%d: op=%lu arg=[%lx] result=%ld\n", i+1, b->mcidx, diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index e0a39595bde3..b6acc3a0af46 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -83,30 +83,72 @@ static void xen_idle(void) /* * Set the bit indicating "nosegneg" library variants should be used. + * We only need to bother in pure 32-bit mode; compat 32-bit processes + * can have un-truncated segments, so wrapping around is allowed. */ static void __init fiddle_vdso(void) { - extern const char vdso32_default_start; - u32 *mask = VDSO32_SYMBOL(&vdso32_default_start, NOTE_MASK); +#ifdef CONFIG_X86_32 + u32 *mask; + mask = VDSO32_SYMBOL(&vdso32_int80_start, NOTE_MASK); *mask |= 1 << VDSO_NOTE_NONEGSEG_BIT; + mask = VDSO32_SYMBOL(&vdso32_sysenter_start, NOTE_MASK); + *mask |= 1 << VDSO_NOTE_NONEGSEG_BIT; +#endif } -void xen_enable_sysenter(void) +static __cpuinit int register_callback(unsigned type, const void *func) { - int cpu = smp_processor_id(); - extern void xen_sysenter_target(void); - /* Mask events on entry, even though they get enabled immediately */ - static struct callback_register sysenter = { - .type = CALLBACKTYPE_sysenter, - .address = { __KERNEL_CS, (unsigned long)xen_sysenter_target }, + struct callback_register callback = { + .type = type, + .address = XEN_CALLBACK(__KERNEL_CS, func), .flags = CALLBACKF_mask_events, }; - if (!boot_cpu_has(X86_FEATURE_SEP) || - HYPERVISOR_callback_op(CALLBACKOP_register, &sysenter) != 0) { - clear_cpu_cap(&cpu_data(cpu), X86_FEATURE_SEP); - clear_cpu_cap(&boot_cpu_data, X86_FEATURE_SEP); + return HYPERVISOR_callback_op(CALLBACKOP_register, &callback); +} + +void __cpuinit xen_enable_sysenter(void) +{ + extern void xen_sysenter_target(void); + int ret; + unsigned sysenter_feature; + +#ifdef CONFIG_X86_32 + sysenter_feature = X86_FEATURE_SEP; +#else + sysenter_feature = X86_FEATURE_SYSENTER32; +#endif + + if (!boot_cpu_has(sysenter_feature)) + return; + + ret = register_callback(CALLBACKTYPE_sysenter, xen_sysenter_target); + if(ret != 0) + setup_clear_cpu_cap(sysenter_feature); +} + +void __cpuinit xen_enable_syscall(void) +{ +#ifdef CONFIG_X86_64 + int ret; + extern void xen_syscall_target(void); + extern void xen_syscall32_target(void); + + ret = register_callback(CALLBACKTYPE_syscall, xen_syscall_target); + if (ret != 0) { + printk(KERN_ERR "Failed to set syscall callback: %d\n", ret); + /* Pretty fatal; 64-bit userspace has no other + mechanism for syscalls. */ } + + if (boot_cpu_has(X86_FEATURE_SYSCALL32)) { + ret = register_callback(CALLBACKTYPE_syscall32, + xen_syscall32_target); + if (ret != 0) + setup_clear_cpu_cap(X86_FEATURE_SYSCALL32); + } +#endif /* CONFIG_X86_64 */ } void __init xen_arch_setup(void) @@ -120,10 +162,12 @@ void __init xen_arch_setup(void) if (!xen_feature(XENFEAT_auto_translated_physmap)) HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_pae_extended_cr3); - HYPERVISOR_set_callbacks(__KERNEL_CS, (unsigned long)xen_hypervisor_callback, - __KERNEL_CS, (unsigned long)xen_failsafe_callback); + if (register_callback(CALLBACKTYPE_event, xen_hypervisor_callback) || + register_callback(CALLBACKTYPE_failsafe, xen_failsafe_callback)) + BUG(); xen_enable_sysenter(); + xen_enable_syscall(); set_iopl.iopl = 1; rc = HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl); @@ -143,11 +187,6 @@ void __init xen_arch_setup(void) pm_idle = xen_idle; -#ifdef CONFIG_SMP - /* fill cpus_possible with all available cpus */ - xen_fill_possible_map(); -#endif - paravirt_disable_iospace(); fiddle_vdso(); diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index 233156f39b7f..f702199312a5 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -66,13 +66,22 @@ static __cpuinit void cpu_bringup_and_idle(void) int cpu = smp_processor_id(); cpu_init(); + preempt_disable(); + xen_enable_sysenter(); + xen_enable_syscall(); - preempt_disable(); - per_cpu(cpu_state, cpu) = CPU_ONLINE; + cpu = smp_processor_id(); + smp_store_cpu_info(cpu); + cpu_data(cpu).x86_max_cores = 1; + set_cpu_sibling_map(cpu); xen_setup_cpu_clockevents(); + cpu_set(cpu, cpu_online_map); + x86_write_percpu(cpu_state, CPU_ONLINE); + wmb(); + /* We can take interrupts now: we're officially "up". */ local_irq_enable(); @@ -141,56 +150,37 @@ static int xen_smp_intr_init(unsigned int cpu) return rc; } -void __init xen_fill_possible_map(void) +static void __init xen_fill_possible_map(void) { int i, rc; for (i = 0; i < NR_CPUS; i++) { rc = HYPERVISOR_vcpu_op(VCPUOP_is_up, i, NULL); - if (rc >= 0) + if (rc >= 0) { + num_processors++; cpu_set(i, cpu_possible_map); + } } } -void __init xen_smp_prepare_boot_cpu(void) +static void __init xen_smp_prepare_boot_cpu(void) { - int cpu; - BUG_ON(smp_processor_id() != 0); native_smp_prepare_boot_cpu(); /* We've switched to the "real" per-cpu gdt, so make sure the old memory can be recycled */ - make_lowmem_page_readwrite(&per_cpu__gdt_page); - - for_each_possible_cpu(cpu) { - cpus_clear(per_cpu(cpu_sibling_map, cpu)); - /* - * cpu_core_map lives in a per cpu area that is cleared - * when the per cpu array is allocated. - * - * cpus_clear(per_cpu(cpu_core_map, cpu)); - */ - } + make_lowmem_page_readwrite(&per_cpu_var(gdt_page)); xen_setup_vcpu_info_placement(); } -void __init xen_smp_prepare_cpus(unsigned int max_cpus) +static void __init xen_smp_prepare_cpus(unsigned int max_cpus) { unsigned cpu; - for_each_possible_cpu(cpu) { - cpus_clear(per_cpu(cpu_sibling_map, cpu)); - /* - * cpu_core_ map will be zeroed when the per - * cpu area is allocated. - * - * cpus_clear(per_cpu(cpu_core_map, cpu)); - */ - } - smp_store_cpu_info(0); + cpu_data(0).x86_max_cores = 1; set_cpu_sibling_map(0); if (xen_smp_intr_init(0)) @@ -225,7 +215,7 @@ static __cpuinit int cpu_initialize_context(unsigned int cpu, struct task_struct *idle) { struct vcpu_guest_context *ctxt; - struct gdt_page *gdt = &per_cpu(gdt_page, cpu); + struct desc_struct *gdt; if (cpu_test_and_set(cpu, xen_cpu_initialized_map)) return 0; @@ -234,12 +224,15 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle) if (ctxt == NULL) return -ENOMEM; + gdt = get_cpu_gdt_table(cpu); + ctxt->flags = VGCF_IN_KERNEL; ctxt->user_regs.ds = __USER_DS; ctxt->user_regs.es = __USER_DS; - ctxt->user_regs.fs = __KERNEL_PERCPU; - ctxt->user_regs.gs = 0; ctxt->user_regs.ss = __KERNEL_DS; +#ifdef CONFIG_X86_32 + ctxt->user_regs.fs = __KERNEL_PERCPU; +#endif ctxt->user_regs.eip = (unsigned long)cpu_bringup_and_idle; ctxt->user_regs.eflags = 0x1000; /* IOPL_RING1 */ @@ -249,11 +242,11 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle) ctxt->ldt_ents = 0; - BUG_ON((unsigned long)gdt->gdt & ~PAGE_MASK); - make_lowmem_page_readonly(gdt->gdt); + BUG_ON((unsigned long)gdt & ~PAGE_MASK); + make_lowmem_page_readonly(gdt); - ctxt->gdt_frames[0] = virt_to_mfn(gdt->gdt); - ctxt->gdt_ents = ARRAY_SIZE(gdt->gdt); + ctxt->gdt_frames[0] = virt_to_mfn(gdt); + ctxt->gdt_ents = GDT_ENTRIES; ctxt->user_regs.cs = __KERNEL_CS; ctxt->user_regs.esp = idle->thread.sp0 - sizeof(struct pt_regs); @@ -261,9 +254,11 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle) ctxt->kernel_ss = __KERNEL_DS; ctxt->kernel_sp = idle->thread.sp0; +#ifdef CONFIG_X86_32 ctxt->event_callback_cs = __KERNEL_CS; - ctxt->event_callback_eip = (unsigned long)xen_hypervisor_callback; ctxt->failsafe_callback_cs = __KERNEL_CS; +#endif + ctxt->event_callback_eip = (unsigned long)xen_hypervisor_callback; ctxt->failsafe_callback_eip = (unsigned long)xen_failsafe_callback; per_cpu(xen_cr3, cpu) = __pa(swapper_pg_dir); @@ -276,7 +271,7 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle) return 0; } -int __cpuinit xen_cpu_up(unsigned int cpu) +static int __cpuinit xen_cpu_up(unsigned int cpu) { struct task_struct *idle = idle_task(cpu); int rc; @@ -287,11 +282,28 @@ int __cpuinit xen_cpu_up(unsigned int cpu) return rc; #endif +#ifdef CONFIG_X86_64 + /* Allocate node local memory for AP pdas */ + WARN_ON(cpu == 0); + if (cpu > 0) { + rc = get_local_pda(cpu); + if (rc) + return rc; + } +#endif + +#ifdef CONFIG_X86_32 init_gdt(cpu); per_cpu(current_task, cpu) = idle; irq_ctx_init(cpu); +#else + cpu_pda(cpu)->pcurrent = idle; + clear_tsk_thread_flag(idle, TIF_FORK); +#endif xen_setup_timer(cpu); + per_cpu(cpu_state, cpu) = CPU_UP_PREPARE; + /* make sure interrupts start blocked */ per_cpu(xen_vcpu, cpu)->evtchn_upcall_mask = 1; @@ -306,20 +318,18 @@ int __cpuinit xen_cpu_up(unsigned int cpu) if (rc) return rc; - smp_store_cpu_info(cpu); - set_cpu_sibling_map(cpu); - /* This must be done before setting cpu_online_map */ - wmb(); - - cpu_set(cpu, cpu_online_map); - rc = HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL); BUG_ON(rc); + while(per_cpu(cpu_state, cpu) != CPU_ONLINE) { + HYPERVISOR_sched_op(SCHEDOP_yield, 0); + barrier(); + } + return 0; } -void xen_smp_cpus_done(unsigned int max_cpus) +static void xen_smp_cpus_done(unsigned int max_cpus) { } @@ -335,12 +345,12 @@ static void stop_self(void *v) BUG(); } -void xen_smp_send_stop(void) +static void xen_smp_send_stop(void) { smp_call_function(stop_self, NULL, 0); } -void xen_smp_send_reschedule(int cpu) +static void xen_smp_send_reschedule(int cpu) { xen_send_IPI_one(cpu, XEN_RESCHEDULE_VECTOR); } @@ -355,7 +365,7 @@ static void xen_send_IPI_mask(cpumask_t mask, enum ipi_vector vector) xen_send_IPI_one(cpu, vector); } -void xen_smp_send_call_function_ipi(cpumask_t mask) +static void xen_smp_send_call_function_ipi(cpumask_t mask) { int cpu; @@ -370,7 +380,7 @@ void xen_smp_send_call_function_ipi(cpumask_t mask) } } -void xen_smp_send_call_function_single_ipi(int cpu) +static void xen_smp_send_call_function_single_ipi(int cpu) { xen_send_IPI_mask(cpumask_of_cpu(cpu), XEN_CALL_FUNCTION_SINGLE_VECTOR); } @@ -379,7 +389,11 @@ static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id) { irq_enter(); generic_smp_call_function_interrupt(); +#ifdef CONFIG_X86_32 __get_cpu_var(irq_stat).irq_call_count++; +#else + add_pda(irq_call_count, 1); +#endif irq_exit(); return IRQ_HANDLED; @@ -389,8 +403,31 @@ static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id) { irq_enter(); generic_smp_call_function_single_interrupt(); +#ifdef CONFIG_X86_32 __get_cpu_var(irq_stat).irq_call_count++; +#else + add_pda(irq_call_count, 1); +#endif irq_exit(); return IRQ_HANDLED; } + +static const struct smp_ops xen_smp_ops __initdata = { + .smp_prepare_boot_cpu = xen_smp_prepare_boot_cpu, + .smp_prepare_cpus = xen_smp_prepare_cpus, + .cpu_up = xen_cpu_up, + .smp_cpus_done = xen_smp_cpus_done, + + .smp_send_stop = xen_smp_send_stop, + .smp_send_reschedule = xen_smp_send_reschedule, + + .send_call_func_ipi = xen_smp_send_call_function_ipi, + .send_call_func_single_ipi = xen_smp_send_call_function_single_ipi, +}; + +void __init xen_smp_init(void) +{ + smp_ops = xen_smp_ops; + xen_fill_possible_map(); +} diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c index 251669a932d4..2a234db5949b 100644 --- a/arch/x86/xen/suspend.c +++ b/arch/x86/xen/suspend.c @@ -38,8 +38,11 @@ void xen_post_suspend(int suspend_cancelled) xen_cpu_initialized_map = cpu_online_map; #endif xen_vcpu_restore(); - xen_timer_resume(); } } +void xen_arch_resume(void) +{ + /* nothing */ +} diff --git a/arch/x86/xen/xen-asm.S b/arch/x86/xen/xen-asm_32.S index 2497a30f41de..2497a30f41de 100644 --- a/arch/x86/xen/xen-asm.S +++ b/arch/x86/xen/xen-asm_32.S diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S new file mode 100644 index 000000000000..4038cbfe3331 --- /dev/null +++ b/arch/x86/xen/xen-asm_64.S @@ -0,0 +1,271 @@ +/* + Asm versions of Xen pv-ops, suitable for either direct use or inlining. + The inline versions are the same as the direct-use versions, with the + pre- and post-amble chopped off. + + This code is encoded for size rather than absolute efficiency, + with a view to being able to inline as much as possible. + + We only bother with direct forms (ie, vcpu in pda) of the operations + here; the indirect forms are better handled in C, since they're + generally too large to inline anyway. + */ + +#include <linux/linkage.h> + +#include <asm/asm-offsets.h> +#include <asm/processor-flags.h> +#include <asm/errno.h> +#include <asm/segment.h> + +#include <xen/interface/xen.h> + +#define RELOC(x, v) .globl x##_reloc; x##_reloc=v +#define ENDPATCH(x) .globl x##_end; x##_end=. + +/* Pseudo-flag used for virtual NMI, which we don't implement yet */ +#define XEN_EFLAGS_NMI 0x80000000 + +#if 0 +#include <asm/percpu.h> + +/* + Enable events. This clears the event mask and tests the pending + event status with one and operation. If there are pending + events, then enter the hypervisor to get them handled. + */ +ENTRY(xen_irq_enable_direct) + /* Unmask events */ + movb $0, PER_CPU_VAR(xen_vcpu_info, XEN_vcpu_info_mask) + + /* Preempt here doesn't matter because that will deal with + any pending interrupts. The pending check may end up being + run on the wrong CPU, but that doesn't hurt. */ + + /* Test for pending */ + testb $0xff, PER_CPU_VAR(xen_vcpu_info, XEN_vcpu_info_pending) + jz 1f + +2: call check_events +1: +ENDPATCH(xen_irq_enable_direct) + ret + ENDPROC(xen_irq_enable_direct) + RELOC(xen_irq_enable_direct, 2b+1) + +/* + Disabling events is simply a matter of making the event mask + non-zero. + */ +ENTRY(xen_irq_disable_direct) + movb $1, PER_CPU_VAR(xen_vcpu_info, XEN_vcpu_info_mask) +ENDPATCH(xen_irq_disable_direct) + ret + ENDPROC(xen_irq_disable_direct) + RELOC(xen_irq_disable_direct, 0) + +/* + (xen_)save_fl is used to get the current interrupt enable status. + Callers expect the status to be in X86_EFLAGS_IF, and other bits + may be set in the return value. We take advantage of this by + making sure that X86_EFLAGS_IF has the right value (and other bits + in that byte are 0), but other bits in the return value are + undefined. We need to toggle the state of the bit, because + Xen and x86 use opposite senses (mask vs enable). + */ +ENTRY(xen_save_fl_direct) + testb $0xff, PER_CPU_VAR(xen_vcpu_info, XEN_vcpu_info_mask) + setz %ah + addb %ah,%ah +ENDPATCH(xen_save_fl_direct) + ret + ENDPROC(xen_save_fl_direct) + RELOC(xen_save_fl_direct, 0) + +/* + In principle the caller should be passing us a value return + from xen_save_fl_direct, but for robustness sake we test only + the X86_EFLAGS_IF flag rather than the whole byte. After + setting the interrupt mask state, it checks for unmasked + pending events and enters the hypervisor to get them delivered + if so. + */ +ENTRY(xen_restore_fl_direct) + testb $X86_EFLAGS_IF>>8, %ah + setz PER_CPU_VAR(xen_vcpu_info, XEN_vcpu_info_mask) + /* Preempt here doesn't matter because that will deal with + any pending interrupts. The pending check may end up being + run on the wrong CPU, but that doesn't hurt. */ + + /* check for unmasked and pending */ + cmpw $0x0001, PER_CPU_VAR(xen_vcpu_info, XEN_vcpu_info_pending) + jz 1f +2: call check_events +1: +ENDPATCH(xen_restore_fl_direct) + ret + ENDPROC(xen_restore_fl_direct) + RELOC(xen_restore_fl_direct, 2b+1) + + +/* + Force an event check by making a hypercall, + but preserve regs before making the call. + */ +check_events: + push %rax + push %rcx + push %rdx + push %rsi + push %rdi + push %r8 + push %r9 + push %r10 + push %r11 + call force_evtchn_callback + pop %r11 + pop %r10 + pop %r9 + pop %r8 + pop %rdi + pop %rsi + pop %rdx + pop %rcx + pop %rax + ret +#endif + +ENTRY(xen_adjust_exception_frame) + mov 8+0(%rsp),%rcx + mov 8+8(%rsp),%r11 + ret $16 + +hypercall_iret = hypercall_page + __HYPERVISOR_iret * 32 +/* + Xen64 iret frame: + + ss + rsp + rflags + cs + rip <-- standard iret frame + + flags + + rcx } + r11 }<-- pushed by hypercall page +rsp -> rax } + */ +ENTRY(xen_iret) + pushq $0 +1: jmp hypercall_iret +ENDPATCH(xen_iret) +RELOC(xen_iret, 1b+1) + +/* + sysexit is not used for 64-bit processes, so it's + only ever used to return to 32-bit compat userspace. + */ +ENTRY(xen_sysexit) + pushq $__USER32_DS + pushq %rcx + pushq $X86_EFLAGS_IF + pushq $__USER32_CS + pushq %rdx + + pushq $VGCF_in_syscall +1: jmp hypercall_iret +ENDPATCH(xen_sysexit) +RELOC(xen_sysexit, 1b+1) + +ENTRY(xen_sysret64) + /* We're already on the usermode stack at this point, but still + with the kernel gs, so we can easily switch back */ + movq %rsp, %gs:pda_oldrsp + movq %gs:pda_kernelstack,%rsp + + pushq $__USER_DS + pushq %gs:pda_oldrsp + pushq %r11 + pushq $__USER_CS + pushq %rcx + + pushq $VGCF_in_syscall +1: jmp hypercall_iret +ENDPATCH(xen_sysret64) +RELOC(xen_sysret64, 1b+1) + +ENTRY(xen_sysret32) + /* We're already on the usermode stack at this point, but still + with the kernel gs, so we can easily switch back */ + movq %rsp, %gs:pda_oldrsp + movq %gs:pda_kernelstack, %rsp + + pushq $__USER32_DS + pushq %gs:pda_oldrsp + pushq %r11 + pushq $__USER32_CS + pushq %rcx + + pushq $VGCF_in_syscall +1: jmp hypercall_iret +ENDPATCH(xen_sysret32) +RELOC(xen_sysret32, 1b+1) + +/* + Xen handles syscall callbacks much like ordinary exceptions, + which means we have: + - kernel gs + - kernel rsp + - an iret-like stack frame on the stack (including rcx and r11): + ss + rsp + rflags + cs + rip + r11 + rsp-> rcx + + In all the entrypoints, we undo all that to make it look + like a CPU-generated syscall/sysenter and jump to the normal + entrypoint. + */ + +.macro undo_xen_syscall + mov 0*8(%rsp),%rcx + mov 1*8(%rsp),%r11 + mov 5*8(%rsp),%rsp +.endm + +/* Normal 64-bit system call target */ +ENTRY(xen_syscall_target) + undo_xen_syscall + jmp system_call_after_swapgs +ENDPROC(xen_syscall_target) + +#ifdef CONFIG_IA32_EMULATION + +/* 32-bit compat syscall target */ +ENTRY(xen_syscall32_target) + undo_xen_syscall + jmp ia32_cstar_target +ENDPROC(xen_syscall32_target) + +/* 32-bit compat sysenter target */ +ENTRY(xen_sysenter_target) + undo_xen_syscall + jmp ia32_sysenter_target +ENDPROC(xen_sysenter_target) + +#else /* !CONFIG_IA32_EMULATION */ + +ENTRY(xen_syscall32_target) +ENTRY(xen_sysenter_target) + lea 16(%rsp), %rsp /* strip %rcx,%r11 */ + mov $-ENOSYS, %rax + pushq $VGCF_in_syscall + jmp hypercall_iret +ENDPROC(xen_syscall32_target) +ENDPROC(xen_sysenter_target) + +#endif /* CONFIG_IA32_EMULATION */ diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S index 7c0cf6320a0a..63d49a523ed3 100644 --- a/arch/x86/xen/xen-head.S +++ b/arch/x86/xen/xen-head.S @@ -5,15 +5,24 @@ #include <linux/elfnote.h> #include <linux/init.h> + #include <asm/boot.h> +#include <asm/asm.h> +#include <asm/page.h> + #include <xen/interface/elfnote.h> #include <asm/xen/interface.h> __INIT ENTRY(startup_xen) - movl %esi,xen_start_info cld - movl $(init_thread_union+THREAD_SIZE),%esp +#ifdef CONFIG_X86_32 + mov %esi,xen_start_info + mov $init_thread_union+THREAD_SIZE,%esp +#else + mov %rsi,xen_start_info + mov $init_thread_union+THREAD_SIZE,%rsp +#endif jmp xen_start_kernel __FINIT @@ -21,21 +30,26 @@ ENTRY(startup_xen) .pushsection .text .align PAGE_SIZE_asm ENTRY(hypercall_page) - .skip 0x1000 + .skip PAGE_SIZE_asm .popsection ELFNOTE(Xen, XEN_ELFNOTE_GUEST_OS, .asciz "linux") ELFNOTE(Xen, XEN_ELFNOTE_GUEST_VERSION, .asciz "2.6") ELFNOTE(Xen, XEN_ELFNOTE_XEN_VERSION, .asciz "xen-3.0") - ELFNOTE(Xen, XEN_ELFNOTE_VIRT_BASE, .long __PAGE_OFFSET) - ELFNOTE(Xen, XEN_ELFNOTE_ENTRY, .long startup_xen) - ELFNOTE(Xen, XEN_ELFNOTE_HYPERCALL_PAGE, .long hypercall_page) +#ifdef CONFIG_X86_32 + ELFNOTE(Xen, XEN_ELFNOTE_VIRT_BASE, _ASM_PTR __PAGE_OFFSET) +#else + ELFNOTE(Xen, XEN_ELFNOTE_VIRT_BASE, _ASM_PTR __START_KERNEL_map) +#endif + ELFNOTE(Xen, XEN_ELFNOTE_ENTRY, _ASM_PTR startup_xen) + ELFNOTE(Xen, XEN_ELFNOTE_HYPERCALL_PAGE, _ASM_PTR hypercall_page) ELFNOTE(Xen, XEN_ELFNOTE_FEATURES, .asciz "!writable_page_tables|pae_pgdir_above_4gb") ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE, .asciz "yes") ELFNOTE(Xen, XEN_ELFNOTE_LOADER, .asciz "generic") ELFNOTE(Xen, XEN_ELFNOTE_L1_MFN_VALID, .quad _PAGE_PRESENT; .quad _PAGE_PRESENT) ELFNOTE(Xen, XEN_ELFNOTE_SUSPEND_CANCEL, .long 1) - ELFNOTE(Xen, XEN_ELFNOTE_HV_START_LOW, .long __HYPERVISOR_VIRT_START) + ELFNOTE(Xen, XEN_ELFNOTE_HV_START_LOW, _ASM_PTR __HYPERVISOR_VIRT_START) + ELFNOTE(Xen, XEN_ELFNOTE_PADDR_OFFSET, _ASM_PTR 0) #endif /*CONFIG_XEN */ diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index 6f4b1045c1c2..dd3c23152a2e 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -26,6 +26,7 @@ char * __init xen_memory_setup(void); void __init xen_arch_setup(void); void __init xen_init_IRQ(void); void xen_enable_sysenter(void); +void xen_enable_syscall(void); void xen_vcpu_restore(void); void __init xen_build_dynamic_phys_to_machine(void); @@ -37,7 +38,6 @@ void __init xen_time_init(void); unsigned long xen_get_wallclock(void); int xen_set_wallclock(unsigned long time); unsigned long long xen_sched_clock(void); -void xen_timer_resume(void); irqreturn_t xen_debug_interrupt(int irq, void *dev_id); @@ -45,20 +45,15 @@ bool xen_vcpu_stolen(int vcpu); void xen_mark_init_mm_pinned(void); -void __init xen_fill_possible_map(void); - void __init xen_setup_vcpu_info_placement(void); -void xen_smp_prepare_boot_cpu(void); -void xen_smp_prepare_cpus(unsigned int max_cpus); -int xen_cpu_up(unsigned int cpu); -void xen_smp_cpus_done(unsigned int max_cpus); -void xen_smp_send_stop(void); -void xen_smp_send_reschedule(int cpu); -void xen_smp_send_call_function_ipi(cpumask_t mask); -void xen_smp_send_call_function_single_ipi(int cpu); +#ifdef CONFIG_SMP +void xen_smp_init(void); extern cpumask_t xen_cpu_initialized_map; +#else +static inline void xen_smp_init(void) {} +#endif /* Declare an asm function, along with symbols needed to make it @@ -73,7 +68,11 @@ DECL_ASM(void, xen_irq_disable_direct, void); DECL_ASM(unsigned long, xen_save_fl_direct, void); DECL_ASM(void, xen_restore_fl_direct, unsigned long); +/* These are not functions, and cannot be called normally */ void xen_iret(void); void xen_sysexit(void); +void xen_sysret32(void); +void xen_sysret64(void); +void xen_adjust_exception_frame(void); #endif /* XEN_OPS_H */ |